Index: tracing/bin/symbolize_trace |
diff --git a/tracing/bin/symbolize_trace b/tracing/bin/symbolize_trace |
deleted file mode 100755 |
index 50bb2c5c69edd3b856b7a71f9946215d764dfc40..0000000000000000000000000000000000000000 |
--- a/tracing/bin/symbolize_trace |
+++ /dev/null |
@@ -1,1413 +0,0 @@ |
-#!/usr/bin/env python |
-# Copyright 2016 The Chromium Authors. All rights reserved. |
-# Use of this source code is governed by a BSD-style license that can be |
-# found in the LICENSE file. |
- |
-""" |
-This script processes trace files and symbolizes stack frames generated by |
-Chrome's native heap profiler. This script assumes that the Chrome binary |
-referenced in the trace contains symbols, and is the same binary used to emit |
-the trace. |
- |
-=== Overview === |
- |
-Trace file is essentially a giant JSON array of dictionaries (events). |
-Events have some predefined keys (e.g. 'pid'), but otherwise are free to |
-have anything inside. Trace file contains events from all Chrome processes |
-that were sampled during tracing period. |
- |
-This script cares only about memory dump events generated with memory-infra |
-category enabled. |
- |
-When Chrome native heap profiling is enabled, some memory dump events |
-include the following extra information: |
- |
-* (Per allocator) Information about live allocations at the moment of the |
- memory dump (the information includes backtraces, types / categories, |
- sizes, and counts of allocations). There are several allocators in |
- Chrome: e.g. malloc, blink_gc, partition_alloc. |
- |
-* (Per process) Stack frame tree of all functions that called allocators |
- above. |
- |
-This script does the following: |
- |
-1. Parses the given trace file (loads JSON). |
-2. Finds memory dump events and parses stack frame tree for each process. |
-3. Finds stack frames that have PC addresses instead of function names. |
-4. Symbolizes PCs and modifies loaded JSON. |
-5. Writes modified JSON back to the file. |
- |
-The script supports trace files from the following platforms: |
- * Android (the script itself must be run on Linux) |
- * Linux |
- * macOS |
- * Windows |
- |
-Important note - the script doesn't check that it symbolizes same binaries |
-that were used at the time trace was taken. I.e. if you take a trace, change |
-and rebuild Chrome binaries, the script will blindly use the new binaries. |
- |
-=== Details === |
- |
-There are two formats of heap profiler information: legacy and modern. The |
-main differences relevant to this script are: |
- |
-* In the modern format the stack frame tree, type name mapping, and string |
- mapping nodes are dumped incrementally. These nodes are dumped in each |
- memory dump event and carry updates that occurred since the last event. |
- |
- For example, let's say that when the first memory dump event is generated |
- we only know about a function foo() (called from main()) allocating objects |
- of type "int": |
- |
- { |
- "args": { |
- "dumps": { |
- "heaps_v2": { |
- "maps": { |
- "nodes": [ |
- { "id": 1, "name_sid": 1 }, |
- { "id": 2, "parent": 1, "name_sid": 3 }, |
- ], |
- "types": [ |
- { "id": 1, "name_sid": 2 }, |
- ], |
- "strings": [ |
- { "id": 1, "string": "main()" }, |
- { "id": 2, "string": "int" }, |
- { "id": 3, "string": "foo()" }, |
- ] |
- }, |
- "allocators": { ...live allocations per allocator... }, |
- ... |
- }, |
- ... |
- } |
- }, |
- ... |
- } |
- |
- Here: |
- * 'nodes' node encodes stack frame tree |
- * 'types' node encodes type name mappings |
- * 'strings' node encodes string mapping (explained below) |
- |
- Then, by the time second memory dump even is generated, we learn about |
- bar() (called from main()), which also allocated "int" objects. Only the |
- new information is dumped, i.e. bar() stack frame: |
- |
- { |
- "args": { |
- "dumps": { |
- "heaps_v2": { |
- "maps": { |
- "nodes": [ |
- { "id": 2, "parent": 1, "name_sid": 4 }, |
- ], |
- "types": [], |
- "strings": [ |
- { "id": 4, "string": "bar()" }, |
- ] |
- }, |
- "allocators": { ...live allocations per allocator... }, |
- ... |
- }, |
- ... |
- } |
- }, |
- ... |
- } |
- |
- Note that 'types' node is empty, since there were no updates. All three |
- nodes ('nodes', types', and 'strings') can be empty if there were no updates |
- to them. |
- |
- For simplicity, when the script updates incremental nodes, it puts updated |
- content in the first node, and clears all others. I.e. the following stack |
- frame nodes: |
- |
- 'nodes': [ |
- { "id": 1, "name_sid": 1 }, |
- { "id": 2, "parent": 1, "name_sid": 2 }, |
- ] |
- 'nodes': [ |
- { "id": 3, "parent": 2, "name_sid": 3 }, |
- ] |
- 'nodes': [ |
- { "id": 4, "parent": 3, "name_sid": 4 }, |
- { "id": 5, "parent": 1, "name_sid": 5 }, |
- ] |
- |
- After symbolization are written as: |
- |
- 'nodes': [ |
- { "id": 1, "name_sid": 1 }, |
- { "id": 2, "parent": 1, "name_sid": 2 }, |
- { "id": 3, "parent": 2, "name_sid": 3 }, |
- { "id": 4, "parent": 3, "name_sid": 4 }, |
- { "id": 5, "parent": 1, "name_sid": 5 }, |
- ] |
- 'nodes': [] |
- 'nodes': [] |
- |
- |
-* In contrast, in the legacy format stack frame tree and type mappings are |
- dumped separately from memory dump events, once per process. |
- |
- Here is how trace file with two memory dump events looks like in the |
- legacy format: |
- |
- { |
- "args": { |
- "dumps": { |
- "heaps": { ...live allocations per allocator... }, |
- ... |
- } |
- }, |
- ... |
- } |
- |
- { |
- "args": { |
- "dumps": { |
- "heaps": { ...live allocations per allocator... }, |
- ... |
- } |
- }, |
- ... |
- } |
- |
- { |
- "args": { |
- "typeNames": { |
- 1: "int", |
- } |
- }, |
- "cat": "__metadata", |
- "name": "typeNames", |
- ... |
- } |
- |
- { |
- "args": { |
- "stackFrames": { |
- 1: { "name": "main" }, |
- 2: { "name": "foo", "parent": 1 }, |
- 3: { "name": "bar", "parent": 1 }, |
- } |
- }, |
- "cat": "__metadata", |
- "name": "stackFrames", |
- ... |
- } |
- |
- |
-* Another change in the modern format is 'strings' node, which was added |
- to deduplicate stack frame names (mainly for trace file size reduction). |
- For consistency 'types' node also uses string mappings. |
- |
- |
-See crbug.com/708930 for more information about the modern format. |
-""" |
- |
-import argparse |
-import bisect |
-import collections |
-import gzip |
-import itertools |
-import json |
-import os |
-import re |
-import shutil |
-import subprocess |
-import sys |
-import tarfile |
-import zipfile |
-import tempfile |
- |
-_SYMBOLS_PATH = os.path.abspath(os.path.join( |
- os.path.dirname(os.path.realpath(__file__)), |
- '..', |
- 'third_party', |
- 'symbols')) |
-sys.path.append(_SYMBOLS_PATH) |
-# pylint: disable=import-error |
-import symbols.elf_symbolizer as elf_symbolizer |
- |
-import symbolize_trace_atos_regex |
-import symbolize_trace_macho_reader |
- |
-_PY_UTILS_PATH = os.path.abspath(os.path.join( |
- os.path.dirname(os.path.realpath(__file__)), |
- '..', |
- '..', |
- 'common', |
- 'py_utils')) |
-sys.path.append(_PY_UTILS_PATH) |
-# pylint: disable=import-error |
-import py_utils.cloud_storage as cloud_storage |
- |
-class NodeWrapper(object): |
- """Wraps an event data node(s). |
- |
- A node is a reference into a trace event JSON. Wrappers parse nodes to |
- provide convenient APIs and update nodes when asked to propagate changes |
- back (see ApplyModifications() below). |
- |
- Here is an example of legacy metadata event that contains stack frame tree: |
- |
- { |
- "args": { |
- "stackFrames": { ... } |
- }, |
- "cat": "__metadata", |
- "name": "stackFrames", |
- "ph": "M", |
- ... |
- } |
- |
- When this event is encountered, a reference to the "stackFrames" dictionary |
- is obtained and passed down to a specific wrapped class, which knows how to |
- parse / update the dictionary. |
- |
- There are two parsing patterns depending on whether node is serialized |
- incrementally: |
- |
- * If node is not incremental, then parsing is done by __init__(), |
- see MemoryMap for an example. |
- |
- * If node is incremental, then __init__() does nothing, and instead |
- ParseNext() method is called when next node (from a next event) is |
- encountered. |
- |
- Some wrappers can also modify nodes they parsed. In such cases they have |
- additional APIs: |
- |
- * 'modified' flag, which indicates whether the wrapper was changed. |
- |
- * 'ApplyModifications' method, which propagates changes made to the wrapper |
- back to nodes. Successful invocation of ApplyModifications() resets |
- 'modified' flag. |
- |
- """ |
- pass |
- |
- |
-class MemoryMap(NodeWrapper): |
- """Wraps 'process_mmaps' node. |
- |
- 'process_mmaps' node contains information about file mappings. |
- |
- "process_mmaps": { |
- "vm_regions": [ |
- { |
- "mf": "<file_path>", |
- "sa": "<start_address>", |
- "sz": "<size>", |
- ... |
- }, |
- ... |
- ] |
- } |
- """ |
- |
- class Region(object): |
- def __init__(self, start_address, size, file_path): |
- self._start_address = start_address |
- self._size = size |
- self._file_path = file_path |
- |
- @property |
- def start_address(self): |
- return self._start_address |
- |
- @property |
- def end_address(self): |
- return self._start_address + self._size |
- |
- @property |
- def size(self): |
- return self._size |
- |
- @property |
- def file_path(self): |
- return self._file_path |
- |
- def __cmp__(self, other): |
- if isinstance(other, type(self)): |
- other_start_address = other._start_address |
- elif isinstance(other, (long, int)): |
- other_start_address = other |
- else: |
- raise Exception('Cannot compare with %s' % type(other)) |
- if self._start_address < other_start_address: |
- return -1 |
- elif self._start_address > other_start_address: |
- return 1 |
- else: |
- return 0 |
- |
- def __repr__(self): |
- return 'Region(0x{:X} - 0x{:X}, {})'.format( |
- self.start_address, self.end_address, self.file_path) |
- |
- def __init__(self, process_mmaps_node): |
- regions = [] |
- for region_node in process_mmaps_node['vm_regions']: |
- regions.append(self.Region( |
- long(region_node['sa'], 16), |
- long(region_node['sz'], 16), |
- region_node['mf'])) |
- regions.sort() |
- |
- # Copy regions without duplicates and check for overlaps. |
- self._regions = [] |
- previous_region = None |
- for region in regions: |
- if previous_region is not None: |
- if region == previous_region: |
- continue |
- assert region.start_address >= previous_region.end_address, \ |
- 'Regions {} and {} overlap.'.format(previous_region, region) |
- previous_region = region |
- self._regions.append(region) |
- |
- @property |
- def regions(self): |
- return self._regions |
- |
- def FindRegion(self, address): |
- """Finds region containing |address|. Returns None if none found.""" |
- |
- region_index = bisect.bisect_right(self._regions, address) - 1 |
- if region_index >= 0: |
- region = self._regions[region_index] |
- if address >= region.start_address and address < region.end_address: |
- return region |
- return None |
- |
- |
-class UnsupportedHeapDumpVersionError(Exception): |
- """Helper exception class to signal unsupported heap dump version.""" |
- |
- def __init__(self, version): |
- message = 'Unsupported heap dump version: {}'.format(version) |
- super(UnsupportedHeapDumpVersionError, self).__init__(message) |
- |
- |
-class StringMap(NodeWrapper): |
- """Wraps all 'strings' nodes for a process. |
- |
- 'strings' node contains incremental mappings between integer ids and strings. |
- |
- "strings": [ |
- { |
- "id": <string_id>, |
- "string": <string> |
- }, |
- ... |
- ] |
- """ |
- |
- def __init__(self): |
- self._modified = False |
- self._strings_nodes = [] |
- self._string_by_id = {} |
- self._id_by_string = {} |
- self._max_string_id = 0 |
- |
- @property |
- def modified(self): |
- """Returns True if the wrapper was modified (see NodeWrapper).""" |
- return self._modified |
- |
- @property |
- def string_by_id(self): |
- return self._string_by_id |
- |
- def ParseNext(self, heap_dump_version, strings_node): |
- """Parses and interns next node (see NodeWrapper).""" |
- |
- if heap_dump_version != Trace.HEAP_DUMP_VERSION_1: |
- raise UnsupportedHeapDumpVersionError(heap_dump_version) |
- |
- self._strings_nodes.append(strings_node) |
- for string_node in strings_node: |
- self._Insert(string_node['id'], string_node['string']) |
- |
- def Clear(self): |
- """Clears all string mappings.""" |
- if self._string_by_id: |
- self._modified = True |
- # ID #0 means 'no entry' and must always be present. Carry it over. |
- null_string = self._string_by_id[0] |
- self._string_by_id = {} |
- self._id_by_string = {} |
- self._Insert(0, null_string) |
- self._max_string_id = 0 |
- |
- def AddString(self, string): |
- """Adds a string (if it doesn't exist) and returns its integer id.""" |
- string_id = self._id_by_string.get(string) |
- if string_id is None: |
- string_id = self._max_string_id + 1 |
- self._Insert(string_id, string) |
- self._modified = True |
- return string_id |
- |
- def ApplyModifications(self): |
- """Propagates modifications back to nodes (see NodeWrapper).""" |
- if not self.modified: |
- return |
- |
- assert self._strings_nodes, 'no nodes' |
- |
- # Serialize into the first node, and clear all others. |
- |
- for strings_node in self._strings_nodes: |
- del strings_node[:] |
- strings_node = self._strings_nodes[0] |
- for string_id, string in self._string_by_id.iteritems(): |
- strings_node.append({'id': string_id, 'string': string}) |
- |
- self._modified = False |
- |
- def _Insert(self, string_id, string): |
- self._id_by_string[string] = string_id |
- self._string_by_id[string_id] = string |
- self._max_string_id = max(self._max_string_id, string_id) |
- |
- |
-class TypeNameMap(NodeWrapper): |
- """Wraps all 'types' nodes for a process. |
- |
- 'types' nodes encode mappings between integer type ids and integer |
- string ids (from 'strings' nodes). |
- |
- "types": [ |
- { |
- "id": <type_id>, |
- "name_sid": <name_string_id> |
- } |
- ... |
- ] |
- |
- For simplicity string ids are translated into strings during parsing, |
- and then translated back to ids in ApplyModifications(). |
- """ |
- def __init__(self): |
- self._modified = False |
- self._type_name_nodes = [] |
- self._name_by_id = {} |
- self._id_by_name = {} |
- self._max_type_id = 0 |
- |
- @property |
- def modified(self): |
- """Returns True if the wrapper was modified (see NodeWrapper).""" |
- return self._modified |
- |
- @property |
- def name_by_id(self): |
- """Returns {id -> name} dict (must not be changed directly).""" |
- return self._name_by_id |
- |
- def ParseNext(self, heap_dump_version, type_name_node, string_map): |
- """Parses and interns next node (see NodeWrapper). |
- |
- |string_map| - A StringMap object to use to translate string ids |
- to strings. |
- """ |
- if heap_dump_version != Trace.HEAP_DUMP_VERSION_1: |
- raise UnsupportedHeapDumpVersionError(heap_dump_version) |
- |
- self._type_name_nodes.append(type_name_node) |
- for type_node in type_name_node: |
- self._Insert(type_node['id'], |
- string_map.string_by_id[type_node['name_sid']]) |
- |
- def AddType(self, type_name): |
- """Adds a type name (if it doesn't exist) and returns its id.""" |
- type_id = self._id_by_name.get(type_name) |
- if type_id is None: |
- type_id = self._max_type_id + 1 |
- self._Insert(type_id, type_name) |
- self._modified = True |
- return type_id |
- |
- def ApplyModifications(self, string_map, force=False): |
- """Propagates modifications back to nodes. |
- |
- |string_map| - A StringMap object to use to translate strings to ids. |
- |force| - Whether to propagate changes regardless of 'modified' flag. |
- """ |
- if not self.modified and not force: |
- return |
- |
- assert self._type_name_nodes, 'no nodes' |
- |
- # Serialize into the first node, and clear all others. |
- |
- for types_node in self._type_name_nodes: |
- del types_node[:] |
- types_node = self._type_name_nodes[0] |
- for type_id, type_name in self._name_by_id.iteritems(): |
- types_node.append({ |
- 'id': type_id, |
- 'name_sid': string_map.AddString(type_name)}) |
- |
- self._modified = False |
- |
- def _Insert(self, type_id, type_name): |
- self._id_by_name[type_name] = type_id |
- self._name_by_id[type_id] = type_name |
- self._max_type_id = max(self._max_type_id, type_id) |
- |
- |
-class StackFrameMap(NodeWrapper): |
- """ Wraps stack frame tree nodes for a process. |
- |
- For the legacy format this wrapper expects a single 'stackFrames' node |
- (which comes from metadata event): |
- |
- "stackFrames": { |
- "<frame_id>": { |
- "name": "<frame_name>" |
- "parent": "<parent_frame_id>" |
- }, |
- ... |
- } |
- |
- For the modern format this wrapper expects several 'nodes' nodes: |
- |
- "nodes": [ |
- { |
- "id": <frame_id>, |
- "parent": <parent_frame_id>, |
- "name_sid": <name_string_id> |
- }, |
- ... |
- ] |
- |
- In both formats frame name is a string. Native heap profiler generates |
- specially formatted frame names (e.g. "pc:10eb78dba") for function |
- addresses (PCs). Inner Frame class below parses name and extracts PC, |
- if it's there. |
- """ |
- class Frame(object): |
- def __init__(self, frame_id, name, parent_frame_id): |
- self._modified = False |
- self._id = frame_id |
- self._name = name |
- self._pc = self._ParsePC(name) |
- self._parent_id = parent_frame_id |
- self._ext = None |
- |
- @property |
- def modified(self): |
- """Returns True if the frame was modified. |
- |
- For example changing frame's name sets this flag (since the change |
- needs to be propagated back to nodes). |
- """ |
- return self._modified |
- |
- @property |
- def id(self): |
- """Frame id (integer).""" |
- return self._id |
- |
- @property |
- def pc(self): |
- """Parsed (integer) PC of the frame, or None.""" |
- return self._pc |
- |
- @property |
- def name(self): |
- """Name of the frame (see above).""" |
- return self._name |
- |
- @name.setter |
- def name(self, value): |
- """Changes the name. Doesn't affect value of |pc|.""" |
- self._modified = True |
- self._name = value |
- |
- @property |
- def parent_id(self): |
- """Parent frame id (integer).""" |
- return self._parent_id |
- |
- _PC_TAG = 'pc:' |
- |
- def _ParsePC(self, name): |
- if not name.startswith(self._PC_TAG): |
- return None |
- return long(name[len(self._PC_TAG):], 16) |
- |
- def _ClearModified(self): |
- self._modified = False |
- |
- def __init__(self): |
- self._modified = False |
- self._heap_dump_version = None |
- self._stack_frames_nodes = [] |
- self._frame_by_id = {} |
- |
- @property |
- def modified(self): |
- """Returns True if the wrapper or any of its frames were modified.""" |
- return (self._modified or |
- any(f.modified for f in self._frame_by_id.itervalues())) |
- |
- @property |
- def frame_by_id(self): |
- """Returns {id -> frame} dict (must not be modified directly).""" |
- return self._frame_by_id |
- |
- def ParseNext(self, heap_dump_version, stack_frames_node, string_map): |
- """Parses the next stack frames node (see NodeWrapper). |
- |
- For the modern format |string_map| is used to translate string ids |
- to strings. |
- """ |
- |
- frame_by_id = {} |
- if heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
- if self._stack_frames_nodes: |
- raise Exception('Legacy stack frames node is expected only once.') |
- for frame_id, frame_node in stack_frames_node.iteritems(): |
- frame = self.Frame(frame_id, |
- frame_node['name'], |
- frame_node.get('parent')) |
- frame_by_id[frame.id] = frame |
- else: |
- if heap_dump_version != Trace.HEAP_DUMP_VERSION_1: |
- raise UnsupportedHeapDumpVersionError(heap_dump_version) |
- for frame_node in stack_frames_node: |
- frame = self.Frame(frame_node['id'], |
- string_map.string_by_id[frame_node['name_sid']], |
- frame_node.get('parent')) |
- frame_by_id[frame.id] = frame |
- |
- self._heap_dump_version = heap_dump_version |
- self._stack_frames_nodes.append(stack_frames_node) |
- |
- self._frame_by_id.update(frame_by_id) |
- |
- def ApplyModifications(self, string_map, force=False): |
- """Applies modifications back to nodes (see NodeWrapper).""" |
- |
- if not self.modified and not force: |
- return |
- |
- assert self._stack_frames_nodes, 'no nodes' |
- if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
- assert string_map is None, \ |
- 'string_map should not be used with the legacy format' |
- |
- # Serialize frames into the first node, clear all others. |
- |
- for frames_node in self._stack_frames_nodes: |
- if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
- frames_node.clear() |
- else: |
- del frames_node[:] |
- |
- frames_node = self._stack_frames_nodes[0] |
- for frame in self._frame_by_id.itervalues(): |
- if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
- frame_node = {'name': frame.name} |
- frames_node[frame.id] = frame_node |
- else: |
- frame_node = { |
- 'id': frame.id, |
- 'name_sid': string_map.AddString(frame.name) |
- } |
- frames_node.append(frame_node) |
- if frame.parent_id is not None: |
- frame_node['parent'] = frame.parent_id |
- frame._ClearModified() |
- |
- self._modified = False |
- |
- |
-class Trace(NodeWrapper): |
- """Wrapper for the root trace node (i.e. the trace JSON itself). |
- |
- This wrapper parses select nodes from memory-infra events and groups |
- parsed data per-process (see inner Process class below). |
- """ |
- |
- # Indicates legacy heap dump format. |
- HEAP_DUMP_VERSION_LEGACY = 'Legacy' |
- |
- # Indicates variation of a modern heap dump format. |
- HEAP_DUMP_VERSION_1 = 1 |
- |
- class Process(object): |
- """Collection of per-process data and wrappers.""" |
- |
- def __init__(self, pid): |
- self._pid = pid |
- self._name = None |
- self._memory_map = None |
- self._stack_frame_map = StackFrameMap() |
- self._type_name_map = TypeNameMap() |
- self._string_map = StringMap() |
- self._heap_dump_version = None |
- |
- @property |
- def modified(self): |
- return self._stack_frame_map.modified or self._type_name_map.modified |
- |
- @property |
- def pid(self): |
- return self._pid |
- |
- @property |
- def name(self): |
- return self._name |
- |
- @property |
- def unique_name(self): |
- """Returns string that includes both process name and its pid.""" |
- name = self._name if self._name else 'UnnamedProcess' |
- return '{}({})'.format(name, self._pid) |
- |
- @property |
- def memory_map(self): |
- return self._memory_map |
- |
- @property |
- def stack_frame_map(self): |
- return self._stack_frame_map |
- |
- @property |
- def type_name_map(self): |
- return self._type_name_map |
- |
- def ApplyModifications(self): |
- """Calls ApplyModifications() on contained wrappers.""" |
- if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
- self._stack_frame_map.ApplyModifications(None) |
- else: |
- if self._stack_frame_map.modified or self._type_name_map.modified: |
- self._string_map.Clear() |
- self._stack_frame_map.ApplyModifications(self._string_map, force=True) |
- self._type_name_map.ApplyModifications(self._string_map, force=True) |
- self._string_map.ApplyModifications() |
- |
- def __init__(self, trace_node): |
- self._trace_node = trace_node |
- self._processes = [] |
- self._heap_dump_version = None |
- self._version = None |
- self._is_chromium = True |
- self._is_64bit = False |
- self._is_win = False |
- self._is_mac = False |
- |
- # Misc per-process information needed only during parsing. |
- class ProcessExt(object): |
- def __init__(self, pid): |
- self.process = Trace.Process(pid) |
- self.mapped_entry_names = set() |
- self.process_mmaps_node = None |
- self.seen_strings_node = False |
- |
- process_ext_by_pid = {} |
- |
- if isinstance(trace_node, dict): |
- metadata = trace_node['metadata'] |
- product_version = metadata['product-version'] |
- # product-version has the form "Chrome/60.0.3103.0" |
- self._version = product_version.split('/', 1)[-1] |
- |
- command_line = metadata['command_line'] |
- self._is_win = re.search('windows', metadata['os-name'] , re.IGNORECASE) |
- self._is_mac = re.search('mac', metadata['os-name'] , re.IGNORECASE) |
- |
- if self._is_win: |
- self._is_chromium = ( |
- not re.search('Chrome SxS\\\\Application\\\\chrome.exe', command_line, |
- re.IGNORECASE) and |
- not re.search('Chrome\\\\Application\\\\chrome.exe', command_line, |
- re.IGNORECASE)) |
- if self._is_mac: |
- self._is_chromium = re.search('chromium', command_line, re.IGNORECASE) |
- |
- self._is_64bit = ( |
- re.search('x86_64', metadata['os-arch'] , re.IGNORECASE) and |
- not re.search('WOW64', metadata['user-agent'] , re.IGNORECASE)) |
- |
- # Android traces produced via 'chrome://inspect/?tracing#devices' are |
- # just list of events. |
- events = trace_node if isinstance(trace_node, list) \ |
- else trace_node['traceEvents'] |
- for event in events: |
- name = event.get('name') |
- if not name: |
- continue |
- |
- pid = event['pid'] |
- process_ext = process_ext_by_pid.get(pid) |
- if process_ext is None: |
- process_ext = ProcessExt(pid) |
- process_ext_by_pid[pid] = process_ext |
- process = process_ext.process |
- |
- phase = event['ph'] |
- if phase == self._EVENT_PHASE_METADATA: |
- if name == 'process_name': |
- process._name = event['args']['name'] |
- elif name == 'stackFrames': |
- process._stack_frame_map.ParseNext( |
- self._UseHeapDumpVersion(self.HEAP_DUMP_VERSION_LEGACY), |
- event['args']['stackFrames'], |
- process._string_map) |
- elif phase == self._EVENT_PHASE_MEMORY_DUMP: |
- dumps = event['args']['dumps'] |
- process_mmaps = dumps.get('process_mmaps') |
- if process_mmaps: |
- # We want the most recent memory map, so parsing happens later |
- # once we finished reading all events. |
- process_ext.process_mmaps_node = process_mmaps |
- heaps = dumps.get('heaps_v2') |
- if heaps: |
- version = self._UseHeapDumpVersion(heaps['version']) |
- maps = heaps.get('maps') |
- if maps: |
- process_ext.mapped_entry_names.update(maps.iterkeys()) |
- types = maps.get('types') |
- stack_frames = maps.get('nodes') |
- strings = maps.get('strings') |
- if (strings is None and (types or stack_frames) |
- and not process_ext.seen_strings_node): |
- # ApplyModifications() for TypeNameMap and StackFrameMap puts |
- # everything into the first node and depends on StringMap. So |
- # we need to make sure that 'strings' node is there if any of |
- # other two nodes present. |
- strings = [] |
- maps['strings'] = strings |
- if strings is not None: |
- process_ext.seen_strings_node = True |
- process._string_map.ParseNext(version, strings) |
- if types: |
- process._type_name_map.ParseNext( |
- version, types, process._string_map) |
- if stack_frames: |
- process._stack_frame_map.ParseNext( |
- version, stack_frames, process._string_map) |
- |
- self._processes = [] |
- for pe in process_ext_by_pid.itervalues(): |
- pe.process._heap_dump_version = self._heap_dump_version |
- if pe.process_mmaps_node: |
- # Now parse the most recent memory map. |
- pe.process._memory_map = MemoryMap(pe.process_mmaps_node) |
- self._processes.append(pe.process) |
- |
- @property |
- def node(self): |
- """Root node (that was passed to the __init__).""" |
- return self._trace_node |
- |
- @property |
- def modified(self): |
- """Returns True if trace file needs to be updated. |
- |
- Before writing trace JSON back to a file ApplyModifications() needs |
- to be called. |
- """ |
- return any(p.modified for p in self._processes) |
- |
- @property |
- def processes(self): |
- return self._processes |
- |
- @property |
- def heap_dump_version(self): |
- return self._heap_dump_version |
- |
- @property |
- def version(self): |
- return self._version |
- |
- @property |
- def is_chromium(self): |
- return self._is_chromium |
- |
- @property |
- def is_mac(self): |
- return self._is_mac |
- |
- @property |
- def is_win(self): |
- return self._is_win |
- |
- @property |
- def is_64bit(self): |
- return self._is_64bit |
- |
- def ApplyModifications(self): |
- """Propagates modifications back to the trace JSON.""" |
- for process in self._processes: |
- process.ApplyModifications() |
- assert not self.modified, 'still modified' |
- |
- # Relevant trace event phases from Chromium's |
- # src/base/trace_event/common/trace_event_common.h. |
- _EVENT_PHASE_METADATA = 'M' |
- _EVENT_PHASE_MEMORY_DUMP = 'v' |
- |
- def _UseHeapDumpVersion(self, version): |
- if self._heap_dump_version is None: |
- self._heap_dump_version = version |
- return version |
- elif self._heap_dump_version != version: |
- raise Exception( |
- ("Inconsistent trace file: first saw '{}' heap dump version, " |
- "then '{}'.").format(self._heap_dump_version, version)) |
- else: |
- return version |
- |
- |
-class SymbolizableFile(object): |
- """Holds file path, addresses to symbolize and stack frames to update. |
- |
- This class is a link between ELFSymbolizer and a trace file: it specifies |
- what to symbolize (addresses) and what to update with the symbolization |
- result (frames). |
- """ |
- def __init__(self, file_path): |
- self.path = file_path |
- self.symbolizable_path = file_path # path to use for symbolization |
- self.frames_by_address = collections.defaultdict(list) |
- |
- |
-def ResolveSymbolizableFiles(processes): |
- """Resolves and groups PCs into list of SymbolizableFiles. |
- |
- As part of the grouping process, this function resolves PC from each stack |
- frame to the corresponding mmap region. Stack frames that failed to resolve |
- are symbolized with '<unresolved>'. |
- """ |
- symfile_by_path = {} |
- for process in processes: |
- if not process.memory_map: |
- continue |
- for frame in process.stack_frame_map.frame_by_id.itervalues(): |
- if frame.pc is None: |
- continue |
- region = process.memory_map.FindRegion(frame.pc) |
- if region is None: |
- frame.name = '<unresolved>' |
- continue |
- |
- symfile = symfile_by_path.get(region.file_path) |
- if symfile is None: |
- symfile = SymbolizableFile(region.file_path) |
- symfile_by_path[symfile.path] = symfile |
- |
- relative_pc = frame.pc - region.start_address |
- symfile.frames_by_address[relative_pc].append(frame) |
- return symfile_by_path.values() |
- |
- |
-def FindInSystemPath(binary_name): |
- paths = os.environ['PATH'].split(os.pathsep) |
- for path in paths: |
- binary_path = os.path.join(path, binary_name) |
- if os.path.isfile(binary_path): |
- return binary_path |
- return None |
- |
- |
class Symbolizer(object):
  """Encapsulates platform-specific symbolization logic.

  Selects the external symbolizer binary for the host platform (atos on
  macOS, addr2line-pdb.exe on Windows, addr2line elsewhere) and drives it
  to resolve the relative PCs collected in a SymbolizableFile.
  """

  def __init__(self):
    self.is_mac = sys.platform == 'darwin'
    self.is_win = sys.platform == 'win32'
    if self.is_mac:
      self.binary = 'atos'
      self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher()
    elif self.is_win:
      self.binary = 'addr2line-pdb.exe'
    else:
      self.binary = 'addr2line'
    # None when the binary is not found in PATH; callers must check.
    self.symbolizer_path = FindInSystemPath(self.binary)

  def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name):
    """Symbolizes |symfile| asynchronously via ELFSymbolizer (addr2line).

    Frames whose PC cannot be resolved get |unsymbolized_name| instead.
    """
    def _SymbolizerCallback(sym_info, frames):
      # Unwind inline chain to the top.
      while sym_info.inlined_by:
        sym_info = sym_info.inlined_by

      symbolized_name = sym_info.name if sym_info.name else unsymbolized_name
      for frame in frames:
        frame.name = symbolized_name

    symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path,
                                              self.symbolizer_path,
                                              _SymbolizerCallback,
                                              inlines=True)

    for address, frames in symfile.frames_by_address.iteritems():
      # SymbolizeAsync() asserts that the type of address is int. We operate
      # on longs (since they are raw pointers possibly from 64-bit processes).
      # It's OK to cast here because we're passing relative PC, which should
      # always fit into int.
      symbolizer.SymbolizeAsync(int(address), frames)

    symbolizer.Join()


  def _SymbolizeMac(self, symfile):
    """Symbolizes |symfile| by feeding a file of absolute addresses to atos.

    Relative PCs are rebased onto the Mach-O text load address before being
    handed to atos via its '-f' address-file option.
    """
    load_address = (symbolize_trace_macho_reader.
                    ReadMachOTextLoadAddress(symfile.symbolizable_path))
    assert load_address is not None

    address_os_file, address_file_path = tempfile.mkstemp()
    try:
      with os.fdopen(address_os_file, 'w') as address_file:
        for address in symfile.frames_by_address.iterkeys():
          address_file.write('{:x} '.format(address + load_address))

      cmd = [self.symbolizer_path, '-arch', 'x86_64', '-l',
             '0x%x' % load_address, '-o', symfile.symbolizable_path,
             '-f', address_file_path]
      output_array = subprocess.check_output(cmd).split('\n')

      # atos prints one line per input address in input order.
      # NOTE(review): this pairing relies on iterkeys() (above) and
      # itervalues() (here) walking the unmodified dict in the same order,
      # which CPython guarantees.
      for i, frames in enumerate(symfile.frames_by_address.itervalues()):
        symbolized_name = self._matcher.Match(output_array[i])
        for frame in frames:
          frame.name = symbolized_name
    finally:
      os.remove(address_file_path)

  def _SymbolizeWin(self, symfile):
    """Invoke symbolizer binary on windows and write all input in one go.

    Unlike linux, on windows, symbolization talks through a shared system
    service that handles communication with the NT symbol servers. This
    creates an explicit serialization (and therefore lock contention) of
    any process using the symbol API for files that do not have a local PDB.

    Thus, even though the windows symbolizer binary can be made command line
    compatible with the POSIX addr2line interface, parallelizing the
    symbolization does not yield the same performance effects. Running
    just one symbolizer seems good enough for now. Can optimize later
    if this becomes a bottleneck.
    """
    cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe',
           symfile.symbolizable_path]

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                            stderr=sys.stderr)
    addrs = ["%x" % relative_pc for relative_pc in
             symfile.frames_by_address.keys()]
    (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs))
    stdout_data = stdout_data.split('\n')

    # addr2line answers in input order, so stdout_data pairs with addrs.
    for i, addr in enumerate(addrs):
      for frame in symfile.frames_by_address[int(addr, 16)]:
        # Output of addr2line with --functions is always 2 outputs per
        # symbol, function name followed by source line number. Only grab
        # the function name as line info is not always available.
        frame.name = stdout_data[i * 2]

  def Symbolize(self, symfile, unsymbolized_name):
    """Dispatches to the platform-specific symbolization routine."""
    if self.is_mac:
      self._SymbolizeMac(symfile)
    elif self.is_win:
      self._SymbolizeWin(symfile)
    else:
      self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name)

  def IsSymbolizableFile(self, file_path):
    """Returns True if |file_path| looks like a symbolizable native binary."""
    if self.is_win:
      # Windows: judge by extension only (PE images).
      extension = os.path.splitext(file_path)[1].lower()
      return extension in ['.dll', '.exe']
    else:
      # 'file -0' prints "<path>\0<description>"; the NUL separator makes
      # extracting the description safe for unusual file names.
      result = subprocess.check_output(['file', '-0', file_path])
      type_string = result[result.find('\0') + 1:]
      return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*',
                           type_string, re.DOTALL))
- |
- |
def SymbolizeFiles(symfiles, symbolizer):
  """Symbolizes each file in the given list of SymbolizableFiles
  and updates stack frames with symbolization results."""

  if not symfiles:
    print 'Nothing to symbolize.'
    return

  print 'Symbolizing...'

  def _SubPrintf(message, *args):
    # Helper: prints an indented, formatted status line.
    print (' ' + message).format(*args)

  for symfile in symfiles:
    # Placeholder name given to frames we cannot (or will not) symbolize.
    unsymbolized_name = '<{}>'.format(
        symfile.path if symfile.path else 'unnamed')

    # Checks are ordered from cheapest to most expensive; the first failure
    # wins. Note: a relative symbolizable_path (e.g. the 'android://...'
    # sentinel set by RemapAndroidFiles) is reported as 'not a file'.
    problem = None
    if not os.path.isabs(symfile.symbolizable_path):
      problem = 'not a file'
    elif not os.path.isfile(symfile.symbolizable_path):
      problem = "file doesn't exist"
    elif not symbolizer.IsSymbolizableFile(symfile.symbolizable_path):
      problem = 'file is not symbolizable'
    if problem:
      _SubPrintf("Won't symbolize {} PCs for '{}': {}.",
                 len(symfile.frames_by_address),
                 symfile.symbolizable_path,
                 problem)
      # Tag every frame with the placeholder name and skip this file.
      for frames in symfile.frames_by_address.itervalues():
        for frame in frames:
          frame.name = unsymbolized_name
      continue

    _SubPrintf('Symbolizing {} PCs from {}...',
               len(symfile.frames_by_address),
               symfile.symbolizable_path)

    symbolizer.Symbolize(symfile, unsymbolized_name)
- |
- |
# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so)
# as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available
# via 'name' group.
ANDROID_PATH_MATCHER = re.compile(
    r'^/data/(?:'
    r'app/[^/]+/lib/[^/]+/|'
    r'app-lib/[^/]+/|'
    r'data/[^/]+/incremental-install-files/lib/'
    r')(?P<name>.*\.so)')

# Subpath of output path where unstripped libraries are stored.
ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'


def HaveFilesFromAndroid(symfiles):
  """Returns True if any of |symfiles| has an Android-style library path."""
  for symfile in symfiles:
    if ANDROID_PATH_MATCHER.match(symfile.path):
      return True
  return False
- |
- |
def RemapAndroidFiles(symfiles, output_path):
  """Points each Android symfile at its unstripped library in |output_path|.

  Paths that don't look like Android library paths are replaced with a
  non-file sentinel so that SymbolizeFiles() reports them accurately.
  """
  for symfile in symfiles:
    match = ANDROID_PATH_MATCHER.match(symfile.path)
    if match is None:
      # Clobber file path to trigger "not a file" problem in SymbolizeFiles().
      # Without this, files won't be symbolized with "file not found" problem,
      # which is not accurate.
      symfile.symbolizable_path = 'android://{}'.format(symfile.path)
    else:
      symfile.symbolizable_path = os.path.join(
          output_path, ANDROID_UNSTRIPPED_SUBPATH, match.group('name'))
- |
- |
def RemapMacFiles(symfiles, symbol_base_directory, version):
  """Points Google Chrome Framework symfiles at the cached dSYM DWARF file."""
  dwarf_path = os.path.join(
      symbol_base_directory, version,
      "Google Chrome Framework.dSYM/Contents/Resources/DWARF/"
      "Google Chrome Framework")
  for symfile in symfiles:
    if not symfile.path.endswith("Google Chrome Framework"):
      continue
    symfile.symbolizable_path = dwarf_path
- |
def RemapWinFiles(symfiles, symbol_base_directory, version, is64bit):
  """Points Windows symfiles at cached images whose .pdb sits alongside."""
  arch_folder = "win64" if is64bit else "win"
  symbol_sub_dir = os.path.join(symbol_base_directory,
                                "chrome-" + arch_folder + "-" + version)
  for symfile in symfiles:
    candidate = os.path.join(symbol_sub_dir, os.path.basename(symfile.path))
    # Remap only when both the image and its PDB exist locally.
    if os.path.isfile(candidate) and os.path.isfile(candidate + ".pdb"):
      symfile.symbolizable_path = candidate
- |
def Symbolize(options, trace, symbolizer):
  """Resolves files referenced by |trace|, remaps them to local binaries
  with symbols, and symbolizes their stack frames in place."""
  symbolizable_files = ResolveSymbolizableFiles(trace.processes)

  # Android trace files carry no explicit platform marker, so detect them
  # by their characteristic library paths.
  if HaveFilesFromAndroid(symbolizable_files):
    if not options.output_directory:
      sys.exit('The trace file appears to be from Android. Please '
               'specify output directory to properly symbolize it.')
    RemapAndroidFiles(symbolizable_files,
                      os.path.abspath(options.output_directory))

  # Non-Chromium (official Google Chrome) traces rely on symbols that were
  # downloaded into the symbol base directory beforehand.
  if not trace.is_chromium:
    if symbolizer.is_mac:
      RemapMacFiles(symbolizable_files, options.symbol_base_directory,
                    trace.version)
    if symbolizer.is_win:
      RemapWinFiles(symbolizable_files, options.symbol_base_directory,
                    trace.version, trace.is_64bit)

  SymbolizeFiles(symbolizable_files, symbolizer)
- |
- |
def OpenTraceFile(file_path, mode):
  """Opens a trace file, transparently handling gzip compression.

  Gzipped traces ('.gz') are opened in binary mode, plain traces in text
  mode. |mode| is 'r' or 'w'.
  """
  is_gzipped = file_path.endswith('.gz')
  opener = gzip.open if is_gzipped else open
  return opener(file_path, mode + ('b' if is_gzipped else 't'))
- |
- |
def FetchAndExtractSymbolsMac(symbol_base_directory, version):
  """Downloads and extracts Google Chrome dSYM symbols for |version| on Mac.

  Symbols are cached under <symbol_base_directory>/<version>; if that
  directory already exists the download is skipped. Returns True on
  success (including cache hits), False when symbols are not on GCS.
  """
  def GetLocalPath(base_dir, version):
    # Local cache location for the downloaded tarball.
    return os.path.join(base_dir, version + ".tar.bz2")
  def GetSymbolsPath(version):
    # GCS object path; 'desktop-*' is matched by cloud_storage.
    return "desktop-*/" + version + "/mac64/Google Chrome.dSYM.tar.bz2"
  def ExtractSymbolTarFile(symbol_sub_dir, symbol_tar_file):
    # NOTE(review): extractall() on a downloaded archive trusts its member
    # paths; the bucket is Google-controlled, but verify this is acceptable.
    os.makedirs(symbol_sub_dir)
    with tarfile.open(os.path.expanduser(symbol_tar_file), "r:bz2") as tar:
      tar.extractall(symbol_sub_dir)

  symbol_sub_dir = os.path.join(symbol_base_directory, version)
  if os.path.isdir(symbol_sub_dir):
    return True

  bzip_path = GetLocalPath(symbol_base_directory, version)
  if not os.path.isfile(bzip_path):

    _CLOUD_STORAGE_BUCKET = "chrome-unsigned"
    if not cloud_storage.Exists(_CLOUD_STORAGE_BUCKET, GetSymbolsPath(version)):
      print "Can't find symbols on GCS."
      return False
    print "Downloading symbols files from GCS, please wait."
    cloud_storage.Get(_CLOUD_STORAGE_BUCKET, GetSymbolsPath(version), bzip_path)

  ExtractSymbolTarFile(symbol_sub_dir, bzip_path)
  return True
- |
- |
def FetchAndExtractSymbolsWin(symbol_base_directory, version, is64bit):
  """Downloads and extracts Chrome symbols and binaries for |version| on Win.

  Fetches two zips from GCS (symbols and the PGO build) and flattens their
  contents into <symbol_base_directory>/chrome-<win|win64>-<version>.
  Returns True on success or cache hit.

  NOTE(review): the inner helper's False returns are discarded by the
  callers below, so this function returns True even when a download
  fails — confirm whether that is intentional best-effort behavior.
  """
  def DownloadAndExtractZipFile(zip_path, source, destination):
    # Download |source| from GCS to |zip_path| (unless cached), then extract
    # all files (flattened, directories skipped) into symbol_sub_dir.
    if not os.path.isfile(zip_path):
      _CLOUD_STORAGE_BUCKET = "chrome-unsigned"
      if not cloud_storage.Exists(_CLOUD_STORAGE_BUCKET, source):
        print "Can't find symbols on GCS."
        return False
      print "Downloading symbols files from GCS, please wait."
      cloud_storage.Get(_CLOUD_STORAGE_BUCKET, source, zip_path)
      if not os.path.isfile(zip_path):
        print "Can't download symbols on GCS."
        return False
    # NOTE(review): 'zip' shadows the builtin and file() is Python-2-only.
    with zipfile.ZipFile(zip_path, "r") as zip:
      for member in zip.namelist():
        filename = os.path.basename(member)
        # Skip directories.
        if not filename:
          continue
        # Extract archived files.
        source = zip.open(member)
        target = file(os.path.join(symbol_sub_dir, filename), "wb")
        with source, target:
          shutil.copyfileobj(source, target)

  folder = "win64" if is64bit else "win"
  gcs_folder = "desktop-*/" + version + "/" + folder + "-pgo/"

  symbol_sub_dir = os.path.join(symbol_base_directory,
                                "chrome-" + folder + "-" + version)
  if os.path.isdir(symbol_sub_dir):
    return True

  os.makedirs(symbol_sub_dir)
  DownloadAndExtractZipFile(
      os.path.join(symbol_base_directory,
                   "chrome-" + folder + "-" + version + "-syms.zip"),
      gcs_folder + "chrome-win32-syms.zip",
      symbol_sub_dir)
  DownloadAndExtractZipFile(
      os.path.join(symbol_base_directory,
                   "chrome-" + folder + "-" + version + ".zip"),
      gcs_folder + "chrome-" + folder + "-pgo.zip",
      symbol_sub_dir)

  return True
- |
# Suffix appended to the trace file name when backing up the original
# before the symbolized trace is written in its place (see main()).
BACKUP_FILE_TAG = '.BACKUP'
- |
-def main(): |
- parser = argparse.ArgumentParser() |
- parser.add_argument( |
- 'file', |
- help='Trace file to symbolize (.json or .json.gz)') |
- |
- parser.add_argument( |
- '--no-backup', dest='backup', default='true', action='store_false', |
- help="Don't create {} files".format(BACKUP_FILE_TAG)) |
- |
- parser.add_argument( |
- '--output-directory', |
- help='The path to the build output directory, such as out/Debug.') |
- |
- home_dir = os.path.expanduser('~') |
- default_dir = os.path.join(home_dir, "symbols") |
- parser.add_argument( |
- '--symbol-base-directory', |
- default=default_dir, |
- help='Directory where symbols are downloaded and cached.') |
- |
- symbolizer = Symbolizer() |
- if symbolizer.symbolizer_path is None: |
- sys.exit("Can't symbolize - no %s in PATH." % symbolizer.binary) |
- |
- options = parser.parse_args() |
- |
- trace_file_path = options.file |
- |
- print 'Reading trace file...' |
- with OpenTraceFile(trace_file_path, 'r') as trace_file: |
- trace = Trace(json.load(trace_file)) |
- |
- # Perform some sanity checks. |
- if trace.is_win and sys.platform != 'win32': |
- print "Cannot symbolize a windows trace on this architecture!" |
- return False |
- |
- # If the trace is from Chromium, assume that symbols are already present. |
- # Otherwise the trace is from Google Chrome. Assume that this is not a local |
- # build of Google Chrome with symbols, and that we need to fetch symbols |
- # from gcs. |
- if not trace.is_chromium: |
- has_symbols = False |
- if symbolizer.is_mac: |
- has_symbols = FetchAndExtractSymbolsMac(options.symbol_base_directory, |
- trace.version) |
- if symbolizer.is_win: |
- has_symbols = FetchAndExtractSymbolsWin(options.symbol_base_directory, |
- trace.version, trace.is_64bit) |
- if not has_symbols: |
- print 'Cannot fetch symbols from GCS' |
- return False |
- |
- Symbolize(options, trace, symbolizer) |
- |
- if trace.modified: |
- trace.ApplyModifications() |
- |
- if options.backup: |
- backup_file_path = trace_file_path + BACKUP_FILE_TAG |
- print 'Backing up trace file to {}'.format(backup_file_path) |
- os.rename(trace_file_path, backup_file_path) |
- |
- print 'Updating the trace file...' |
- with OpenTraceFile(trace_file_path, 'w') as trace_file: |
- json.dump(trace.node, trace_file) |
- else: |
- print 'No modifications were made - not updating the trace file.' |
- |
- |
-if __name__ == '__main__': |
- main() |