| Index: tracing/bin/symbolize_trace
 | 
| diff --git a/tracing/bin/symbolize_trace b/tracing/bin/symbolize_trace
 | 
| deleted file mode 100755
 | 
| index 50bb2c5c69edd3b856b7a71f9946215d764dfc40..0000000000000000000000000000000000000000
 | 
| --- a/tracing/bin/symbolize_trace
 | 
| +++ /dev/null
 | 
| @@ -1,1413 +0,0 @@
 | 
| -#!/usr/bin/env python
 | 
| -# Copyright 2016 The Chromium Authors. All rights reserved.
 | 
| -# Use of this source code is governed by a BSD-style license that can be
 | 
| -# found in the LICENSE file.
 | 
| -
 | 
| -"""
 | 
| -This script processes trace files and symbolizes stack frames generated by
 | 
| -Chrome's native heap profiler. This script assumes that the Chrome binary
 | 
| -referenced in the trace contains symbols, and is the same binary used to emit
 | 
| -the trace.
 | 
| -
 | 
| -=== Overview ===
 | 
| -
 | 
| -Trace file is essentially a giant JSON array of dictionaries (events).
 | 
| -Events have some predefined keys (e.g. 'pid'), but otherwise are free to
 | 
| -have anything inside. Trace file contains events from all Chrome processes
 | 
| -that were sampled during tracing period.
 | 
| -
 | 
| -This script cares only about memory dump events generated with memory-infra
 | 
| -category enabled.
 | 
| -
 | 
| -When Chrome native heap profiling is enabled, some memory dump events
 | 
| -include the following extra information:
 | 
| -
 | 
| -* (Per allocator) Information about live allocations at the moment of the
 | 
| -  memory dump (the information includes backtraces, types / categories,
 | 
| -  sizes, and counts of allocations). There are several allocators in
 | 
| -  Chrome: e.g. malloc, blink_gc, partition_alloc.
 | 
| -
 | 
| -* (Per process) Stack frame tree of all functions that called allocators
 | 
| -  above.
 | 
| -
 | 
| -This script does the following:
 | 
| -
 | 
| -1. Parses the given trace file (loads JSON).
 | 
| -2. Finds memory dump events and parses stack frame tree for each process.
 | 
| -3. Finds stack frames that have PC addresses instead of function names.
 | 
| -4. Symbolizes PCs and modifies loaded JSON.
 | 
| -5. Writes modified JSON back to the file.
 | 
| -
 | 
| -The script supports trace files from the following platforms:
 | 
| -  * Android (the script itself must be run on Linux)
 | 
| -  * Linux
 | 
| -  * macOS
 | 
| -  * Windows
 | 
| -
 | 
| -Important note - the script doesn't check that it symbolizes same binaries
 | 
| -that were used at the time trace was taken. I.e. if you take a trace, change
 | 
| -and rebuild Chrome binaries, the script will blindly use the new binaries.
 | 
| -
 | 
| -=== Details ===
 | 
| -
 | 
| -There are two formats of heap profiler information: legacy and modern. The
 | 
| -main differences relevant to this script are:
 | 
| -
 | 
| -* In the modern format the stack frame tree, type name mapping, and string
 | 
| -  mapping nodes are dumped incrementally. These nodes are dumped in each
 | 
| -  memory dump event and carry updates that occurred since the last event.
 | 
| -
 | 
| -  For example, let's say that when the first memory dump event is generated
 | 
| -  we only know about a function foo() (called from main()) allocating objects
 | 
| -  of type "int":
 | 
| -
 | 
| -  {
 | 
| -    "args": {
 | 
| -      "dumps": {
 | 
| -        "heaps_v2": {
 | 
| -          "maps": {
 | 
| -            "nodes": [
 | 
| -              { "id": 1, "name_sid": 1 },
 | 
| -              { "id": 2, "parent": 1, "name_sid": 3 },
 | 
| -            ],
 | 
| -            "types": [
 | 
| -              { "id": 1, "name_sid": 2 },
 | 
| -            ],
 | 
| -            "strings": [
 | 
| -              { "id": 1, "string": "main()" },
 | 
| -              { "id": 2, "string": "int" },
 | 
| -              { "id": 3, "string": "foo()" },
 | 
| -            ]
 | 
| -          },
 | 
| -          "allocators": { ...live allocations per allocator... },
 | 
| -          ...
 | 
| -        },
 | 
| -        ...
 | 
| -      }
 | 
| -    },
 | 
| -    ...
 | 
| -  }
 | 
| -
 | 
| -  Here:
 | 
| -    * 'nodes' node encodes stack frame tree
 | 
| -    * 'types' node encodes type name mappings
 | 
| -    * 'strings' node encodes string mapping (explained below)
 | 
| -
 | 
| -  Then, by the time second memory dump even is generated, we learn about
 | 
| -  bar() (called from main()), which also allocated "int" objects. Only the
 | 
| -  new information is dumped, i.e. bar() stack frame:
 | 
| -
 | 
| -  {
 | 
| -    "args": {
 | 
| -      "dumps": {
 | 
| -        "heaps_v2": {
 | 
| -          "maps": {
 | 
| -            "nodes": [
 | 
| -              { "id": 2, "parent": 1, "name_sid": 4 },
 | 
| -            ],
 | 
| -            "types": [],
 | 
| -            "strings": [
 | 
| -              { "id": 4, "string": "bar()" },
 | 
| -            ]
 | 
| -          },
 | 
| -          "allocators": { ...live allocations per allocator... },
 | 
| -          ...
 | 
| -        },
 | 
| -        ...
 | 
| -      }
 | 
| -    },
 | 
| -    ...
 | 
| -  }
 | 
| -
 | 
| -  Note that 'types' node is empty, since there were no updates. All three
 | 
| -  nodes ('nodes', types', and 'strings') can be empty if there were no updates
 | 
| -  to them.
 | 
| -
 | 
| -  For simplicity, when the script updates incremental nodes, it puts updated
 | 
| -  content in the first node, and clears all others. I.e. the following stack
 | 
| -  frame nodes:
 | 
| -
 | 
| -  'nodes': [
 | 
| -    { "id": 1, "name_sid": 1 },
 | 
| -    { "id": 2, "parent": 1, "name_sid": 2 },
 | 
| -  ]
 | 
| -  'nodes': [
 | 
| -    { "id": 3, "parent": 2, "name_sid": 3 },
 | 
| -  ]
 | 
| -  'nodes': [
 | 
| -    { "id": 4, "parent": 3, "name_sid": 4 },
 | 
| -    { "id": 5, "parent": 1, "name_sid": 5 },
 | 
| -  ]
 | 
| -
 | 
| -  After symbolization are written as:
 | 
| -
 | 
| -  'nodes': [
 | 
| -    { "id": 1, "name_sid": 1 },
 | 
| -    { "id": 2, "parent": 1, "name_sid": 2 },
 | 
| -    { "id": 3, "parent": 2, "name_sid": 3 },
 | 
| -    { "id": 4, "parent": 3, "name_sid": 4 },
 | 
| -    { "id": 5, "parent": 1, "name_sid": 5 },
 | 
| -  ]
 | 
| -  'nodes': []
 | 
| -  'nodes': []
 | 
| -
 | 
| -
 | 
| -* In contrast, in the legacy format stack frame tree and type mappings are
 | 
| -  dumped separately from memory dump events, once per process.
 | 
| -
 | 
| -  Here is how trace file with two memory dump events looks like in the
 | 
| -  legacy format:
 | 
| -
 | 
| -  {
 | 
| -    "args": {
 | 
| -      "dumps": {
 | 
| -        "heaps": { ...live allocations per allocator... },
 | 
| -        ...
 | 
| -      }
 | 
| -    },
 | 
| -    ...
 | 
| -  }
 | 
| -
 | 
| -  {
 | 
| -    "args": {
 | 
| -      "dumps": {
 | 
| -        "heaps": { ...live allocations per allocator... },
 | 
| -        ...
 | 
| -      }
 | 
| -    },
 | 
| -    ...
 | 
| -  }
 | 
| -
 | 
| -  {
 | 
| -    "args": {
 | 
| -      "typeNames": {
 | 
| -        1: "int",
 | 
| -      }
 | 
| -    },
 | 
| -    "cat": "__metadata",
 | 
| -    "name": "typeNames",
 | 
| -    ...
 | 
| -  }
 | 
| -
 | 
| -  {
 | 
| -    "args": {
 | 
| -      "stackFrames": {
 | 
| -        1: { "name": "main" },
 | 
| -        2: { "name": "foo", "parent": 1 },
 | 
| -        3: { "name": "bar", "parent": 1 },
 | 
| -      }
 | 
| -    },
 | 
| -    "cat": "__metadata",
 | 
| -    "name": "stackFrames",
 | 
| -    ...
 | 
| -  }
 | 
| -
 | 
| -
 | 
| -* Another change in the modern format is 'strings' node, which was added
 | 
| -  to deduplicate stack frame names (mainly for trace file size reduction).
 | 
| -  For consistency 'types' node also uses string mappings.
 | 
| -
 | 
| -
 | 
| -See crbug.com/708930 for more information about the modern format.
 | 
| -"""
 | 
| -
 | 
| -import argparse
 | 
| -import bisect
 | 
| -import collections
 | 
| -import gzip
 | 
| -import itertools
 | 
| -import json
 | 
| -import os
 | 
| -import re
 | 
| -import shutil
 | 
| -import subprocess
 | 
| -import sys
 | 
| -import tarfile
 | 
| -import zipfile
 | 
| -import tempfile
 | 
| -
 | 
| -_SYMBOLS_PATH = os.path.abspath(os.path.join(
 | 
| -    os.path.dirname(os.path.realpath(__file__)),
 | 
| -    '..',
 | 
| -    'third_party',
 | 
| -    'symbols'))
 | 
| -sys.path.append(_SYMBOLS_PATH)
 | 
| -# pylint: disable=import-error
 | 
| -import symbols.elf_symbolizer as elf_symbolizer
 | 
| -
 | 
| -import symbolize_trace_atos_regex
 | 
| -import symbolize_trace_macho_reader
 | 
| -
 | 
| -_PY_UTILS_PATH = os.path.abspath(os.path.join(
 | 
| -    os.path.dirname(os.path.realpath(__file__)),
 | 
| -    '..',
 | 
| -    '..',
 | 
| -    'common',
 | 
| -    'py_utils'))
 | 
| -sys.path.append(_PY_UTILS_PATH)
 | 
| -# pylint: disable=import-error
 | 
| -import py_utils.cloud_storage as cloud_storage
 | 
| -
 | 
| -class NodeWrapper(object):
 | 
| -  """Wraps an event data node(s).
 | 
| -
 | 
| -  A node is a reference into a trace event JSON. Wrappers parse nodes to
 | 
| -  provide convenient APIs and update nodes when asked to propagate changes
 | 
| -  back (see ApplyModifications() below).
 | 
| -
 | 
| -  Here is an example of legacy metadata event that contains stack frame tree:
 | 
| -
 | 
| -  {
 | 
| -    "args": {
 | 
| -      "stackFrames": { ... }
 | 
| -    },
 | 
| -    "cat": "__metadata",
 | 
| -    "name": "stackFrames",
 | 
| -    "ph": "M",
 | 
| -    ...
 | 
| -  }
 | 
| -
 | 
| -  When this event is encountered, a reference to the "stackFrames" dictionary
 | 
| -  is obtained and passed down to a specific wrapped class, which knows how to
 | 
| -  parse / update the dictionary.
 | 
| -
 | 
| -  There are two parsing patterns depending on whether node is serialized
 | 
| -  incrementally:
 | 
| -
 | 
| -  * If node is not incremental, then parsing is done by __init__(),
 | 
| -    see MemoryMap for an example.
 | 
| -
 | 
| -  * If node is incremental, then __init__() does nothing, and instead
 | 
| -    ParseNext() method is called when next node (from a next event) is
 | 
| -    encountered.
 | 
| -
 | 
| -  Some wrappers can also modify nodes they parsed. In such cases they have
 | 
| -  additional APIs:
 | 
| -
 | 
| -  * 'modified' flag, which indicates whether the wrapper was changed.
 | 
| -
 | 
| -  * 'ApplyModifications' method, which propagates changes made to the wrapper
 | 
| -    back to nodes. Successful invocation of ApplyModifications() resets
 | 
| -    'modified' flag.
 | 
| -
 | 
| -  """
 | 
| -  pass
 | 
| -
 | 
| -
 | 
| -class MemoryMap(NodeWrapper):
 | 
| -  """Wraps 'process_mmaps' node.
 | 
| -
 | 
| -  'process_mmaps' node contains information about file mappings.
 | 
| -
 | 
| -  "process_mmaps": {
 | 
| -    "vm_regions": [
 | 
| -      {
 | 
| -        "mf": "<file_path>",
 | 
| -        "sa": "<start_address>",
 | 
| -        "sz": "<size>",
 | 
| -        ...
 | 
| -      },
 | 
| -      ...
 | 
| -    ]
 | 
| -  }
 | 
| -  """
 | 
| -
 | 
| -  class Region(object):
 | 
| -    def __init__(self, start_address, size, file_path):
 | 
| -      self._start_address = start_address
 | 
| -      self._size = size
 | 
| -      self._file_path = file_path
 | 
| -
 | 
| -    @property
 | 
| -    def start_address(self):
 | 
| -      return self._start_address
 | 
| -
 | 
| -    @property
 | 
| -    def end_address(self):
 | 
| -      return self._start_address + self._size
 | 
| -
 | 
| -    @property
 | 
| -    def size(self):
 | 
| -      return self._size
 | 
| -
 | 
| -    @property
 | 
| -    def file_path(self):
 | 
| -      return self._file_path
 | 
| -
 | 
| -    def __cmp__(self, other):
 | 
| -      if isinstance(other, type(self)):
 | 
| -        other_start_address = other._start_address
 | 
| -      elif isinstance(other, (long, int)):
 | 
| -        other_start_address = other
 | 
| -      else:
 | 
| -        raise Exception('Cannot compare with %s' % type(other))
 | 
| -      if self._start_address < other_start_address:
 | 
| -        return -1
 | 
| -      elif self._start_address > other_start_address:
 | 
| -        return 1
 | 
| -      else:
 | 
| -        return 0
 | 
| -
 | 
| -    def __repr__(self):
 | 
| -      return 'Region(0x{:X} - 0x{:X}, {})'.format(
 | 
| -          self.start_address, self.end_address, self.file_path)
 | 
| -
 | 
| -  def __init__(self, process_mmaps_node):
 | 
| -    regions = []
 | 
| -    for region_node in process_mmaps_node['vm_regions']:
 | 
| -      regions.append(self.Region(
 | 
| -          long(region_node['sa'], 16),
 | 
| -          long(region_node['sz'], 16),
 | 
| -          region_node['mf']))
 | 
| -    regions.sort()
 | 
| -
 | 
| -    # Copy regions without duplicates and check for overlaps.
 | 
| -    self._regions = []
 | 
| -    previous_region = None
 | 
| -    for region in regions:
 | 
| -      if previous_region is not None:
 | 
| -        if region == previous_region:
 | 
| -          continue
 | 
| -        assert region.start_address >= previous_region.end_address, \
 | 
| -            'Regions {} and {} overlap.'.format(previous_region, region)
 | 
| -      previous_region = region
 | 
| -      self._regions.append(region)
 | 
| -
 | 
| -  @property
 | 
| -  def regions(self):
 | 
| -    return self._regions
 | 
| -
 | 
| -  def FindRegion(self, address):
 | 
| -    """Finds region containing |address|. Returns None if none found."""
 | 
| -
 | 
| -    region_index = bisect.bisect_right(self._regions, address) - 1
 | 
| -    if region_index >= 0:
 | 
| -      region = self._regions[region_index]
 | 
| -      if address >= region.start_address and address < region.end_address:
 | 
| -        return region
 | 
| -    return None
 | 
| -
 | 
| -
 | 
| -class UnsupportedHeapDumpVersionError(Exception):
 | 
| -  """Helper exception class to signal unsupported heap dump version."""
 | 
| -
 | 
| -  def __init__(self, version):
 | 
| -    message = 'Unsupported heap dump version: {}'.format(version)
 | 
| -    super(UnsupportedHeapDumpVersionError, self).__init__(message)
 | 
| -
 | 
| -
 | 
| -class StringMap(NodeWrapper):
 | 
| -  """Wraps all 'strings' nodes for a process.
 | 
| -
 | 
| -  'strings' node contains incremental mappings between integer ids and strings.
 | 
| -
 | 
| -  "strings": [
 | 
| -    {
 | 
| -      "id": <string_id>,
 | 
| -      "string": <string>
 | 
| -    },
 | 
| -    ...
 | 
| -  ]
 | 
| -  """
 | 
| -
 | 
| -  def __init__(self):
 | 
| -    self._modified = False
 | 
| -    self._strings_nodes = []
 | 
| -    self._string_by_id = {}
 | 
| -    self._id_by_string = {}
 | 
| -    self._max_string_id = 0
 | 
| -
 | 
| -  @property
 | 
| -  def modified(self):
 | 
| -    """Returns True if the wrapper was modified (see NodeWrapper)."""
 | 
| -    return self._modified
 | 
| -
 | 
| -  @property
 | 
| -  def string_by_id(self):
 | 
| -    return self._string_by_id
 | 
| -
 | 
| -  def ParseNext(self, heap_dump_version, strings_node):
 | 
| -    """Parses and interns next node (see NodeWrapper)."""
 | 
| -
 | 
| -    if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
 | 
| -      raise UnsupportedHeapDumpVersionError(heap_dump_version)
 | 
| -
 | 
| -    self._strings_nodes.append(strings_node)
 | 
| -    for string_node in strings_node:
 | 
| -      self._Insert(string_node['id'], string_node['string'])
 | 
| -
 | 
| -  def Clear(self):
 | 
| -    """Clears all string mappings."""
 | 
| -    if self._string_by_id:
 | 
| -      self._modified = True
 | 
| -      # ID #0 means 'no entry' and must always be present. Carry it over.
 | 
| -      null_string = self._string_by_id[0]
 | 
| -      self._string_by_id = {}
 | 
| -      self._id_by_string = {}
 | 
| -      self._Insert(0, null_string)
 | 
| -      self._max_string_id = 0
 | 
| -
 | 
| -  def AddString(self, string):
 | 
| -    """Adds a string (if it doesn't exist) and returns its integer id."""
 | 
| -    string_id = self._id_by_string.get(string)
 | 
| -    if string_id is None:
 | 
| -      string_id = self._max_string_id + 1
 | 
| -      self._Insert(string_id, string)
 | 
| -      self._modified = True
 | 
| -    return string_id
 | 
| -
 | 
| -  def ApplyModifications(self):
 | 
| -    """Propagates modifications back to nodes (see NodeWrapper)."""
 | 
| -    if not self.modified:
 | 
| -      return
 | 
| -
 | 
| -    assert self._strings_nodes, 'no nodes'
 | 
| -
 | 
| -    # Serialize into the first node, and clear all others.
 | 
| -
 | 
| -    for strings_node in self._strings_nodes:
 | 
| -      del strings_node[:]
 | 
| -    strings_node = self._strings_nodes[0]
 | 
| -    for string_id, string in self._string_by_id.iteritems():
 | 
| -      strings_node.append({'id': string_id, 'string': string})
 | 
| -
 | 
| -    self._modified = False
 | 
| -
 | 
| -  def _Insert(self, string_id, string):
 | 
| -    self._id_by_string[string] = string_id
 | 
| -    self._string_by_id[string_id] = string
 | 
| -    self._max_string_id = max(self._max_string_id, string_id)
 | 
| -
 | 
| -
 | 
| -class TypeNameMap(NodeWrapper):
 | 
| -  """Wraps all 'types' nodes for a process.
 | 
| -
 | 
| -  'types' nodes encode mappings between integer type ids and integer
 | 
| -  string ids (from 'strings' nodes).
 | 
| -
 | 
| -  "types": [
 | 
| -    {
 | 
| -      "id": <type_id>,
 | 
| -      "name_sid": <name_string_id>
 | 
| -    }
 | 
| -    ...
 | 
| -  ]
 | 
| -
 | 
| -  For simplicity string ids are translated into strings during parsing,
 | 
| -  and then translated back to ids in ApplyModifications().
 | 
| -  """
 | 
| -  def __init__(self):
 | 
| -    self._modified = False
 | 
| -    self._type_name_nodes = []
 | 
| -    self._name_by_id = {}
 | 
| -    self._id_by_name = {}
 | 
| -    self._max_type_id = 0
 | 
| -
 | 
| -  @property
 | 
| -  def modified(self):
 | 
| -    """Returns True if the wrapper was modified (see NodeWrapper)."""
 | 
| -    return self._modified
 | 
| -
 | 
| -  @property
 | 
| -  def name_by_id(self):
 | 
| -    """Returns {id -> name} dict (must not be changed directly)."""
 | 
| -    return self._name_by_id
 | 
| -
 | 
| -  def ParseNext(self, heap_dump_version, type_name_node, string_map):
 | 
| -    """Parses and interns next node (see NodeWrapper).
 | 
| -
 | 
| -    |string_map| - A StringMap object to use to translate string ids
 | 
| -                   to strings.
 | 
| -    """
 | 
| -    if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
 | 
| -      raise UnsupportedHeapDumpVersionError(heap_dump_version)
 | 
| -
 | 
| -    self._type_name_nodes.append(type_name_node)
 | 
| -    for type_node in type_name_node:
 | 
| -      self._Insert(type_node['id'],
 | 
| -                   string_map.string_by_id[type_node['name_sid']])
 | 
| -
 | 
| -  def AddType(self, type_name):
 | 
| -    """Adds a type name (if it doesn't exist) and returns its id."""
 | 
| -    type_id = self._id_by_name.get(type_name)
 | 
| -    if type_id is None:
 | 
| -      type_id = self._max_type_id + 1
 | 
| -      self._Insert(type_id, type_name)
 | 
| -      self._modified = True
 | 
| -    return type_id
 | 
| -
 | 
| -  def ApplyModifications(self, string_map, force=False):
 | 
| -    """Propagates modifications back to nodes.
 | 
| -
 | 
| -    |string_map| - A StringMap object to use to translate strings to ids.
 | 
| -    |force| - Whether to propagate changes regardless of 'modified' flag.
 | 
| -    """
 | 
| -    if not self.modified and not force:
 | 
| -      return
 | 
| -
 | 
| -    assert self._type_name_nodes, 'no nodes'
 | 
| -
 | 
| -    # Serialize into the first node, and clear all others.
 | 
| -
 | 
| -    for types_node in self._type_name_nodes:
 | 
| -      del types_node[:]
 | 
| -    types_node = self._type_name_nodes[0]
 | 
| -    for type_id, type_name in self._name_by_id.iteritems():
 | 
| -      types_node.append({
 | 
| -          'id': type_id,
 | 
| -          'name_sid': string_map.AddString(type_name)})
 | 
| -
 | 
| -    self._modified = False
 | 
| -
 | 
| -  def _Insert(self, type_id, type_name):
 | 
| -    self._id_by_name[type_name] = type_id
 | 
| -    self._name_by_id[type_id] = type_name
 | 
| -    self._max_type_id = max(self._max_type_id, type_id)
 | 
| -
 | 
| -
 | 
| -class StackFrameMap(NodeWrapper):
 | 
| -  """ Wraps stack frame tree nodes for a process.
 | 
| -
 | 
| -  For the legacy format this wrapper expects a single 'stackFrames' node
 | 
| -  (which comes from metadata event):
 | 
| -
 | 
| -  "stackFrames": {
 | 
| -    "<frame_id>": {
 | 
| -      "name": "<frame_name>"
 | 
| -      "parent": "<parent_frame_id>"
 | 
| -    },
 | 
| -    ...
 | 
| -  }
 | 
| -
 | 
| -  For the modern format this wrapper expects several 'nodes' nodes:
 | 
| -
 | 
| -  "nodes": [
 | 
| -    {
 | 
| -      "id": <frame_id>,
 | 
| -      "parent": <parent_frame_id>,
 | 
| -      "name_sid": <name_string_id>
 | 
| -    },
 | 
| -    ...
 | 
| -  ]
 | 
| -
 | 
| -  In both formats frame name is a string. Native heap profiler generates
 | 
| -  specially formatted frame names (e.g. "pc:10eb78dba") for function
 | 
| -  addresses (PCs). Inner Frame class below parses name and extracts PC,
 | 
| -  if it's there.
 | 
| -  """
 | 
| -  class Frame(object):
 | 
| -    def __init__(self, frame_id, name, parent_frame_id):
 | 
| -      self._modified = False
 | 
| -      self._id = frame_id
 | 
| -      self._name = name
 | 
| -      self._pc = self._ParsePC(name)
 | 
| -      self._parent_id = parent_frame_id
 | 
| -      self._ext = None
 | 
| -
 | 
| -    @property
 | 
| -    def modified(self):
 | 
| -      """Returns True if the frame was modified.
 | 
| -
 | 
| -      For example changing frame's name sets this flag (since the change
 | 
| -      needs to be propagated back to nodes).
 | 
| -      """
 | 
| -      return self._modified
 | 
| -
 | 
| -    @property
 | 
| -    def id(self):
 | 
| -      """Frame id (integer)."""
 | 
| -      return self._id
 | 
| -
 | 
| -    @property
 | 
| -    def pc(self):
 | 
| -      """Parsed (integer) PC of the frame, or None."""
 | 
| -      return self._pc
 | 
| -
 | 
| -    @property
 | 
| -    def name(self):
 | 
| -      """Name of the frame (see above)."""
 | 
| -      return self._name
 | 
| -
 | 
| -    @name.setter
 | 
| -    def name(self, value):
 | 
| -      """Changes the name. Doesn't affect value of |pc|."""
 | 
| -      self._modified = True
 | 
| -      self._name = value
 | 
| -
 | 
| -    @property
 | 
| -    def parent_id(self):
 | 
| -      """Parent frame id (integer)."""
 | 
| -      return self._parent_id
 | 
| -
 | 
| -    _PC_TAG = 'pc:'
 | 
| -
 | 
| -    def _ParsePC(self, name):
 | 
| -      if not name.startswith(self._PC_TAG):
 | 
| -        return None
 | 
| -      return long(name[len(self._PC_TAG):], 16)
 | 
| -
 | 
| -    def _ClearModified(self):
 | 
| -      self._modified = False
 | 
| -
 | 
| -  def __init__(self):
 | 
| -    self._modified = False
 | 
| -    self._heap_dump_version = None
 | 
| -    self._stack_frames_nodes = []
 | 
| -    self._frame_by_id = {}
 | 
| -
 | 
| -  @property
 | 
| -  def modified(self):
 | 
| -    """Returns True if the wrapper or any of its frames were modified."""
 | 
| -    return (self._modified or
 | 
| -            any(f.modified for f in self._frame_by_id.itervalues()))
 | 
| -
 | 
| -  @property
 | 
| -  def frame_by_id(self):
 | 
| -    """Returns {id -> frame} dict (must not be modified directly)."""
 | 
| -    return self._frame_by_id
 | 
| -
 | 
| -  def ParseNext(self, heap_dump_version, stack_frames_node, string_map):
 | 
| -    """Parses the next stack frames node (see NodeWrapper).
 | 
| -
 | 
| -    For the modern format |string_map| is used to translate string ids
 | 
| -    to strings.
 | 
| -    """
 | 
| -
 | 
| -    frame_by_id = {}
 | 
| -    if heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
 | 
| -      if self._stack_frames_nodes:
 | 
| -        raise Exception('Legacy stack frames node is expected only once.')
 | 
| -      for frame_id, frame_node in stack_frames_node.iteritems():
 | 
| -        frame = self.Frame(frame_id,
 | 
| -                           frame_node['name'],
 | 
| -                           frame_node.get('parent'))
 | 
| -        frame_by_id[frame.id] = frame
 | 
| -    else:
 | 
| -      if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
 | 
| -        raise UnsupportedHeapDumpVersionError(heap_dump_version)
 | 
| -      for frame_node in stack_frames_node:
 | 
| -        frame = self.Frame(frame_node['id'],
 | 
| -                           string_map.string_by_id[frame_node['name_sid']],
 | 
| -                           frame_node.get('parent'))
 | 
| -        frame_by_id[frame.id] = frame
 | 
| -
 | 
| -    self._heap_dump_version = heap_dump_version
 | 
| -    self._stack_frames_nodes.append(stack_frames_node)
 | 
| -
 | 
| -    self._frame_by_id.update(frame_by_id)
 | 
| -
 | 
| -  def ApplyModifications(self, string_map, force=False):
 | 
| -    """Applies modifications back to nodes (see NodeWrapper)."""
 | 
| -
 | 
| -    if not self.modified and not force:
 | 
| -      return
 | 
| -
 | 
| -    assert self._stack_frames_nodes, 'no nodes'
 | 
| -    if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
 | 
| -      assert string_map is None, \
 | 
| -          'string_map should not be used with the legacy format'
 | 
| -
 | 
| -    # Serialize frames into the first node, clear all others.
 | 
| -
 | 
| -    for frames_node in self._stack_frames_nodes:
 | 
| -      if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
 | 
| -        frames_node.clear()
 | 
| -      else:
 | 
| -        del frames_node[:]
 | 
| -
 | 
| -    frames_node = self._stack_frames_nodes[0]
 | 
| -    for frame in self._frame_by_id.itervalues():
 | 
| -      if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
 | 
| -        frame_node = {'name': frame.name}
 | 
| -        frames_node[frame.id] = frame_node
 | 
| -      else:
 | 
| -        frame_node = {
 | 
| -            'id': frame.id,
 | 
| -            'name_sid': string_map.AddString(frame.name)
 | 
| -        }
 | 
| -        frames_node.append(frame_node)
 | 
| -      if frame.parent_id is not None:
 | 
| -        frame_node['parent'] = frame.parent_id
 | 
| -      frame._ClearModified()
 | 
| -
 | 
| -    self._modified = False
 | 
| -
 | 
| -
 | 
| -class Trace(NodeWrapper):
 | 
| -  """Wrapper for the root trace node (i.e. the trace JSON itself).
 | 
| -
 | 
| -  This wrapper parses select nodes from memory-infra events and groups
 | 
| -  parsed data per-process (see inner Process class below).
 | 
| -  """
 | 
| -
 | 
| -  # Indicates legacy heap dump format.
 | 
| -  HEAP_DUMP_VERSION_LEGACY = 'Legacy'
 | 
| -
 | 
| -  # Indicates variation of a modern heap dump format.
 | 
| -  HEAP_DUMP_VERSION_1 = 1
 | 
| -
 | 
| -  class Process(object):
 | 
| -    """Collection of per-process data and wrappers."""
 | 
| -
 | 
| -    def __init__(self, pid):
 | 
| -      self._pid = pid
 | 
| -      self._name = None
 | 
| -      self._memory_map = None
 | 
| -      self._stack_frame_map = StackFrameMap()
 | 
| -      self._type_name_map = TypeNameMap()
 | 
| -      self._string_map = StringMap()
 | 
| -      self._heap_dump_version = None
 | 
| -
 | 
| -    @property
 | 
| -    def modified(self):
 | 
| -      return self._stack_frame_map.modified or self._type_name_map.modified
 | 
| -
 | 
| -    @property
 | 
| -    def pid(self):
 | 
| -      return self._pid
 | 
| -
 | 
| -    @property
 | 
| -    def name(self):
 | 
| -      return self._name
 | 
| -
 | 
| -    @property
 | 
| -    def unique_name(self):
 | 
| -      """Returns string that includes both process name and its pid."""
 | 
| -      name = self._name if self._name else 'UnnamedProcess'
 | 
| -      return '{}({})'.format(name, self._pid)
 | 
| -
 | 
| -    @property
 | 
| -    def memory_map(self):
 | 
| -      return self._memory_map
 | 
| -
 | 
| -    @property
 | 
| -    def stack_frame_map(self):
 | 
| -      return self._stack_frame_map
 | 
| -
 | 
| -    @property
 | 
| -    def type_name_map(self):
 | 
| -      return self._type_name_map
 | 
| -
 | 
| -    def ApplyModifications(self):
 | 
| -      """Calls ApplyModifications() on contained wrappers."""
 | 
| -      if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
 | 
| -        self._stack_frame_map.ApplyModifications(None)
 | 
| -      else:
 | 
| -        if self._stack_frame_map.modified or self._type_name_map.modified:
 | 
| -          self._string_map.Clear()
 | 
| -          self._stack_frame_map.ApplyModifications(self._string_map, force=True)
 | 
| -          self._type_name_map.ApplyModifications(self._string_map, force=True)
 | 
| -          self._string_map.ApplyModifications()
 | 
| -
 | 
| -  def __init__(self, trace_node):
 | 
| -    self._trace_node = trace_node
 | 
| -    self._processes = []
 | 
| -    self._heap_dump_version = None
 | 
| -    self._version = None
 | 
| -    self._is_chromium = True
 | 
| -    self._is_64bit = False
 | 
| -    self._is_win = False
 | 
| -    self._is_mac = False
 | 
| -
 | 
| -    # Misc per-process information needed only during parsing.
 | 
| -    class ProcessExt(object):
 | 
| -      def __init__(self, pid):
 | 
| -        self.process = Trace.Process(pid)
 | 
| -        self.mapped_entry_names = set()
 | 
| -        self.process_mmaps_node = None
 | 
| -        self.seen_strings_node = False
 | 
| -
 | 
| -    process_ext_by_pid = {}
 | 
| -
 | 
| -    if isinstance(trace_node, dict):
 | 
| -      metadata = trace_node['metadata']
 | 
| -      product_version = metadata['product-version']
 | 
| -      # product-version has the form "Chrome/60.0.3103.0"
 | 
| -      self._version = product_version.split('/', 1)[-1]
 | 
| -
 | 
| -      command_line = metadata['command_line']
 | 
| -      self._is_win = re.search('windows', metadata['os-name'] , re.IGNORECASE)
 | 
| -      self._is_mac = re.search('mac', metadata['os-name'] , re.IGNORECASE)
 | 
| -
 | 
| -      if self._is_win:
 | 
| -        self._is_chromium = (
 | 
| -          not re.search('Chrome SxS\\\\Application\\\\chrome.exe', command_line,
 | 
| -                        re.IGNORECASE) and
 | 
| -          not re.search('Chrome\\\\Application\\\\chrome.exe', command_line,
 | 
| -                        re.IGNORECASE))
 | 
| -      if self._is_mac:
 | 
| -        self._is_chromium = re.search('chromium', command_line, re.IGNORECASE)
 | 
| -
 | 
| -      self._is_64bit = (
 | 
| -        re.search('x86_64', metadata['os-arch'] , re.IGNORECASE) and
 | 
| -        not re.search('WOW64', metadata['user-agent'] , re.IGNORECASE))
 | 
| -
 | 
| -    # Android traces produced via 'chrome://inspect/?tracing#devices' are
 | 
| -    # just list of events.
 | 
| -    events = trace_node if isinstance(trace_node, list) \
 | 
| -             else trace_node['traceEvents']
 | 
| -    for event in events:
 | 
| -      name = event.get('name')
 | 
| -      if not name:
 | 
| -        continue
 | 
| -
 | 
| -      pid = event['pid']
 | 
| -      process_ext = process_ext_by_pid.get(pid)
 | 
| -      if process_ext is None:
 | 
| -        process_ext = ProcessExt(pid)
 | 
| -        process_ext_by_pid[pid] = process_ext
 | 
| -      process = process_ext.process
 | 
| -
 | 
| -      phase = event['ph']
 | 
| -      if phase == self._EVENT_PHASE_METADATA:
 | 
| -        if name == 'process_name':
 | 
| -          process._name = event['args']['name']
 | 
| -        elif name == 'stackFrames':
 | 
| -          process._stack_frame_map.ParseNext(
 | 
| -              self._UseHeapDumpVersion(self.HEAP_DUMP_VERSION_LEGACY),
 | 
| -              event['args']['stackFrames'],
 | 
| -              process._string_map)
 | 
| -      elif phase == self._EVENT_PHASE_MEMORY_DUMP:
 | 
| -        dumps = event['args']['dumps']
 | 
| -        process_mmaps = dumps.get('process_mmaps')
 | 
| -        if process_mmaps:
 | 
| -          # We want the most recent memory map, so parsing happens later
 | 
| -          # once we finished reading all events.
 | 
| -          process_ext.process_mmaps_node = process_mmaps
 | 
| -        heaps = dumps.get('heaps_v2')
 | 
| -        if heaps:
 | 
| -          version = self._UseHeapDumpVersion(heaps['version'])
 | 
| -          maps = heaps.get('maps')
 | 
| -          if maps:
 | 
| -            process_ext.mapped_entry_names.update(maps.iterkeys())
 | 
| -            types = maps.get('types')
 | 
| -            stack_frames = maps.get('nodes')
 | 
| -            strings = maps.get('strings')
 | 
| -            if (strings is None and (types or stack_frames)
 | 
| -                and not process_ext.seen_strings_node):
 | 
| -              # ApplyModifications() for TypeNameMap and StackFrameMap puts
 | 
| -              # everything into the first node and depends on StringMap. So
 | 
| -              # we need to make sure that 'strings' node is there if any of
 | 
| -              # other two nodes present.
 | 
| -              strings = []
 | 
| -              maps['strings'] = strings
 | 
| -            if strings is not None:
 | 
| -              process_ext.seen_strings_node = True
 | 
| -              process._string_map.ParseNext(version, strings)
 | 
| -            if types:
 | 
| -              process._type_name_map.ParseNext(
 | 
| -                  version, types, process._string_map)
 | 
| -            if stack_frames:
 | 
| -              process._stack_frame_map.ParseNext(
 | 
| -                  version, stack_frames, process._string_map)
 | 
| -
 | 
| -    self._processes = []
 | 
| -    for pe in process_ext_by_pid.itervalues():
 | 
| -      pe.process._heap_dump_version = self._heap_dump_version
 | 
| -      if pe.process_mmaps_node:
 | 
| -        # Now parse the most recent memory map.
 | 
| -        pe.process._memory_map = MemoryMap(pe.process_mmaps_node)
 | 
| -      self._processes.append(pe.process)
 | 
| -
 | 
| -  @property
 | 
| -  def node(self):
 | 
| -    """Root node (that was passed to the __init__)."""
 | 
| -    return self._trace_node
 | 
| -
 | 
| -  @property
 | 
| -  def modified(self):
 | 
| -    """Returns True if trace file needs to be updated.
 | 
| -
 | 
| -    Before writing trace JSON back to a file ApplyModifications() needs
 | 
| -    to be called.
 | 
| -    """
 | 
| -    return any(p.modified for p in self._processes)
 | 
| -
 | 
| -  @property
 | 
| -  def processes(self):
 | 
| -    return self._processes
 | 
| -
 | 
| -  @property
 | 
| -  def heap_dump_version(self):
 | 
| -    return self._heap_dump_version
 | 
| -
 | 
| -  @property
 | 
| -  def version(self):
 | 
| -    return self._version
 | 
| -
 | 
| -  @property
 | 
| -  def is_chromium(self):
 | 
| -    return self._is_chromium
 | 
| -
 | 
| -  @property
 | 
| -  def is_mac(self):
 | 
| -    return self._is_mac
 | 
| -
 | 
| -  @property
 | 
| -  def is_win(self):
 | 
| -    return self._is_win
 | 
| -
 | 
| -  @property
 | 
| -  def is_64bit(self):
 | 
| -    return self._is_64bit
 | 
| -
 | 
| -  def ApplyModifications(self):
 | 
| -    """Propagates modifications back to the trace JSON."""
 | 
| -    for process in self._processes:
 | 
| -      process.ApplyModifications()
 | 
| -    assert not self.modified, 'still modified'
 | 
| -
 | 
| -  # Relevant trace event phases from Chromium's
 | 
| -  # src/base/trace_event/common/trace_event_common.h.
 | 
| -  _EVENT_PHASE_METADATA = 'M'
 | 
| -  _EVENT_PHASE_MEMORY_DUMP = 'v'
 | 
| -
 | 
| -  def _UseHeapDumpVersion(self, version):
 | 
| -    if self._heap_dump_version is None:
 | 
| -      self._heap_dump_version = version
 | 
| -      return version
 | 
| -    elif self._heap_dump_version != version:
 | 
| -      raise Exception(
 | 
| -          ("Inconsistent trace file: first saw '{}' heap dump version, "
 | 
| -           "then '{}'.").format(self._heap_dump_version, version))
 | 
| -    else:
 | 
| -      return version
 | 
| -
 | 
| -
 | 
| -class SymbolizableFile(object):
 | 
| -  """Holds file path, addresses to symbolize and stack frames to update.
 | 
| -
 | 
| -  This class is a link between ELFSymbolizer and a trace file: it specifies
 | 
| -  what to symbolize (addresses) and what to update with the symbolization
 | 
| -  result (frames).
 | 
| -  """
 | 
| -  def __init__(self, file_path):
 | 
| -    self.path = file_path
 | 
| -    self.symbolizable_path = file_path # path to use for symbolization
 | 
| -    self.frames_by_address = collections.defaultdict(list)
 | 
| -
 | 
| -
 | 
| -def ResolveSymbolizableFiles(processes):
 | 
| -  """Resolves and groups PCs into list of SymbolizableFiles.
 | 
| -
 | 
| -  As part of the grouping process, this function resolves PC from each stack
 | 
| -  frame to the corresponding mmap region. Stack frames that failed to resolve
 | 
| -  are symbolized with '<unresolved>'.
 | 
| -  """
 | 
| -  symfile_by_path = {}
 | 
| -  for process in processes:
 | 
| -    if not process.memory_map:
 | 
| -      continue
 | 
| -    for frame in process.stack_frame_map.frame_by_id.itervalues():
 | 
| -      if frame.pc is None:
 | 
| -        continue
 | 
| -      region = process.memory_map.FindRegion(frame.pc)
 | 
| -      if region is None:
 | 
| -        frame.name = '<unresolved>'
 | 
| -        continue
 | 
| -
 | 
| -      symfile = symfile_by_path.get(region.file_path)
 | 
| -      if symfile is None:
 | 
| -        symfile = SymbolizableFile(region.file_path)
 | 
| -        symfile_by_path[symfile.path] = symfile
 | 
| -
 | 
| -      relative_pc = frame.pc - region.start_address
 | 
| -      symfile.frames_by_address[relative_pc].append(frame)
 | 
| -  return symfile_by_path.values()
 | 
| -
 | 
| -
 | 
| -def FindInSystemPath(binary_name):
 | 
| -  paths = os.environ['PATH'].split(os.pathsep)
 | 
| -  for path in paths:
 | 
| -    binary_path = os.path.join(path, binary_name)
 | 
| -    if os.path.isfile(binary_path):
 | 
| -      return binary_path
 | 
| -  return None
 | 
| -
 | 
| -
 | 
| -class Symbolizer(object):
 | 
| -  """Encapsulates platform-specific symbolization logic."""
 | 
| -
 | 
| -  def __init__(self):
 | 
| -    self.is_mac = sys.platform == 'darwin'
 | 
| -    self.is_win = sys.platform == 'win32'
 | 
| -    if self.is_mac:
 | 
| -      self.binary = 'atos'
 | 
| -      self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher()
 | 
| -    elif self.is_win:
 | 
| -      self.binary = 'addr2line-pdb.exe'
 | 
| -    else:
 | 
| -      self.binary = 'addr2line'
 | 
| -    self.symbolizer_path = FindInSystemPath(self.binary)
 | 
| -
 | 
| -  def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name):
 | 
| -    def _SymbolizerCallback(sym_info, frames):
 | 
| -      # Unwind inline chain to the top.
 | 
| -      while sym_info.inlined_by:
 | 
| -        sym_info = sym_info.inlined_by
 | 
| -
 | 
| -      symbolized_name = sym_info.name if sym_info.name else unsymbolized_name
 | 
| -      for frame in frames:
 | 
| -        frame.name = symbolized_name
 | 
| -
 | 
| -    symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path,
 | 
| -                                              self.symbolizer_path,
 | 
| -                                              _SymbolizerCallback,
 | 
| -                                              inlines=True)
 | 
| -
 | 
| -    for address, frames in symfile.frames_by_address.iteritems():
 | 
| -      # SymbolizeAsync() asserts that the type of address is int. We operate
 | 
| -      # on longs (since they are raw pointers possibly from 64-bit processes).
 | 
| -      # It's OK to cast here because we're passing relative PC, which should
 | 
| -      # always fit into int.
 | 
| -      symbolizer.SymbolizeAsync(int(address), frames)
 | 
| -
 | 
| -    symbolizer.Join()
 | 
| -
 | 
| -
 | 
| -  def _SymbolizeMac(self, symfile):
 | 
| -    load_address = (symbolize_trace_macho_reader.
 | 
| -        ReadMachOTextLoadAddress(symfile.symbolizable_path))
 | 
| -    assert load_address is not None
 | 
| -
 | 
| -    address_os_file, address_file_path = tempfile.mkstemp()
 | 
| -    try:
 | 
| -      with os.fdopen(address_os_file, 'w') as address_file:
 | 
| -        for address in symfile.frames_by_address.iterkeys():
 | 
| -          address_file.write('{:x} '.format(address + load_address))
 | 
| -
 | 
| -      cmd = [self.symbolizer_path, '-arch', 'x86_64', '-l',
 | 
| -             '0x%x' % load_address, '-o', symfile.symbolizable_path,
 | 
| -             '-f', address_file_path]
 | 
| -      output_array = subprocess.check_output(cmd).split('\n')
 | 
| -
 | 
| -      for i, frames in enumerate(symfile.frames_by_address.itervalues()):
 | 
| -        symbolized_name = self._matcher.Match(output_array[i])
 | 
| -        for frame in frames:
 | 
| -          frame.name = symbolized_name
 | 
| -    finally:
 | 
| -      os.remove(address_file_path)
 | 
| -
 | 
| -  def _SymbolizeWin(self, symfile):
 | 
| -    """Invoke symbolizer binary on windows and write all input in one go.
 | 
| -
 | 
| -    Unlike linux, on windows, symbolization talks through a shared system
 | 
| -    service that handles communication with the NT symbol servers. This
 | 
| -    creates an explicit serialization (and therefor lock contention) of
 | 
| -    any process using the symbol API for files do not have a local PDB.
 | 
| -
 | 
| -    Thus, even though the windows symbolizer binary can be make command line
 | 
| -    compatible with the POSIX addr2line interface, parallelizing the
 | 
| -    symbolization does not yield the same performance effects. Running
 | 
| -    just one symbolizer seems good enough for now. Can optimize later
 | 
| -    if this becomes a bottleneck.
 | 
| -    """
 | 
| -    cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe',
 | 
| -                symfile.symbolizable_path]
 | 
| -
 | 
| -    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE,
 | 
| -            stderr=sys.stderr)
 | 
| -    addrs = ["%x" % relative_pc for relative_pc in
 | 
| -             symfile.frames_by_address.keys()]
 | 
| -    (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs))
 | 
| -    stdout_data = stdout_data.split('\n')
 | 
| -
 | 
| -    # This is known to be in the same order as stderr_data.
 | 
| -    for i, addr in enumerate(addrs):
 | 
| -        for frame in  symfile.frames_by_address[int(addr, 16)]:
 | 
| -            # Output of addr2line with --functions is always 2 outputs per
 | 
| -            # symbol, function name followed by source line number. Only grab
 | 
| -            # the function name as line info is not always available.
 | 
| -            frame.name = stdout_data[i * 2]
 | 
| -
 | 
| -  def Symbolize(self, symfile, unsymbolized_name):
 | 
| -    if self.is_mac:
 | 
| -      self._SymbolizeMac(symfile)
 | 
| -    elif self.is_win:
 | 
| -      self._SymbolizeWin(symfile)
 | 
| -    else:
 | 
| -      self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name)
 | 
| -
 | 
| -  def IsSymbolizableFile(self, file_path):
 | 
| -    if self.is_win:
 | 
| -      extension = os.path.splitext(file_path)[1].lower()
 | 
| -      return extension in ['.dll', '.exe']
 | 
| -    else:
 | 
| -      result = subprocess.check_output(['file', '-0', file_path])
 | 
| -      type_string = result[result.find('\0') + 1:]
 | 
| -      return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*',
 | 
| -                  type_string, re.DOTALL))
 | 
| -
 | 
| -
 | 
| -def SymbolizeFiles(symfiles, symbolizer):
 | 
| -  """Symbolizes each file in the given list of SymbolizableFiles
 | 
| -     and updates stack frames with symbolization results."""
 | 
| -
 | 
| -  if not symfiles:
 | 
| -    print 'Nothing to symbolize.'
 | 
| -    return
 | 
| -
 | 
| -  print 'Symbolizing...'
 | 
| -
 | 
| -  def _SubPrintf(message, *args):
 | 
| -    print ('  ' + message).format(*args)
 | 
| -
 | 
| -  for symfile in symfiles:
 | 
| -    unsymbolized_name = '<{}>'.format(
 | 
| -        symfile.path if symfile.path else 'unnamed')
 | 
| -
 | 
| -    problem = None
 | 
| -    if not os.path.isabs(symfile.symbolizable_path):
 | 
| -      problem = 'not a file'
 | 
| -    elif not os.path.isfile(symfile.symbolizable_path):
 | 
| -      problem = "file doesn't exist"
 | 
| -    elif not symbolizer.IsSymbolizableFile(symfile.symbolizable_path):
 | 
| -      problem = 'file is not symbolizable'
 | 
| -    if problem:
 | 
| -      _SubPrintf("Won't symbolize {} PCs for '{}': {}.",
 | 
| -                 len(symfile.frames_by_address),
 | 
| -                 symfile.symbolizable_path,
 | 
| -                 problem)
 | 
| -      for frames in symfile.frames_by_address.itervalues():
 | 
| -        for frame in frames:
 | 
| -          frame.name = unsymbolized_name
 | 
| -      continue
 | 
| -
 | 
| -    _SubPrintf('Symbolizing {} PCs from {}...',
 | 
| -               len(symfile.frames_by_address),
 | 
| -               symfile.symbolizable_path)
 | 
| -
 | 
| -    symbolizer.Symbolize(symfile, unsymbolized_name)
 | 
| -
 | 
| -
 | 
| -# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so)
 | 
| -# as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available
 | 
| -# via 'name' group.
 | 
| -ANDROID_PATH_MATCHER = re.compile(
 | 
| -    r'^/data/(?:'
 | 
| -      r'app/[^/]+/lib/[^/]+/|'
 | 
| -      r'app-lib/[^/]+/|'
 | 
| -      r'data/[^/]+/incremental-install-files/lib/'
 | 
| -    r')(?P<name>.*\.so)')
 | 
| -
 | 
| -# Subpath of output path where unstripped libraries are stored.
 | 
| -ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'
 | 
| -
 | 
| -
 | 
| -def HaveFilesFromAndroid(symfiles):
 | 
| -  return any(ANDROID_PATH_MATCHER.match(f.path) for f in symfiles)
 | 
| -
 | 
| -
 | 
| -def RemapAndroidFiles(symfiles, output_path):
 | 
| -  for symfile in symfiles:
 | 
| -    match = ANDROID_PATH_MATCHER.match(symfile.path)
 | 
| -    if match:
 | 
| -      name = match.group('name')
 | 
| -      symfile.symbolizable_path = os.path.join(
 | 
| -          output_path, ANDROID_UNSTRIPPED_SUBPATH, name)
 | 
| -    else:
 | 
| -      # Clobber file path to trigger "not a file" problem in SymbolizeFiles().
 | 
| -      # Without this, files won't be symbolized with "file not found" problem,
 | 
| -      # which is not accurate.
 | 
| -      symfile.symbolizable_path = 'android://{}'.format(symfile.path)
 | 
| -
 | 
| -
 | 
| -def RemapMacFiles(symfiles, symbol_base_directory, version):
 | 
| -  suffix = ("Google Chrome Framework.dSYM/Contents/Resources/DWARF/"
 | 
| -            "Google Chrome Framework")
 | 
| -  symbol_sub_dir = os.path.join(symbol_base_directory, version)
 | 
| -  symbolizable_path = os.path.join(symbol_sub_dir, suffix)
 | 
| -
 | 
| -  for symfile in symfiles:
 | 
| -    if symfile.path.endswith("Google Chrome Framework"):
 | 
| -      symfile.symbolizable_path = symbolizable_path
 | 
| -
 | 
| -def RemapWinFiles(symfiles, symbol_base_directory, version, is64bit):
 | 
| -  folder = "win64" if is64bit else "win"
 | 
| -  symbol_sub_dir = os.path.join(symbol_base_directory,
 | 
| -                                "chrome-" + folder + "-" + version)
 | 
| -  for symfile in symfiles:
 | 
| -    image = os.path.join(symbol_sub_dir, os.path.basename(symfile.path))
 | 
| -    symbols = image + ".pdb"
 | 
| -    if os.path.isfile(image) and os.path.isfile(symbols):
 | 
| -      symfile.symbolizable_path = image
 | 
| -
 | 
| -def Symbolize(options, trace, symbolizer):
 | 
| -  symfiles = ResolveSymbolizableFiles(trace.processes)
 | 
| -
 | 
| -  # Android trace files don't have any indication they are from Android.
 | 
| -  # So we're checking for Android-specific paths.
 | 
| -  if HaveFilesFromAndroid(symfiles):
 | 
| -    if not options.output_directory:
 | 
| -      sys.exit('The trace file appears to be from Android. Please '
 | 
| -               'specify output directory to properly symbolize it.')
 | 
| -    RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory))
 | 
| -
 | 
| -
 | 
| -  if not trace.is_chromium:
 | 
| -    if symbolizer.is_mac:
 | 
| -      RemapMacFiles(symfiles, options.symbol_base_directory, trace.version)
 | 
| -    if symbolizer.is_win:
 | 
| -      RemapWinFiles(symfiles, options.symbol_base_directory, trace.version,
 | 
| -                    trace.is_64bit)
 | 
| -
 | 
| -  SymbolizeFiles(symfiles, symbolizer)
 | 
| -
 | 
| -
 | 
| -def OpenTraceFile(file_path, mode):
 | 
| -  if file_path.endswith('.gz'):
 | 
| -    return gzip.open(file_path, mode + 'b')
 | 
| -  else:
 | 
| -    return open(file_path, mode + 't')
 | 
| -
 | 
| -
 | 
| -def FetchAndExtractSymbolsMac(symbol_base_directory, version):
 | 
| -  def GetLocalPath(base_dir, version):
 | 
| -    return os.path.join(base_dir, version + ".tar.bz2")
 | 
| -  def GetSymbolsPath(version):
 | 
| -    return "desktop-*/" + version + "/mac64/Google Chrome.dSYM.tar.bz2"
 | 
| -  def ExtractSymbolTarFile(symbol_sub_dir, symbol_tar_file):
 | 
| -    os.makedirs(symbol_sub_dir)
 | 
| -    with tarfile.open(os.path.expanduser(symbol_tar_file), "r:bz2") as tar:
 | 
| -      tar.extractall(symbol_sub_dir)
 | 
| -
 | 
| -  symbol_sub_dir = os.path.join(symbol_base_directory, version)
 | 
| -  if os.path.isdir(symbol_sub_dir):
 | 
| -    return True
 | 
| -
 | 
| -  bzip_path = GetLocalPath(symbol_base_directory, version)
 | 
| -  if not os.path.isfile(bzip_path):
 | 
| -
 | 
| -    _CLOUD_STORAGE_BUCKET = "chrome-unsigned"
 | 
| -    if not cloud_storage.Exists(_CLOUD_STORAGE_BUCKET, GetSymbolsPath(version)):
 | 
| -      print "Can't find symbols on GCS."
 | 
| -      return False
 | 
| -    print "Downloading symbols files from GCS, please wait."
 | 
| -    cloud_storage.Get(_CLOUD_STORAGE_BUCKET, GetSymbolsPath(version), bzip_path)
 | 
| -
 | 
| -  ExtractSymbolTarFile(symbol_sub_dir, bzip_path)
 | 
| -  return True
 | 
| -
 | 
| -
 | 
| -def FetchAndExtractSymbolsWin(symbol_base_directory, version, is64bit):
 | 
| -  def DownloadAndExtractZipFile(zip_path, source, destination):
 | 
| -    if not os.path.isfile(zip_path):
 | 
| -      _CLOUD_STORAGE_BUCKET = "chrome-unsigned"
 | 
| -      if not cloud_storage.Exists(_CLOUD_STORAGE_BUCKET, source):
 | 
| -        print "Can't find symbols on GCS."
 | 
| -        return False
 | 
| -      print "Downloading symbols files from GCS, please wait."
 | 
| -      cloud_storage.Get(_CLOUD_STORAGE_BUCKET, source, zip_path)
 | 
| -      if not os.path.isfile(zip_path):
 | 
| -        print "Can't download symbols on GCS."
 | 
| -        return False
 | 
| -    with zipfile.ZipFile(zip_path, "r") as zip:
 | 
| -      for member in zip.namelist():
 | 
| -         filename = os.path.basename(member)
 | 
| -         # Skip directories.
 | 
| -         if not filename:
 | 
| -           continue
 | 
| -         # Extract archived files.
 | 
| -         source = zip.open(member)
 | 
| -         target = file(os.path.join(symbol_sub_dir, filename), "wb")
 | 
| -         with source, target:
 | 
| -           shutil.copyfileobj(source, target)
 | 
| -
 | 
| -  folder = "win64" if is64bit else "win"
 | 
| -  gcs_folder = "desktop-*/" + version + "/" + folder + "-pgo/"
 | 
| -
 | 
| -  symbol_sub_dir = os.path.join(symbol_base_directory,
 | 
| -                                "chrome-" + folder + "-" + version)
 | 
| -  if os.path.isdir(symbol_sub_dir):
 | 
| -    return True
 | 
| -
 | 
| -  os.makedirs(symbol_sub_dir)
 | 
| -  DownloadAndExtractZipFile(
 | 
| -      os.path.join(symbol_base_directory,
 | 
| -                   "chrome-" + folder + "-" + version + "-syms.zip"),
 | 
| -      gcs_folder + "chrome-win32-syms.zip",
 | 
| -      symbol_sub_dir)
 | 
| -  DownloadAndExtractZipFile(
 | 
| -      os.path.join(symbol_base_directory,
 | 
| -                   "chrome-" + folder + "-" + version + ".zip"),
 | 
| -      gcs_folder + "chrome-" + folder + "-pgo.zip",
 | 
| -      symbol_sub_dir)
 | 
| -
 | 
| -  return True
 | 
| -
 | 
| -# Suffix used for backup files.
 | 
| -BACKUP_FILE_TAG = '.BACKUP'
 | 
| -
 | 
| -def main():
 | 
| -  parser = argparse.ArgumentParser()
 | 
| -  parser.add_argument(
 | 
| -      'file',
 | 
| -      help='Trace file to symbolize (.json or .json.gz)')
 | 
| -
 | 
| -  parser.add_argument(
 | 
| -      '--no-backup', dest='backup', default='true', action='store_false',
 | 
| -      help="Don't create {} files".format(BACKUP_FILE_TAG))
 | 
| -
 | 
| -  parser.add_argument(
 | 
| -      '--output-directory',
 | 
| -      help='The path to the build output directory, such as out/Debug.')
 | 
| -
 | 
| -  home_dir = os.path.expanduser('~')
 | 
| -  default_dir = os.path.join(home_dir, "symbols")
 | 
| -  parser.add_argument(
 | 
| -      '--symbol-base-directory',
 | 
| -      default=default_dir,
 | 
| -      help='Directory where symbols are downloaded and cached.')
 | 
| -
 | 
| -  symbolizer = Symbolizer()
 | 
| -  if symbolizer.symbolizer_path is None:
 | 
| -    sys.exit("Can't symbolize - no %s in PATH." % symbolizer.binary)
 | 
| -
 | 
| -  options = parser.parse_args()
 | 
| -
 | 
| -  trace_file_path = options.file
 | 
| -
 | 
| -  print 'Reading trace file...'
 | 
| -  with OpenTraceFile(trace_file_path, 'r') as trace_file:
 | 
| -    trace = Trace(json.load(trace_file))
 | 
| -
 | 
| -  # Perform some sanity checks.
 | 
| -  if trace.is_win and sys.platform != 'win32':
 | 
| -    print "Cannot symbolize a windows trace on this architecture!"
 | 
| -    return False
 | 
| -
 | 
| -  # If the trace is from Chromium, assume that symbols are already present.
 | 
| -  # Otherwise the trace is from Google Chrome. Assume that this is not a local
 | 
| -  # build of Google Chrome with symbols, and that we need to fetch symbols
 | 
| -  # from gcs.
 | 
| -  if not trace.is_chromium:
 | 
| -    has_symbols = False
 | 
| -    if symbolizer.is_mac:
 | 
| -      has_symbols = FetchAndExtractSymbolsMac(options.symbol_base_directory,
 | 
| -                                              trace.version)
 | 
| -    if symbolizer.is_win:
 | 
| -      has_symbols = FetchAndExtractSymbolsWin(options.symbol_base_directory,
 | 
| -                                              trace.version, trace.is_64bit)
 | 
| -    if not has_symbols:
 | 
| -      print 'Cannot fetch symbols from GCS'
 | 
| -      return False
 | 
| -
 | 
| -  Symbolize(options, trace, symbolizer)
 | 
| -
 | 
| -  if trace.modified:
 | 
| -    trace.ApplyModifications()
 | 
| -
 | 
| -    if options.backup:
 | 
| -      backup_file_path = trace_file_path + BACKUP_FILE_TAG
 | 
| -      print 'Backing up trace file to {}'.format(backup_file_path)
 | 
| -      os.rename(trace_file_path, backup_file_path)
 | 
| -
 | 
| -    print 'Updating the trace file...'
 | 
| -    with OpenTraceFile(trace_file_path, 'w') as trace_file:
 | 
| -      json.dump(trace.node, trace_file)
 | 
| -  else:
 | 
| -    print 'No modifications were made - not updating the trace file.'
 | 
| -
 | 
| -
 | 
| -if __name__ == '__main__':
 | 
| -  main()
 | 
| 
 |