| Index: tracing/bin/symbolize_trace
|
| diff --git a/tracing/bin/symbolize_trace b/tracing/bin/symbolize_trace
|
| index 7c6f5a4e37b04144fc5bf255bb5509857023654a..9f934c0402777e2d1199d7fcc996f5068fead2aa 100755
|
| --- a/tracing/bin/symbolize_trace
|
| +++ b/tracing/bin/symbolize_trace
|
| @@ -3,10 +3,217 @@
|
| # Use of this source code is governed by a BSD-style license that can be
|
| # found in the LICENSE file.
|
|
|
| +"""
|
| +This script processes trace files and symbolizes stack frames generated by
|
| +Chrome's native heap profiler.
|
| +
|
| +=== Overview ===
|
| +
|
| +Trace file is essentially a giant JSON array of dictionaries (events).
|
| +Events have some predefined keys (e.g. 'pid'), but otherwise are free to
|
| +have anything inside. Trace file contains events from all Chrome processes
|
| +that were sampled during tracing period.
|
| +
|
| +This script cares only about memory dump events generated with memory-infra
|
| +category enabled.
|
| +
|
| +When Chrome native heap profiling is enabled, some memory dump events
|
| +include the following extra information:
|
| +
|
| +* (Per allocator) Information about live allocations at the moment of the
|
| + memory dump (the information includes backtraces, types / categories,
|
| + sizes, and counts of allocations). There are several allocators in
|
| + Chrome: e.g. malloc, blink_gc, partition_alloc.
|
| +
|
| +* (Per process) Stack frame tree of all functions that called allocators
|
| + above.
|
| +
|
| +This script does the following:
|
| +
|
| +1. Parses the given trace file (loads JSON).
|
| +2. Finds memory dump events and parses stack frame tree for each process.
|
| +3. Finds stack frames that have PC addresses instead of function names.
|
| +4. Symbolizes PCs and modifies loaded JSON.
|
| +5. Writes modified JSON back to the file.
|
| +
|
| +The script supports trace files from the following platforms:
|
| + * Android (the script itself must be run on Linux)
|
| + * Linux
|
| + * macOS
|
| + * Windows
|
| +
|
| +Important note - the script doesn't check that it symbolizes the same
|
| +that were used at the time trace was taken. I.e. if you take a trace, change
|
| +and rebuild Chrome binaries, the script will blindly use the new binaries.
|
| +
|
| +=== Details ===
|
| +
|
| +There are two formats of heap profiler information: legacy and modern. The
|
| +main differences relevant to this script are:
|
| +
|
| +* In the modern format the stack frame tree, type name mapping, and string
|
| + mapping nodes are dumped incrementally. These nodes are dumped in each
|
| + memory dump event and carry updates that occurred since the last event.
|
| +
|
| + For example, let's say that when the first memory dump event is generated
|
| + we only know about a function foo() (called from main()) allocating objects
|
| + of type "int":
|
| +
|
| + {
|
| + "args": {
|
| + "dumps": {
|
| + "heaps_v2": {
|
| + "maps": {
|
| + "nodes": [
|
| + { "id": 1, "name_sid": 1 },
|
| + { "id": 2, "parent": 1, "name_sid": 3 },
|
| + ],
|
| + "types": [
|
| + { "id": 1, "name_sid": 2 },
|
| + ],
|
| + "strings": [
|
| + { "id": 1, "string": "main()" },
|
| + { "id": 2, "string": "int" },
|
| + { "id": 3, "string": "foo()" },
|
| + ]
|
| + },
|
| + "allocators": { ...live allocations per allocator... },
|
| + ...
|
| + },
|
| + ...
|
| + }
|
| + },
|
| + ...
|
| + }
|
| +
|
| + Here:
|
| + * 'nodes' node encodes stack frame tree
|
| + * 'types' node encodes type name mappings
|
| + * 'strings' node encodes string mapping (explained below)
|
| +
|
| + Then, by the time the second memory dump event is generated, we learn about
|
| + bar() (called from main()), which also allocated "int" objects. Only the
|
| + new information is dumped, i.e. bar() stack frame:
|
| +
|
| + {
|
| + "args": {
|
| + "dumps": {
|
| + "heaps_v2": {
|
| + "maps": {
|
| + "nodes": [
|
| + { "id": 2, "parent": 1, "name_sid": 4 },
|
| + ],
|
| + "types": [],
|
| + "strings": [
|
| + { "id": 4, "string": "bar()" },
|
| + ]
|
| + },
|
| + "allocators": { ...live allocations per allocator... },
|
| + ...
|
| + },
|
| + ...
|
| + }
|
| + },
|
| + ...
|
| + }
|
| +
|
| + Note that 'types' node is empty, since there were no updates. All three
|
| + nodes ('nodes', 'types', and 'strings') can be empty if there were no updates
|
| + to them.
|
| +
|
| + For simplicity, when the script updates incremental nodes, it puts updated
|
| + content in the first node, and clears all others. I.e. the following stack
|
| + frame nodes:
|
| +
|
| + 'nodes': [
|
| + { "id": 1, "name_sid": 1 },
|
| + { "id": 2, "parent": 1, "name_sid": 2 },
|
| + ]
|
| + 'nodes': [
|
| + { "id": 3, "parent": 2, "name_sid": 3 },
|
| + ]
|
| + 'nodes': [
|
| + { "id": 4, "parent": 3, "name_sid": 4 },
|
| + { "id": 5, "parent": 1, "name_sid": 5 },
|
| + ]
|
| +
|
| + After symbolization they are written as:
|
| +
|
| + 'nodes': [
|
| + { "id": 1, "name_sid": 1 },
|
| + { "id": 2, "parent": 1, "name_sid": 2 },
|
| + { "id": 3, "parent": 2, "name_sid": 3 },
|
| + { "id": 4, "parent": 3, "name_sid": 4 },
|
| + { "id": 5, "parent": 1, "name_sid": 5 },
|
| + ]
|
| + 'nodes': []
|
| + 'nodes': []
|
| +
|
| +
|
| +* In contrast, in the legacy format stack frame tree and type mappings are
|
| + dumped separately from memory dump events, once per process.
|
| +
|
| + Here is how a trace file with two memory dump events looks in the
|
| + legacy format:
|
| +
|
| + {
|
| + "args": {
|
| + "dumps": {
|
| + "heaps": { ...live allocations per allocator... },
|
| + ...
|
| + }
|
| + },
|
| + ...
|
| + }
|
| +
|
| + {
|
| + "args": {
|
| + "dumps": {
|
| + "heaps": { ...live allocations per allocator... },
|
| + ...
|
| + }
|
| + },
|
| + ...
|
| + }
|
| +
|
| + {
|
| + "args": {
|
| + "typeNames": {
|
| + 1: "int",
|
| + }
|
| + },
|
| + "cat": "__metadata",
|
| + "name": "typeNames",
|
| + ...
|
| + }
|
| +
|
| + {
|
| + "args": {
|
| + "stackFrames": {
|
| + 1: { "name": "main" },
|
| + 2: { "name": "foo", "parent": 1 },
|
| + 3: { "name": "bar", "parent": 1 },
|
| + }
|
| + },
|
| + "cat": "__metadata",
|
| + "name": "stackFrames",
|
| + ...
|
| + }
|
| +
|
| +
|
| +* Another change in the modern format is 'strings' node, which was added
|
| + to deduplicate stack frame names (mainly for trace file size reduction).
|
| + For consistency 'types' node also uses string mappings.
|
| +
|
| +
|
| +See crbug.com/708930 for more information about the modern format.
|
| +"""
|
| +
|
| import argparse
|
| import bisect
|
| import collections
|
| import gzip
|
| +import itertools
|
| import json
|
| import os
|
| import re
|
| @@ -26,166 +233,69 @@ import symbolize_trace_atos_regex
|
| import symbolize_trace_macho_reader
|
|
|
|
|
| -# Relevant trace event phases from Chromium's
|
| -# src/base/trace_event/common/trace_event_common.h.
|
| -TRACE_EVENT_PHASE_METADATA = 'M'
|
| -TRACE_EVENT_PHASE_MEMORY_DUMP = 'v'
|
| +class NodeWrapper(object):
|
| + """Wraps an event data node(s).
|
|
|
| + A node is a reference into a trace event JSON. Wrappers parse nodes to
|
| + provide convenient APIs and update nodes when asked to propagate changes
|
| + back (see ApplyModifications() below).
|
|
|
| -# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so)
|
| -# as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available
|
| -# via 'name' group.
|
| -ANDROID_PATH_MATCHER = re.compile(
|
| - r'^/data/(?:'
|
| - r'app/[^/]+/lib/[^/]+/|'
|
| - r'app-lib/[^/]+/|'
|
| - r'data/[^/]+/incremental-install-files/lib/'
|
| - r')(?P<name>.*\.so)')
|
| + Here is an example of legacy metadata event that contains stack frame tree:
|
|
|
| -# Subpath of output path where unstripped libraries are stored.
|
| -ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'
|
| + {
|
| + "args": {
|
| + "stackFrames": { ... }
|
| + },
|
| + "cat": "__metadata",
|
| + "name": "stackFrames",
|
| + "ph": "M",
|
| + ...
|
| + }
|
|
|
| + When this event is encountered, a reference to the "stackFrames" dictionary
|
| + is obtained and passed down to a specific wrapped class, which knows how to
|
| + parse / update the dictionary.
|
|
|
| -def FindInSystemPath(binary_name):
|
| - paths = os.environ['PATH'].split(os.pathsep)
|
| - for path in paths:
|
| - binary_path = os.path.join(path, binary_name)
|
| - if os.path.isfile(binary_path):
|
| - return binary_path
|
| - return None
|
| + There are two parsing patterns depending on whether node is serialized
|
| + incrementally:
|
|
|
| + * If node is not incremental, then parsing is done by __init__(),
|
| + see MemoryMap for an example.
|
|
|
| -class Symbolizer(object):
|
| - # Encapsulates platform-specific symbolization logic.
|
| - def __init__(self):
|
| - self.is_mac = sys.platform == 'darwin'
|
| - self.is_win = sys.platform == 'win32'
|
| - if self.is_mac:
|
| - self.binary = 'atos'
|
| - self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher()
|
| - elif self.is_win:
|
| - self.binary = 'addr2line-pdb.exe'
|
| - else:
|
| - self.binary = 'addr2line'
|
| - self.symbolizer_path = FindInSystemPath(self.binary)
|
| -
|
| - def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name):
|
| - def _SymbolizerCallback(sym_info, frames):
|
| - # Unwind inline chain to the top.
|
| - while sym_info.inlined_by:
|
| - sym_info = sym_info.inlined_by
|
| -
|
| - symbolized_name = sym_info.name if sym_info.name else unsymbolized_name
|
| - for frame in frames:
|
| - frame.name = symbolized_name
|
| + * If node is incremental, then __init__() does nothing, and instead
|
| + ParseNext() method is called when next node (from a next event) is
|
| + encountered.
|
|
|
| - symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path,
|
| - self.symbolizer_path,
|
| - _SymbolizerCallback,
|
| - inlines=True)
|
| + Some wrappers can also modify nodes they parsed. In such cases they have
|
| + additional APIs:
|
|
|
| - for address, frames in symfile.frames_by_address.iteritems():
|
| - # SymbolizeAsync() asserts that the type of address is int. We operate
|
| - # on longs (since they are raw pointers possibly from 64-bit processes).
|
| - # It's OK to cast here because we're passing relative PC, which should
|
| - # always fit into int.
|
| - symbolizer.SymbolizeAsync(int(address), frames)
|
| -
|
| - symbolizer.Join()
|
| + * 'modified' flag, which indicates whether the wrapper was changed.
|
|
|
| + * 'ApplyModifications' method, which propagates changes made to the wrapper
|
| + back to nodes. Successful invocation of ApplyModifications() resets
|
| + 'modified' flag.
|
|
|
| - def _SymbolizeMac(self, symfile):
|
| - chars_max = int(subprocess.check_output("getconf ARG_MAX", shell=True))
|
| -
|
| - # 16 for the address, 2 for "0x", 1 for the space
|
| - chars_per_address = 19
|
| -
|
| - load_address = (symbolize_trace_macho_reader.
|
| - ReadMachOTextLoadAddress(symfile.symbolizable_path))
|
| - assert load_address is not None
|
| -
|
| - cmd_base = [self.symbolizer_path, '-arch', 'x86_64', '-l',
|
| - '0x%x' % load_address, '-o',
|
| - symfile.symbolizable_path]
|
| - chars_for_other_arguments = len(' '.join(cmd_base)) + 1
|
| -
|
| - # The maximum number of inputs that can be processed at once is limited by
|
| - # ARG_MAX. This currently evalutes to ~13000 on macOS.
|
| - max_inputs = (chars_max - chars_for_other_arguments) / chars_per_address
|
| -
|
| - all_keys = symfile.frames_by_address.keys()
|
| - processed_keys_count = 0
|
| - while len(all_keys):
|
| - input_count = min(len(all_keys), max_inputs)
|
| - keys_to_process = all_keys[0:input_count]
|
| -
|
| - cmd = list(cmd_base)
|
| - cmd.extend([hex(int(x) + load_address)
|
| - for x in keys_to_process])
|
| - output_array = subprocess.check_output(cmd).split('\n')
|
| - for i in range(len(keys_to_process)):
|
| - for frame in (symfile.frames_by_address.values()
|
| - [i + processed_keys_count]):
|
| - frame.name = self._matcher.Match(output_array[i])
|
| - processed_keys_count += len(keys_to_process)
|
| - all_keys = all_keys[input_count:]
|
| -
|
| -
|
| - def _SymbolizeWin(self, symfile):
|
| - """Invoke symbolizer binary on windows and write all input in one go.
|
| -
|
| - Unlike linux, on windows, symbolization talks through a shared system
|
| - service that handles communication with the NT symbol servers. This
|
| - creates an explicit serialization (and therefor lock contention) of
|
| - any process using the symbol API for files do not have a local PDB.
|
| -
|
| - Thus, even though the windows symbolizer binary can be make command line
|
| - compatible with the POSIX addr2line interface, paralellizing the
|
| - symbolization does not yield the same performance effects. Running
|
| - just one symbolizer seems good enough for now. Can optimize later
|
| - if this becomes a bottleneck.
|
| - """
|
| - cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe',
|
| - symfile.symbolizable_path]
|
| -
|
| - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE,
|
| - stderr=sys.stderr)
|
| - addrs = ["%x" % relative_pc for relative_pc in
|
| - symfile.frames_by_address.keys()]
|
| - (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs))
|
| - stdout_data = stdout_data.split('\n')
|
| -
|
| - # This is known to be in the same order as stderr_data.
|
| - for i, addr in enumerate(addrs):
|
| - for frame in symfile.frames_by_address[int(addr, 16)]:
|
| - # Output of addr2line with --functions is always 2 outputs per
|
| - # symbol, function name followed by source line number. Only grab
|
| - # the function name as line info is not always available.
|
| - frame.name = stdout_data[i * 2]
|
| -
|
| -
|
| - def Symbolize(self, symfile, unsymbolized_name):
|
| - if self.is_mac:
|
| - self._SymbolizeMac(symfile)
|
| - if self.is_win:
|
| - self._SymbolizeWin(symfile)
|
| - else:
|
| - self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name)
|
| + """
|
| + pass
|
|
|
|
|
| - def IsSymbolizableFile(self, file_path):
|
| - if self.is_win:
|
| - extension = os.path.splitext(file_path)[1].lower()
|
| - return extension in ['.dll', '.exe']
|
| - else:
|
| - result = subprocess.check_output(['file', '-0', file_path])
|
| - type_string = result[result.find('\0') + 1:]
|
| - return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*',
|
| - type_string, re.DOTALL))
|
| +class MemoryMap(NodeWrapper):
|
| + """Wraps 'process_mmaps' node.
|
|
|
| + 'process_mmaps' node contains information about file mappings.
|
|
|
| -class ProcessMemoryMaps(object):
|
| - """Represents 'process_mmaps' trace file entry."""
|
| + "process_mmaps": {
|
| + "vm_regions": [
|
| + {
|
| + "mf": "<file_path>",
|
| + "sa": "<start_address>",
|
| + "sz": "<size>",
|
| + ...
|
| + },
|
| + ...
|
| + ]
|
| + }
|
| + """
|
|
|
| class Region(object):
|
| def __init__(self, start_address, size, file_path):
|
| @@ -221,15 +331,13 @@ class ProcessMemoryMaps(object):
|
| return 'Region(0x{:X} - 0x{:X}, {})'.format(
|
| self.start_address, self.end_address, self.file_path)
|
|
|
| - def __init__(self, process_mmaps):
|
| - """Parses 'process_mmaps' dictionary."""
|
| -
|
| + def __init__(self, process_mmaps_node):
|
| regions = []
|
| - for region_value in process_mmaps['vm_regions']:
|
| + for region_node in process_mmaps_node['vm_regions']:
|
| regions.append(self.Region(
|
| - long(region_value['sa'], 16),
|
| - long(region_value['sz'], 16),
|
| - region_value['mf']))
|
| + long(region_node['sa'], 16),
|
| + long(region_node['sz'], 16),
|
| + region_node['mf']))
|
| regions.sort()
|
|
|
| # Copy regions without duplicates and check for overlaps.
|
| @@ -259,104 +367,542 @@ class ProcessMemoryMaps(object):
|
| return None
|
|
|
|
|
| -class StackFrames(object):
|
| - """Represents 'stackFrames' trace file entry."""
|
| +class UnsupportedHeapDumpVersionError(Exception):
|
| + """Helper exception class to signal unsupported heap dump version."""
|
| +
|
| + def __init__(self, version):
|
| + message = 'Unsupported heap dump version: {}'.format(version)
|
| + super(UnsupportedHeapDumpVersionError, self).__init__(message)
|
| +
|
| +
|
| +class StringMap(NodeWrapper):
|
| + """Wraps all 'strings' nodes for a process.
|
| +
|
| + 'strings' node contains incremental mappings between integer ids and strings.
|
| +
|
| + "strings": [
|
| + {
|
| + "id": <string_id>,
|
| + "string": <string>
|
| + },
|
| + ...
|
| + ]
|
| + """
|
| +
|
| + def __init__(self):
|
| + self._modified = False
|
| + self._strings_nodes = []
|
| + self._string_by_id = {}
|
| + self._id_by_string = {}
|
| + self._max_string_id = 0
|
| +
|
| + @property
|
| + def modified(self):
|
| + """Returns True if the wrapper was modified (see NodeWrapper)."""
|
| + return self._modified
|
| +
|
| + @property
|
| + def string_by_id(self):
|
| + return self._string_by_id
|
| +
|
| + def ParseNext(self, heap_dump_version, strings_node):
|
| + """Parses and interns next node (see NodeWrapper)."""
|
| +
|
| + if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
|
| + raise UnsupportedHeapDumpVersionError(heap_dump_version)
|
|
|
| - class PCFrame(object):
|
| - def __init__(self, pc, frame):
|
| + self._strings_nodes.append(strings_node)
|
| + for string_node in strings_node:
|
| + self._Insert(string_node['id'], string_node['string'])
|
| +
|
| + def Clear(self):
|
| + """Clears all string mappings."""
|
| + if self._string_by_id:
|
| + self._modified = True
|
| + # ID #0 means 'no entry' and must always be present. Carry it over.
|
| + null_string = self._string_by_id[0]
|
| + self._string_by_id = {}
|
| + self._id_by_string = {}
|
| + self._Insert(0, null_string)
|
| + self._max_string_id = 0
|
| +
|
| + def AddString(self, string):
|
| + """Adds a string (if it doesn't exist) and returns its integer id."""
|
| + string_id = self._id_by_string.get(string)
|
| + if string_id is None:
|
| + string_id = self._max_string_id + 1
|
| + self._Insert(string_id, string)
|
| + self._modified = True
|
| + return string_id
|
| +
|
| + def ApplyModifications(self):
|
| + """Propagates modifications back to nodes (see NodeWrapper)."""
|
| + if not self.modified:
|
| + return
|
| +
|
| + assert self._strings_nodes, 'no nodes'
|
| +
|
| + # Serialize into the first node, and clear all others.
|
| +
|
| + for strings_node in self._strings_nodes:
|
| + del strings_node[:]
|
| + strings_node = self._strings_nodes[0]
|
| + for string_id, string in self._string_by_id.iteritems():
|
| + strings_node.append({'id': string_id, 'string': string})
|
| +
|
| + self._modified = False
|
| +
|
| + def _Insert(self, string_id, string):
|
| + self._id_by_string[string] = string_id
|
| + self._string_by_id[string_id] = string
|
| + self._max_string_id = max(self._max_string_id, string_id)
|
| +
|
| +
|
| +class TypeNameMap(NodeWrapper):
|
| + """Wraps all 'types' nodes for a process.
|
| +
|
| + 'types' nodes encode mappings between integer type ids and integer
|
| + string ids (from 'strings' nodes).
|
| +
|
| + "types": [
|
| + {
|
| + "id": <type_id>,
|
| + "name_sid": <name_string_id>
|
| + }
|
| + ...
|
| + ]
|
| +
|
| + For simplicity string ids are translated into strings during parsing,
|
| + and then translated back to ids in ApplyModifications().
|
| + """
|
| + def __init__(self):
|
| + self._modified = False
|
| + self._type_name_nodes = []
|
| + self._name_by_id = {}
|
| + self._id_by_name = {}
|
| + self._max_type_id = 0
|
| +
|
| + @property
|
| + def modified(self):
|
| + """Returns True if the wrapper was modified (see NodeWrapper)."""
|
| + return self._modified
|
| +
|
| + @property
|
| + def name_by_id(self):
|
| + """Returns {id -> name} dict (must not be changed directly)."""
|
| + return self._name_by_id
|
| +
|
| + def ParseNext(self, heap_dump_version, type_name_node, string_map):
|
| + """Parses and interns next node (see NodeWrapper).
|
| +
|
| + |string_map| - A StringMap object to use to translate string ids
|
| + to strings.
|
| + """
|
| + if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
|
| + raise UnsupportedHeapDumpVersionError(heap_dump_version)
|
| +
|
| + self._type_name_nodes.append(type_name_node)
|
| + for type_node in type_name_node:
|
| + self._Insert(type_node['id'],
|
| + string_map.string_by_id[type_node['name_sid']])
|
| +
|
| + def AddType(self, type_name):
|
| + """Adds a type name (if it doesn't exist) and returns its id."""
|
| + type_id = self._id_by_name.get(type_name)
|
| + if type_id is None:
|
| + type_id = self._max_type_id + 1
|
| + self._Insert(type_id, type_name)
|
| + self._modified = True
|
| + return type_id
|
| +
|
| + def ApplyModifications(self, string_map, force=False):
|
| + """Propagates modifications back to nodes.
|
| +
|
| + |string_map| - A StringMap object to use to translate strings to ids.
|
| + |force| - Whether to propagate changes regardless of 'modified' flag.
|
| + """
|
| + if not self.modified and not force:
|
| + return
|
| +
|
| + assert self._type_name_nodes, 'no nodes'
|
| +
|
| + # Serialize into the first node, and clear all others.
|
| +
|
| + for types_node in self._type_name_nodes:
|
| + del types_node[:]
|
| + types_node = self._type_name_nodes[0]
|
| + for type_id, type_name in self._name_by_id.iteritems():
|
| + types_node.append({
|
| + 'id': type_id,
|
| + 'name_sid': string_map.AddString(type_name)})
|
| +
|
| + self._modified = False
|
| +
|
| + def _Insert(self, type_id, type_name):
|
| + self._id_by_name[type_name] = type_id
|
| + self._name_by_id[type_id] = type_name
|
| + self._max_type_id = max(self._max_type_id, type_id)
|
| +
|
| +
|
| +class StackFrameMap(NodeWrapper):
|
| + """ Wraps stack frame tree nodes for a process.
|
| +
|
| + For the legacy format this wrapper expects a single 'stackFrames' node
|
| + (which comes from metadata event):
|
| +
|
| + "stackFrames": {
|
| + "<frame_id>": {
|
| + "name": "<frame_name>"
|
| + "parent": "<parent_frame_id>"
|
| + },
|
| + ...
|
| + }
|
| +
|
| + For the modern format this wrapper expects several 'nodes' nodes:
|
| +
|
| + "nodes": [
|
| + {
|
| + "id": <frame_id>,
|
| + "parent": <parent_frame_id>,
|
| + "name_sid": <name_string_id>
|
| + },
|
| + ...
|
| + ]
|
| +
|
| + In both formats frame name is a string. Native heap profiler generates
|
| + specially formatted frame names (e.g. "pc:10eb78dba") for function
|
| + addresses (PCs). Inner Frame class below parses name and extracts PC,
|
| + if it's there.
|
| + """
|
| + class Frame(object):
|
| + def __init__(self, frame_id, name, parent_frame_id):
|
| self._modified = False
|
| - self._pc = pc
|
| - self._frame = frame
|
| + self._id = frame_id
|
| + self._name = name
|
| + self._pc = self._ParsePC(name)
|
| + self._parent_id = parent_frame_id
|
| + self._ext = None
|
|
|
| @property
|
| def modified(self):
|
| + """Returns True if the frame was modified.
|
| +
|
| + For example changing frame's name sets this flag (since the change
|
| + needs to be propagated back to nodes).
|
| + """
|
| return self._modified
|
|
|
| @property
|
| + def id(self):
|
| + """Frame id (integer)."""
|
| + return self._id
|
| +
|
| + @property
|
| def pc(self):
|
| + """Parsed (integer) PC of the frame, or None."""
|
| return self._pc
|
|
|
| @property
|
| def name(self):
|
| - return self._frame['name']
|
| + """Name of the frame (see above)."""
|
| + return self._name
|
|
|
| @name.setter
|
| def name(self, value):
|
| + """Changes the name. Doesn't affect value of |pc|."""
|
| self._modified = True
|
| - self._frame['name'] = value
|
| + self._name = value
|
|
|
| - def __init__(self, stack_frames):
|
| - """Constructs object using 'stackFrames' dictionary."""
|
| - self._pc_frames = []
|
| - for frame in stack_frames.itervalues():
|
| - pc_frame = self._ParsePCFrame(frame)
|
| - if pc_frame:
|
| - self._pc_frames.append(pc_frame)
|
| + @property
|
| + def parent_id(self):
|
| + """Parent frame id (integer)."""
|
| + return self._parent_id
|
|
|
| - @property
|
| - def pc_frames(self):
|
| - return self._pc_frames
|
| + _PC_TAG = 'pc:'
|
| +
|
| + def _ParsePC(self, name):
|
| + if not name.startswith(self._PC_TAG):
|
| + return None
|
| + return long(name[len(self._PC_TAG):], 16)
|
| +
|
| + def _ClearModified(self):
|
| + self._modified = False
|
| +
|
| + def __init__(self):
|
| + self._modified = False
|
| + self._heap_dump_version = None
|
| + self._stack_frames_nodes = []
|
| + self._frame_by_id = {}
|
|
|
| @property
|
| def modified(self):
|
| - return any(f.modified for f in self._pc_frames)
|
| + """Returns True if the wrapper or any of its frames were modified."""
|
| + return (self._modified or
|
| + any(f.modified for f in self._frame_by_id.itervalues()))
|
|
|
| - _PC_TAG = 'pc:'
|
| + @property
|
| + def frame_by_id(self):
|
| + """Returns {id -> frame} dict (must not be modified directly)."""
|
| + return self._frame_by_id
|
| +
|
| + def ParseNext(self, heap_dump_version, stack_frames_node, string_map):
|
| + """Parses the next stack frames node (see NodeWrapper).
|
| +
|
| + For the modern format |string_map| is used to translate string ids
|
| + to strings.
|
| + """
|
| +
|
| + frame_by_id = {}
|
| + if heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
|
| + if self._stack_frames_nodes:
|
| + raise Exception('Legacy stack frames node is expected only once.')
|
| + for frame_id, frame_node in stack_frames_node.iteritems():
|
| + frame = self.Frame(frame_id,
|
| + frame_node['name'],
|
| + frame_node.get('parent'))
|
| + frame_by_id[frame.id] = frame
|
| + else:
|
| + if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
|
| + raise UnsupportedHeapDumpVersionError(heap_dump_version)
|
| + for frame_node in stack_frames_node:
|
| + frame = self.Frame(frame_node['id'],
|
| + string_map.string_by_id[frame_node['name_sid']],
|
| + frame_node.get('parent'))
|
| + frame_by_id[frame.id] = frame
|
| +
|
| + self._heap_dump_version = heap_dump_version
|
| + self._stack_frames_nodes.append(stack_frames_node)
|
|
|
| - @classmethod
|
| - def _ParsePCFrame(self, frame):
|
| - name = frame['name']
|
| - if not name.startswith(self._PC_TAG):
|
| - return None
|
| - pc = long(name[len(self._PC_TAG):], 16)
|
| - return self.PCFrame(pc, frame)
|
| + self._frame_by_id = frame_by_id
|
| +
|
| + def ApplyModifications(self, string_map, force=False):
|
| + """Applies modifications back to nodes (see NodeWrapper)."""
|
| +
|
| + if not self.modified and not force:
|
| + return
|
| +
|
| + assert self._stack_frames_nodes, 'no nodes'
|
| + if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
|
| + assert string_map is None, \
|
| + 'string_map should not be used with the legacy format'
|
| +
|
| + # Serialize frames into the first node, clear all others.
|
| +
|
| + for frames_node in self._stack_frames_nodes:
|
| + if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
|
| + frames_node.clear()
|
| + else:
|
| + del frames_node[:]
|
|
|
| + frames_node = self._stack_frames_nodes[0]
|
| + for frame in self._frame_by_id.itervalues():
|
| + if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
|
| + frame_node = {'name': frame.name}
|
| + frames_node[frame.id] = frame_node
|
| + else:
|
| + frame_node = {
|
| + 'id': frame.id,
|
| + 'name_sid': string_map.AddString(frame.name)
|
| + }
|
| + frames_node.append(frame_node)
|
| + if frame.parent_id is not None:
|
| + frame_node['parent'] = frame.parent_id
|
| + frame._ClearModified()
|
|
|
| -class Process(object):
|
| - """Holds various bits of information about a process in a trace file."""
|
| + self._modified = False
|
|
|
| - def __init__(self, pid):
|
| - self.pid = pid
|
| - self.name = None
|
| - self.mmaps = None
|
| - self.stack_frames = None
|
|
|
| +class Trace(NodeWrapper):
|
| + """Wrapper for the root trace node (i.e. the trace JSON itself).
|
|
|
| -def CollectProcesses(trace):
|
| - """Parses trace dictionary and returns pid->Process map of all processes
|
| - suitable for symbolization (which have both mmaps and stack_frames).
|
| + This wrapper parses select nodes from memory-infra events and groups
|
| + parsed data per-process (see inner Process class below).
|
| """
|
|
|
| - process_map = {}
|
| + # Indicates legacy heap dump format.
|
| + HEAP_DUMP_VERSION_LEGACY = 'Legacy'
|
|
|
| - # Android traces produced via 'chrome://inspect/?tracing#devices' are
|
| - # just list of events.
|
| - events = trace if isinstance(trace, list) else trace['traceEvents']
|
| - for event in events:
|
| - name = event.get('name')
|
| - if not name:
|
| - continue
|
| + # Indicates variation of a modern heap dump format.
|
| + HEAP_DUMP_VERSION_1 = 1
|
| +
|
| + class Process(object):
|
| + """Collection of per-process data and wrappers."""
|
| +
|
| + def __init__(self, pid):
|
| + self._pid = pid
|
| + self._name = None
|
| + self._memory_map = None
|
| + self._stack_frame_map = StackFrameMap()
|
| + self._type_name_map = TypeNameMap()
|
| + self._string_map = StringMap()
|
| + self._heap_dump_version = None
|
| +
|
| + @property
|
| + def modified(self):
|
| + return self._stack_frame_map.modified or self._type_name_map.modified
|
|
|
| - pid = event['pid']
|
| - process = process_map.get(pid)
|
| - if process is None:
|
| - process = Process(pid)
|
| - process_map[pid] = process
|
| + @property
|
| + def pid(self):
|
| + return self._pid
|
|
|
| - phase = event['ph']
|
| - if phase == TRACE_EVENT_PHASE_METADATA:
|
| - if name == 'process_name':
|
| - process.name = event['args']['name']
|
| - elif name == 'stackFrames':
|
| - process.stack_frames = StackFrames(event['args']['stackFrames'])
|
| - elif phase == TRACE_EVENT_PHASE_MEMORY_DUMP:
|
| - process_mmaps = event['args']['dumps'].get('process_mmaps')
|
| - if process_mmaps:
|
| - # TODO(dskiba): this parses all process_mmaps, but retains only the
|
| - # last one. We need to parse only once (lazy parsing?).
|
| - process.mmaps = ProcessMemoryMaps(process_mmaps)
|
| + @property
|
| + def name(self):
|
| + return self._name
|
|
|
| - return [p for p in process_map.itervalues() if p.mmaps and p.stack_frames]
|
| + @property
|
| + def unique_name(self):
|
| + """Returns string that includes both process name and its pid."""
|
| + name = self._name if self._name else 'UnnamedProcess'
|
| + return '{}({})'.format(name, self._pid)
|
| +
|
| + @property
|
| + def memory_map(self):
|
| + return self._memory_map
|
| +
|
| + @property
|
| + def stack_frame_map(self):
|
| + return self._stack_frame_map
|
| +
|
| + @property
|
| + def type_name_map(self):
|
| + return self._type_name_map
|
| +
|
| + def ApplyModifications(self):
|
| + """Calls ApplyModifications() on contained wrappers."""
|
| + if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
|
| + self._stack_frame_map.ApplyModifications(None)
|
| + else:
|
| + if self._stack_frame_map.modified or self._type_name_map.modified:
|
| + self._string_map.Clear()
|
| + self._stack_frame_map.ApplyModifications(self._string_map, force=True)
|
| + self._type_name_map.ApplyModifications(self._string_map, force=True)
|
| + self._string_map.ApplyModifications()
|
| +
|
| + def __init__(self, trace_node):
|
| + self._trace_node = trace_node
|
| + self._processes = []
|
| + self._heap_dump_version = None
|
| +
|
| + # Misc per-process information needed only during parsing.
|
| + class ProcessExt(object):
|
| + def __init__(self, pid):
|
| + self.process = Trace.Process(pid)
|
| + self.mapped_entry_names = set()
|
| + self.process_mmaps_node = None
|
| + self.seen_strings_node = False
|
| +
|
| + process_ext_by_pid = {}
|
| +
|
| + # Android traces produced via 'chrome://inspect/?tracing#devices' are
|
| + # just list of events.
|
| + events = trace_node if isinstance(trace_node, list) \
|
| + else trace_node['traceEvents']
|
| + for event in events:
|
| + name = event.get('name')
|
| + if not name:
|
| + continue
|
| +
|
| + pid = event['pid']
|
| + process_ext = process_ext_by_pid.get(pid)
|
| + if process_ext is None:
|
| + process_ext = ProcessExt(pid)
|
| + process_ext_by_pid[pid] = process_ext
|
| + process = process_ext.process
|
| +
|
| + phase = event['ph']
|
| + if phase == self._EVENT_PHASE_METADATA:
|
| + if name == 'process_name':
|
| + process._name = event['args']['name']
|
| + elif name == 'stackFrames':
|
| + process._stack_frame_map.ParseNext(
|
| + self._UseHeapDumpVersion(self.HEAP_DUMP_VERSION_LEGACY),
|
| + event['args']['stackFrames'],
|
| + process._string_map)
|
| + elif phase == self._EVENT_PHASE_MEMORY_DUMP:
|
| + dumps = event['args']['dumps']
|
| + process_mmaps = dumps.get('process_mmaps')
|
| + if process_mmaps:
|
| + # We want the most recent memory map, so parsing happens later
|
| + # once we finished reading all events.
|
| + process_ext.process_mmaps_node = process_mmaps
|
| + heaps = dumps.get('heaps_v2')
|
| + if heaps:
|
| + version = self._UseHeapDumpVersion(heaps['version'])
|
| + maps = heaps.get('maps')
|
| + if maps:
|
| + process_ext.mapped_entry_names.update(maps.iterkeys())
|
| + types = maps.get('types')
|
| + stack_frames = maps.get('nodes')
|
| + strings = maps.get('strings')
|
| + if (strings is None and (types or stack_frames)
|
| + and not process_ext.seen_strings_node):
|
| + # ApplyModifications() for TypeNameMap and StackFrameMap puts
|
| + # everything into the first node and depends on StringMap. So
|
| + # we need to make sure that 'strings' node is there if any of
|
| + # other two nodes present.
|
| + strings = []
|
| + maps['strings'] = strings
|
| + if strings is not None:
|
| + process_ext.seen_strings_node = True
|
| + process._string_map.ParseNext(version, strings)
|
| + if types:
|
| + process._type_name_map.ParseNext(
|
| + version, types, process._string_map)
|
| + if stack_frames:
|
| + process._stack_frame_map.ParseNext(
|
| + version, stack_frames, process._string_map)
|
| +
|
| + self._processes = []
|
| + for pe in process_ext_by_pid.itervalues():
|
| + pe.process._heap_dump_version = self._heap_dump_version
|
| + if pe.process_mmaps_node:
|
| + # Now parse the most recent memory map.
|
| + pe.process._memory_map = MemoryMap(pe.process_mmaps_node)
|
| + self._processes.append(pe.process)
|
| +
|
| + @property
|
| + def node(self):
|
| + """Root node (that was passed to the __init__)."""
|
| + return self._trace_node
|
| +
|
| + @property
|
| + def modified(self):
|
| + """Returns True if trace file needs to be updated.
|
| +
|
| + Before writing trace JSON back to a file ApplyModifications() needs
|
| + to be called.
|
| + """
|
| + return any(p.modified for p in self._processes)
|
| +
|
| + @property
|
| + def processes(self):
|
| + return self._processes
|
| +
|
| + @property
|
| + def heap_dump_version(self):
|
| + return self._heap_dump_version
|
| +
|
| + def ApplyModifications(self):
|
| + """Propagates modifications back to the trace JSON."""
|
| + for process in self._processes:
|
| + process.ApplyModifications()
|
| + assert not self.modified, 'still modified'
|
| +
|
| + # Relevant trace event phases from Chromium's
|
| + # src/base/trace_event/common/trace_event_common.h.
|
| + _EVENT_PHASE_METADATA = 'M'
|
| + _EVENT_PHASE_MEMORY_DUMP = 'v'
|
| +
|
| + def _UseHeapDumpVersion(self, version):
|
| + if self._heap_dump_version is None:
|
| + self._heap_dump_version = version
|
| + return version
|
| + elif self._heap_dump_version != version:
|
| + raise Exception(
|
| + ("Inconsistent trace file: first saw '{}' heap dump version, "
|
| + "then '{}'.").format(self._heap_dump_version, version))
|
| + else:
|
| + return version
|
|
|
|
|
| class SymbolizableFile(object):
|
| @@ -381,8 +927,12 @@ def ResolveSymbolizableFiles(processes):
|
| """
|
| symfile_by_path = {}
|
| for process in processes:
|
| - for frame in process.stack_frames.pc_frames:
|
| - region = process.mmaps.FindRegion(frame.pc)
|
| + if not process.memory_map:
|
| + continue
|
| + for frame in process.stack_frame_map.frame_by_id.itervalues():
|
| + if frame.pc is None:
|
| + continue
|
| + region = process.memory_map.FindRegion(frame.pc)
|
| if region is None:
|
| frame.name = '<unresolved>'
|
| continue
|
| @@ -397,15 +947,155 @@ def ResolveSymbolizableFiles(processes):
|
| return symfile_by_path.values()
|
|
|
|
|
| +def FindInSystemPath(binary_name):
|
| + paths = os.environ['PATH'].split(os.pathsep)
|
| + for path in paths:
|
| + binary_path = os.path.join(path, binary_name)
|
| + if os.path.isfile(binary_path):
|
| + return binary_path
|
| + return None
|
| +
|
| +
|
| +class Symbolizer(object):
|
| + """Encapsulates platform-specific symbolization logic."""
|
| +
|
| + def __init__(self):
|
| + self.is_mac = sys.platform == 'darwin'
|
| + self.is_win = sys.platform == 'win32'
|
| + if self.is_mac:
|
| + self.binary = 'atos'
|
| + self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher()
|
| + elif self.is_win:
|
| + self.binary = 'addr2line-pdb.exe'
|
| + else:
|
| + self.binary = 'addr2line'
|
| + self.symbolizer_path = FindInSystemPath(self.binary)
|
| +
|
| + def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name):
|
| + def _SymbolizerCallback(sym_info, frames):
|
| + # Unwind inline chain to the top.
|
| + while sym_info.inlined_by:
|
| + sym_info = sym_info.inlined_by
|
| +
|
| + symbolized_name = sym_info.name if sym_info.name else unsymbolized_name
|
| + for frame in frames:
|
| + frame.name = symbolized_name
|
| + frame.ext.source_path = sym_info.source_path
|
| +
|
| + symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path,
|
| + self.symbolizer_path,
|
| + _SymbolizerCallback,
|
| + inlines=True)
|
| +
|
| + for address, frames in symfile.frames_by_address.iteritems():
|
| + # SymbolizeAsync() asserts that the type of address is int. We operate
|
| + # on longs (since they are raw pointers possibly from 64-bit processes).
|
| + # It's OK to cast here because we're passing relative PC, which should
|
| + # always fit into int.
|
| + symbolizer.SymbolizeAsync(int(address), frames)
|
| +
|
| + symbolizer.Join()
|
| +
|
| +
|
| + def _SymbolizeMac(self, symfile):
|
| + chars_max = int(subprocess.check_output("getconf ARG_MAX", shell=True))
|
| +
|
| + # 16 for the address, 2 for "0x", 1 for the space
|
| + chars_per_address = 19
|
| +
|
| + load_address = (symbolize_trace_macho_reader.
|
| + ReadMachOTextLoadAddress(symfile.symbolizable_path))
|
| + assert load_address is not None
|
| +
|
| + cmd_base = [self.symbolizer_path, '-arch', 'x86_64', '-l',
|
| + '0x%x' % load_address, '-o',
|
| + symfile.symbolizable_path]
|
| + chars_for_other_arguments = len(' '.join(cmd_base)) + 1
|
| +
|
| + # The maximum number of inputs that can be processed at once is limited by
|
| + # ARG_MAX. This currently evaluates to ~13000 on macOS.
|
| + max_inputs = (chars_max - chars_for_other_arguments) / chars_per_address
|
| +
|
| + all_keys = symfile.frames_by_address.keys()
|
| + processed_keys_count = 0
|
| + while len(all_keys):
|
| + input_count = min(len(all_keys), max_inputs)
|
| + keys_to_process = all_keys[0:input_count]
|
| + cmd = list(cmd_base)
|
| + cmd.extend([hex(int(x) + load_address)
|
| + for x in keys_to_process])
|
| + output_array = subprocess.check_output(cmd).split('\n')
|
| + for i in range(len(keys_to_process)):
|
| + for frame in (symfile.frames_by_address.values()
|
| + [i + processed_keys_count]):
|
| + frame.name = self._matcher.Match(output_array[i])
|
| + processed_keys_count += len(keys_to_process)
|
| + all_keys = all_keys[input_count:]
|
| +
|
| + def _SymbolizeWin(self, symfile):
|
| + """Invoke symbolizer binary on windows and write all input in one go.
|
| +
|
| + Unlike linux, on windows, symbolization talks through a shared system
|
| + service that handles communication with the NT symbol servers. This
|
| + creates an explicit serialization (and therefore lock contention) of
|
| + any process using the symbol API for files that do not have a local PDB.
|
| +
|
| + Thus, even though the windows symbolizer binary can be made command-line
|
| + compatible with the POSIX addr2line interface, parallelizing the
|
| + symbolization does not yield the same performance effects. Running
|
| + just one symbolizer seems good enough for now. Can optimize later
|
| + if this becomes a bottleneck.
|
| + """
|
| + cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe',
|
| + symfile.symbolizable_path]
|
| +
|
| + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE,
|
| + stderr=sys.stderr)
|
| + addrs = ["%x" % relative_pc for relative_pc in
|
| + symfile.frames_by_address.keys()]
|
| + (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs))
|
| + stdout_data = stdout_data.split('\n')
|
| +
|
| + # The stdout output is known to be in the same order as the input addrs.
|
| + for i, addr in enumerate(addrs):
|
| + for frame in symfile.frames_by_address[int(addr, 16)]:
|
| + # Output of addr2line with --functions is always 2 outputs per
|
| + # symbol, function name followed by source line number. Only grab
|
| + # the function name as line info is not always available.
|
| + frame.name = stdout_data[i * 2]
|
| +
|
| + def Symbolize(self, symfile, unsymbolized_name):
|
| + if self.is_mac:
|
| + self._SymbolizeMac(symfile)
|
| + elif self.is_win:
|
| + self._SymbolizeWin(symfile)
|
| + else:
|
| + self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name)
|
| +
|
| + def IsSymbolizableFile(self, file_path):
|
| + if self.is_win:
|
| + extension = os.path.splitext(file_path)[1].lower()
|
| + return extension in ['.dll', '.exe']
|
| + else:
|
| + result = subprocess.check_output(['file', '-0', file_path])
|
| + type_string = result[result.find('\0') + 1:]
|
| + return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*',
|
| + type_string, re.DOTALL))
|
| +
|
| +
|
| def SymbolizeFiles(symfiles, symbolizer):
|
| """Symbolizes each file in the given list of SymbolizableFiles
|
| and updates stack frames with symbolization results."""
|
| +
|
| + if not symfiles:
|
| + print 'Nothing to symbolize.'
|
| + return
|
| +
|
| print 'Symbolizing...'
|
|
|
| def _SubPrintf(message, *args):
|
| print (' ' + message).format(*args)
|
|
|
| - symbolized = False
|
| for symfile in symfiles:
|
| unsymbolized_name = '<{}>'.format(
|
| symfile.path if symfile.path else 'unnamed')
|
| @@ -432,9 +1122,20 @@ def SymbolizeFiles(symfiles, symbolizer):
|
| symfile.path)
|
|
|
| symbolizer.Symbolize(symfile, unsymbolized_name)
|
| - symbolized = True
|
|
|
| - return symbolized
|
| +
|
| +# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so)
|
| +# as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available
|
| +# via 'name' group.
|
| +ANDROID_PATH_MATCHER = re.compile(
|
| + r'^/data/(?:'
|
| + r'app/[^/]+/lib/[^/]+/|'
|
| + r'app-lib/[^/]+/|'
|
| + r'data/[^/]+/incremental-install-files/lib/'
|
| + r')(?P<name>.*\.so)')
|
| +
|
| +# Subpath of output path where unstripped libraries are stored.
|
| +ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'
|
|
|
|
|
| def HaveFilesFromAndroid(symfiles):
|
| @@ -455,59 +1156,71 @@ def RemapAndroidFiles(symfiles, output_path):
|
| symfile.symbolizable_path = 'android://{}'.format(symfile.path)
|
|
|
|
|
| +def Symbolize(options, trace, symbolizer):
|
| + symfiles = ResolveSymbolizableFiles(trace.processes)
|
| +
|
| + # Android trace files don't have any indication they are from Android.
|
| + # So we're checking for Android-specific paths.
|
| + if HaveFilesFromAndroid(symfiles):
|
| + if not options.output_directory:
|
| + sys.exit('The trace file appears to be from Android. Please '
|
| + 'specify output directory to properly symbolize it.')
|
| + RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory))
|
| +
|
| + SymbolizeFiles(symfiles, symbolizer)
|
| +
|
| +
|
| +def OpenTraceFile(file_path, mode):
|
| + if file_path.endswith('.gz'):
|
| + return gzip.open(file_path, mode + 'b')
|
| + else:
|
| + return open(file_path, mode + 't')
|
| +
|
| +
|
| # Suffix used for backup files.
|
| BACKUP_FILE_TAG = '.BACKUP'
|
|
|
| def main():
|
| parser = argparse.ArgumentParser()
|
| - parser.add_argument('file',
|
| - help='Trace file to symbolize (.json or .json.gz)')
|
| - parser.add_argument('--no-backup',
|
| - dest='backup', default='true', action='store_false',
|
| - help="Don't create {} files".format(BACKUP_FILE_TAG))
|
| - parser.add_argument('--output-directory',
|
| - help='The path to the build output directory, such ' +
|
| - 'as out/Debug. Only needed for Android.')
|
| - options = parser.parse_args()
|
| + parser.add_argument(
|
| + 'file',
|
| + help='Trace file to symbolize (.json or .json.gz)')
|
|
|
| - trace_file_path = options.file
|
| - def _OpenTraceFile(mode):
|
| - if trace_file_path.endswith('.gz'):
|
| - return gzip.open(trace_file_path, mode + 'b')
|
| - else:
|
| - return open(trace_file_path, mode + 't')
|
| + parser.add_argument(
|
| + '--no-backup', dest='backup', default='true', action='store_false',
|
| + help="Don't create {} files".format(BACKUP_FILE_TAG))
|
| +
|
| + parser.add_argument(
|
| + '--output-directory',
|
| + help='The path to the build output directory, such as out/Debug.')
|
|
|
| symbolizer = Symbolizer()
|
| if symbolizer.symbolizer_path is None:
|
| sys.exit("Can't symbolize - no %s in PATH." % symbolizer.binary)
|
|
|
| + options = parser.parse_args()
|
| +
|
| + trace_file_path = options.file
|
| +
|
| print 'Reading trace file...'
|
| - with _OpenTraceFile('r') as trace_file:
|
| - trace = json.load(trace_file)
|
| + with OpenTraceFile(trace_file_path, 'r') as trace_file:
|
| + trace = Trace(json.load(trace_file))
|
|
|
| - processes = CollectProcesses(trace)
|
| - symfiles = ResolveSymbolizableFiles(processes)
|
| + Symbolize(options, trace, symbolizer)
|
|
|
| - # Android trace files don't have any indication they are from Android.
|
| - # So we're checking for Android-specific paths.
|
| - if HaveFilesFromAndroid(symfiles):
|
| - if not options.output_directory:
|
| - parser.error('The trace file appears to be from Android. Please '
|
| - "specify output directory (e.g. 'out/Debug') to properly "
|
| - 'symbolize it.')
|
| - RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory))
|
| + if trace.modified:
|
| + trace.ApplyModifications()
|
|
|
| - if SymbolizeFiles(symfiles, symbolizer):
|
| if options.backup:
|
| backup_file_path = trace_file_path + BACKUP_FILE_TAG
|
| - print 'Backing up trace file to {}...'.format(backup_file_path)
|
| + print 'Backing up trace file to {}'.format(backup_file_path)
|
| os.rename(trace_file_path, backup_file_path)
|
|
|
| - print 'Updating trace file...'
|
| - with _OpenTraceFile('w') as trace_file:
|
| - json.dump(trace, trace_file)
|
| + print 'Updating the trace file...'
|
| + with OpenTraceFile(trace_file_path, 'w') as trace_file:
|
| + json.dump(trace.node, trace_file)
|
| else:
|
| - print 'No PCs symbolized - not updating trace file.'
|
| + print 'No modifications were made - not updating the trace file.'
|
|
|
|
|
| if __name__ == '__main__':
|
|
|