Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(556)

Unified Diff: tracing/bin/symbolize_trace

Issue 2810523002: symbolize_trace: support new heap dump format. (Closed)
Patch Set: Remove everything except symbolization Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tracing/bin/symbolize_trace
diff --git a/tracing/bin/symbolize_trace b/tracing/bin/symbolize_trace
index 7c6f5a4e37b04144fc5bf255bb5509857023654a..a7416d2b54e0960143898ae75a693674da69d823 100755
--- a/tracing/bin/symbolize_trace
+++ b/tracing/bin/symbolize_trace
@@ -7,6 +7,7 @@ import argparse
import bisect
import collections
import gzip
+import itertools
import json
import os
import re
@@ -26,165 +27,7 @@ import symbolize_trace_atos_regex
import symbolize_trace_macho_reader
-# Relevant trace event phases from Chromium's
-# src/base/trace_event/common/trace_event_common.h.
-TRACE_EVENT_PHASE_METADATA = 'M'
-TRACE_EVENT_PHASE_MEMORY_DUMP = 'v'
-
-
-# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so)
-# as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available
-# via 'name' group.
-ANDROID_PATH_MATCHER = re.compile(
- r'^/data/(?:'
- r'app/[^/]+/lib/[^/]+/|'
- r'app-lib/[^/]+/|'
- r'data/[^/]+/incremental-install-files/lib/'
- r')(?P<name>.*\.so)')
-
-# Subpath of output path where unstripped libraries are stored.
-ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'
-
-
-def FindInSystemPath(binary_name):
- paths = os.environ['PATH'].split(os.pathsep)
- for path in paths:
- binary_path = os.path.join(path, binary_name)
- if os.path.isfile(binary_path):
- return binary_path
- return None
-
-
-class Symbolizer(object):
- # Encapsulates platform-specific symbolization logic.
- def __init__(self):
- self.is_mac = sys.platform == 'darwin'
- self.is_win = sys.platform == 'win32'
- if self.is_mac:
- self.binary = 'atos'
- self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher()
- elif self.is_win:
- self.binary = 'addr2line-pdb.exe'
- else:
- self.binary = 'addr2line'
- self.symbolizer_path = FindInSystemPath(self.binary)
-
- def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name):
- def _SymbolizerCallback(sym_info, frames):
- # Unwind inline chain to the top.
- while sym_info.inlined_by:
- sym_info = sym_info.inlined_by
-
- symbolized_name = sym_info.name if sym_info.name else unsymbolized_name
- for frame in frames:
- frame.name = symbolized_name
-
- symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path,
- self.symbolizer_path,
- _SymbolizerCallback,
- inlines=True)
-
- for address, frames in symfile.frames_by_address.iteritems():
- # SymbolizeAsync() asserts that the type of address is int. We operate
- # on longs (since they are raw pointers possibly from 64-bit processes).
- # It's OK to cast here because we're passing relative PC, which should
- # always fit into int.
- symbolizer.SymbolizeAsync(int(address), frames)
-
- symbolizer.Join()
-
-
- def _SymbolizeMac(self, symfile):
- chars_max = int(subprocess.check_output("getconf ARG_MAX", shell=True))
-
- # 16 for the address, 2 for "0x", 1 for the space
- chars_per_address = 19
-
- load_address = (symbolize_trace_macho_reader.
- ReadMachOTextLoadAddress(symfile.symbolizable_path))
- assert load_address is not None
-
- cmd_base = [self.symbolizer_path, '-arch', 'x86_64', '-l',
- '0x%x' % load_address, '-o',
- symfile.symbolizable_path]
- chars_for_other_arguments = len(' '.join(cmd_base)) + 1
-
- # The maximum number of inputs that can be processed at once is limited by
-# ARG_MAX. This currently evaluates to ~13000 on macOS.
- max_inputs = (chars_max - chars_for_other_arguments) / chars_per_address
-
- all_keys = symfile.frames_by_address.keys()
- processed_keys_count = 0
- while len(all_keys):
- input_count = min(len(all_keys), max_inputs)
- keys_to_process = all_keys[0:input_count]
-
- cmd = list(cmd_base)
- cmd.extend([hex(int(x) + load_address)
- for x in keys_to_process])
- output_array = subprocess.check_output(cmd).split('\n')
- for i in range(len(keys_to_process)):
- for frame in (symfile.frames_by_address.values()
- [i + processed_keys_count]):
- frame.name = self._matcher.Match(output_array[i])
- processed_keys_count += len(keys_to_process)
- all_keys = all_keys[input_count:]
-
-
- def _SymbolizeWin(self, symfile):
- """Invoke symbolizer binary on windows and write all input in one go.
-
- Unlike linux, on windows, symbolization talks through a shared system
- service that handles communication with the NT symbol servers. This
- creates an explicit serialization (and therefore lock contention) of
- any process using the symbol API for files that do not have a local PDB.
-
- Thus, even though the windows symbolizer binary can be made command line
- compatible with the POSIX addr2line interface, parallelizing the
- symbolization does not yield the same performance effects. Running
- just one symbolizer seems good enough for now. Can optimize later
- if this becomes a bottleneck.
- """
- cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe',
- symfile.symbolizable_path]
-
- proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE,
- stderr=sys.stderr)
- addrs = ["%x" % relative_pc for relative_pc in
- symfile.frames_by_address.keys()]
- (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs))
- stdout_data = stdout_data.split('\n')
-
- # This is known to be in the same order as stderr_data.
- for i, addr in enumerate(addrs):
- for frame in symfile.frames_by_address[int(addr, 16)]:
- # Output of addr2line with --functions is always 2 outputs per
- # symbol, function name followed by source line number. Only grab
- # the function name as line info is not always available.
- frame.name = stdout_data[i * 2]
-
-
- def Symbolize(self, symfile, unsymbolized_name):
- if self.is_mac:
- self._SymbolizeMac(symfile)
- if self.is_win:
- self._SymbolizeWin(symfile)
- else:
- self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name)
-
-
- def IsSymbolizableFile(self, file_path):
- if self.is_win:
- extension = os.path.splitext(file_path)[1].lower()
- return extension in ['.dll', '.exe']
- else:
- result = subprocess.check_output(['file', '-0', file_path])
- type_string = result[result.find('\0') + 1:]
- return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*',
- type_string, re.DOTALL))
-
-
-class ProcessMemoryMaps(object):
+class MemoryMap(object):
"""Represents 'process_mmaps' trace file entry."""
class Region(object):
@@ -221,15 +64,13 @@ class ProcessMemoryMaps(object):
return 'Region(0x{:X} - 0x{:X}, {})'.format(
self.start_address, self.end_address, self.file_path)
- def __init__(self, process_mmaps):
- """Parses 'process_mmaps' dictionary."""
-
+ def __init__(self, process_mmaps_json):
regions = []
- for region_value in process_mmaps['vm_regions']:
+ for region_json in process_mmaps_json['vm_regions']:
regions.append(self.Region(
- long(region_value['sa'], 16),
- long(region_value['sz'], 16),
- region_value['mf']))
+ long(region_json['sa'], 16),
+ long(region_json['sz'], 16),
+ region_json['mf']))
regions.sort()
# Copy regions without duplicates and check for overlaps.
@@ -259,104 +100,550 @@ class ProcessMemoryMaps(object):
return None
-class StackFrames(object):
- """Represents 'stackFrames' trace file entry."""
+class UnsupportedHeapDumpVersionError(Exception):
+ def __init__(self, version):
+ message = 'Unsupported heap dump version: {}'.format(version)
+ super(UnsupportedHeapDumpVersionError, self).__init__(message)
+
+
+class StringMap(object):
awong 2017/04/20 19:37:39 These classes should have doc strings explaining t
+ def __init__(self):
+ self._modified = False
+ self._string_jsons = []
+ self._string_by_id = {}
+ self._id_by_string = {}
+ self._max_string_id = 0
+
+ @property
+ def modified(self):
+ return self._modified
+
+ @property
+ def string_by_id(self):
+ return self._string_by_id
+
+ def ParseMore(self, heap_dump_version, strings_json):
+ if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
+ raise UnsupportedHeapDumpVersionError(heap_dump_version)
+
+ self._string_jsons.append(strings_json)
+ for string_json in strings_json:
+ self._Insert(string_json['id'], string_json['string'])
+
+ def Clear(self):
+ if self._string_by_id:
+ self._modified = True
awong 2017/04/20 22:12:14 Is Clear() not reset? This looks *almost* like __
+ self._string_by_id = {}
+ self._id_by_string = {}
+ self._Insert(0, '[null]')
+ self._max_string_id = 0
+
+ def AddString(self, string):
+ string_id = self._id_by_string.get(string)
+ if string_id is None:
+ string_id = self._max_string_id + 1
+ self._Insert(string_id, string)
+ self._modified = True
+ return string_id
+
+ def ApplyModifications(self):
awong 2017/04/20 22:12:14 What are such modifications? Can we use a less gen
+ if not self.modified:
+ return
+
+ assert self._string_jsons, 'no JSON nodes'
+
+ # Serialize into first JSON node, and clear all others.
awong 2017/04/20 22:12:14 Can we get a "why" in this comment? As a reader,
+
+ for string_json in self._string_jsons:
+ string_json[:] = []
awong 2017/04/20 22:12:14 string_json.clear()?
+ string_json = self._string_jsons[0]
+ for string_id, string in self._string_by_id.iteritems():
+ string_json.append({'id': string_id, 'string': string})
+
+ self._modified = False
awong 2017/04/20 22:12:14 This is confusing. Shouldn't it be true?
+
+ def _Insert(self, string_id, string):
+ self._id_by_string[string] = string_id
+ self._string_by_id[string_id] = string
+ self._max_string_id = max(self._max_string_id, string_id)
+
+
+class TypeNameMap(object):
+ def __init__(self):
+ self._modified = False
+ self._type_name_jsons = []
+ self._name_by_id = {}
+ self._id_by_name = {}
+ self._max_type_id = 0
+
+ @property
+ def modified(self):
+ return self._modified
+
+ @property
+ def name_by_id(self):
+ return self._name_by_id
+
+ def ParseMore(self, heap_dump_version, type_name_json, string_map):
+ if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
+ raise UnsupportedHeapDumpVersionError(heap_dump_version)
+
+ self._type_name_jsons.append(type_name_json)
+ for type_json in type_name_json:
+ self._Insert(type_json['id'],
+ string_map.string_by_id[type_json['name_sid']])
+
+ def AddType(self, type_name):
+ type_id = self._id_by_name.get(type_name)
+ if type_id is None:
+ type_id = self._max_type_id + 1
+ self._Insert(type_id, type_name)
+ self._modified = True
+ return type_id
+
+ def ApplyModifications(self, string_map, force=False):
+ if not self.modified and not force:
+ return
+
+ assert self._type_name_jsons, 'no JSON nodes'
+
+ # Serialize into first JSON node, and clear all others.
+
+ for types_json in self._type_name_jsons:
+ types_json[:] = []
awong 2017/04/20 22:12:14 types_json.clear()?
+ types_json = self._type_name_jsons[0]
+ for type_id, type_name in self._name_by_id.iteritems():
+ types_json.append({
+ 'id': type_id,
+ 'name_sid': string_map.AddString(type_name)})
+
+ self._modified = False
awong 2017/04/20 22:12:14 Should this be true?
+
+ def _Insert(self, type_id, type_name):
+ self._id_by_name[type_name] = type_id
+ self._name_by_id[type_id] = type_name
+ self._max_type_id = max(self._max_type_id, type_id)
- class PCFrame(object):
- def __init__(self, pc, frame):
+
+class StackFrameMap(object):
+ class Frame(object):
+ def __init__(self, frame_id, name, parent_frame_id):
self._modified = False
- self._pc = pc
- self._frame = frame
+ self._id = frame_id
+ self._name = name
+ self._pc = self._ParsePC(name)
+ self._parent_id = parent_frame_id
+ self._ext = None
@property
def modified(self):
return self._modified
@property
+ def id(self):
+ return self._id
+
+ @property
def pc(self):
return self._pc
@property
def name(self):
- return self._frame['name']
+ return self._name
@name.setter
def name(self, value):
self._modified = True
- self._frame['name'] = value
+ self._name = value
+
+ @property
+ def parent_id(self):
+ return self._parent_id
+
+ _PC_TAG = 'pc:'
+
+ def _ParsePC(self, name):
+ if not name.startswith(self._PC_TAG):
awong 2017/04/20 22:12:14 How about invert the logic to remove the not?
+ return None
+ return long(name[len(self._PC_TAG):], 16)
+
+ def _ClearModified(self):
+ self._modified = False
+
+ def __init__(self):
awong 2017/04/20 22:12:14 Group the __init__?
+ self._modified = False
+ self._heap_dump_version = None
+ self._stack_frames_jsons = []
+ self._frame_by_id = {}
+
+ @property
+ def modified(self):
+ return (self._modified or
+ any(f.modified for f in self._frame_by_id.itervalues()))
+
+ @property
+ def frame_by_id(self):
+ return self._frame_by_id
+
+ def ParseMore(self, heap_dump_version, stack_frames_json, string_map):
+ frame_by_id = {}
+ if heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
+ if self._stack_frames_jsons:
+ raise Exception('Legacy stack frames are expected only once.')
+ for frame_id, frame_json in stack_frames_json.iteritems():
+ frame = self.Frame(frame_id,
+ frame_json['name'],
+ frame_json.get('parent'))
+ frame_by_id[frame.id] = frame
+ else:
+ if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
+ raise UnsupportedHeapDumpVersionError(heap_dump_version)
+ for frame_json in stack_frames_json:
+ frame = self.Frame(frame_json['id'],
+ string_map.string_by_id[frame_json['name_sid']],
+ frame_json.get('parent'))
+ frame_by_id[frame.id] = frame
+
+ self._heap_dump_version = heap_dump_version
+ self._stack_frames_jsons.append(stack_frames_json)
+
+ self._frame_by_id = frame_by_id
+
+ def ApplyModifications(self, string_map, force=False):
+ if not self.modified and not force:
+ return
+
+ assert self._stack_frames_jsons, 'no JSON nodes'
+ if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
+ assert string_map is None, \
+ 'string_map should not be used with the legacy format'
+
+ # Serialize frames into first JSON node, and clear all others.
+
+ for frames_json in self._stack_frames_jsons:
+ if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
+ frames_json.clear()
+ else:
+ frames_json[:] = []
+
+ frames_json = self._stack_frames_jsons[0]
+ for frame in self._frame_by_id.itervalues():
+ if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
+ frame_json = {'name': frame.name}
+ frames_json[frame.id] = frame_json
+ else:
+ frame_json = {
+ 'id': frame.id,
+ 'name_sid': string_map.AddString(frame.name)
+ }
+ frames_json.append(frame_json)
+ if frame.parent_id is not None:
+ frame_json['parent'] = frame.parent_id
+ frame._ClearModified()
+
+ self._modified = False
- def __init__(self, stack_frames):
- """Constructs object using 'stackFrames' dictionary."""
- self._pc_frames = []
- for frame in stack_frames.itervalues():
- pc_frame = self._ParsePCFrame(frame)
- if pc_frame:
- self._pc_frames.append(pc_frame)
+
+class HeapProfile(object):
+ EntryKey = collections.namedtuple(
+ 'EntryKey',
+ ['stack_frame_id', 'type_name_id'])
+
+ class Entry(object):
+ def __init__(self, key, mapped_value_by_name, numeric_value_by_name):
+ self._key = key
+ self._mapped_value_by_name = mapped_value_by_name
+ self._numeric_value_by_name = numeric_value_by_name
+
+ @property
+ def key(self):
+ return self._key
+
+ @property
+ def stack_frame_id(self):
+ return self._key.stack_frame_id
+
+ @property
+ def type_name_id(self):
+ return self._key.type_name_id
+
+ def _AddValuesFrom(self, entry):
+ self._mapped_value_by_name.clear()
+ for name, value in entry._numeric_value_by_name.iteritems():
+ value += self._numeric_value_by_name.get(name, 0)
+ self._numeric_value_by_name[name] = value
+
+ def __init__(self, allocator_name, entries_json, mapped_entry_names):
+ self._modified = False
+ self._allocator_name = allocator_name
+ self._entries_json = entries_json
+ self._entries = []
+ for values in zip(*entries_json.itervalues()):
+ stack_frame_id = None
+ type_name_id = None
+ mapped_value_by_name = {}
+ numeric_value_by_name = {}
+ for index, name in enumerate(entries_json.iterkeys()):
+ value = values[index]
+ if name == 'nodes':
+ stack_frame_id = value
+ elif name == 'types':
+ type_name_id = value
+ elif name in mapped_entry_names:
+ mapped_value_by_name[name] = value
+ else:
+ numeric_value_by_name[name] = value
+ entry = self.Entry(self.EntryKey(stack_frame_id, type_name_id),
+ mapped_value_by_name, numeric_value_by_name)
+ self._entries.append(entry)
+
+ @property
+ def modified(self):
+ return self._modified
@property
- def pc_frames(self):
- return self._pc_frames
+ def allocator_name(self):
+ return self._allocator_name
+
+ @property
+ def entries(self):
+ return self._entries
+
+ def ApplyModifications(self):
+ if not self.modified:
+ return
+
+ mapped_value_names = set()
+ numeric_value_names = set()
+ for entry in self._entries:
+ mapped_value_names.update(entry._mapped_value_by_name.iterkeys())
+ numeric_value_names.update(entry._numeric_value_by_name.iterkeys())
+
+ def _AddJSONValue(name, value):
+ values = self._entries_json.get(name)
+ if values is None:
+ values = []
+ self._entries_json[name] = values
+ values.append(value)
+
+ self._entries_json.clear()
+ for entry in self._entries:
+ _AddJSONValue('nodes', entry.stack_frame_id)
+ _AddJSONValue('types', entry.type_name_id)
+ for name in mapped_value_names:
+ value = entry._mapped_value_by_name[name]
+ _AddJSONValue(name, value)
+ for name in numeric_value_names:
+ value = entry._numeric_value_by_name[name]
+ _AddJSONValue(name, value)
+
+ self._modified = False
+
+
+class MemoryDump(object):
+ def __init__(self, allocators_json, mapped_entry_names):
+ self._profiles = []
+ for allocator_name, entries_json in allocators_json.iteritems():
+ profile = HeapProfile(allocator_name, entries_json, mapped_entry_names)
+ self._profiles.append(profile)
@property
def modified(self):
- return any(f.modified for f in self._pc_frames)
+ return any(p.modified for p in self.profiles)
- _PC_TAG = 'pc:'
+ @property
+ def profiles(self):
+ return self._profiles
- @classmethod
- def _ParsePCFrame(self, frame):
- name = frame['name']
- if not name.startswith(self._PC_TAG):
- return None
- pc = long(name[len(self._PC_TAG):], 16)
- return self.PCFrame(pc, frame)
+ def ApplyModifications(self):
+ for profile in self._profiles:
+ profile.ApplyModifications()
-class Process(object):
- """Holds various bits of information about a process in a trace file."""
+class Trace(object):
- def __init__(self, pid):
- self.pid = pid
- self.name = None
- self.mmaps = None
- self.stack_frames = None
+ HEAP_DUMP_VERSION_LEGACY = 'Legacy'
+ HEAP_DUMP_VERSION_1 = 1
+ class Process(object):
+ def __init__(self, pid):
+ self._pid = pid
+ self._name = None
+ self._memory_map = None
+ self._memory_dumps = []
+ self._stack_frame_map = StackFrameMap()
+ self._type_name_map = TypeNameMap()
+ self._string_map = StringMap()
+ self._heap_dump_version = None
-def CollectProcesses(trace):
- """Parses trace dictionary and returns pid->Process map of all processes
- suitable for symbolization (which have both mmaps and stack_frames).
- """
+ @property
+ def modified(self):
+ return (self._stack_frame_map.modified or
+ self._type_name_map.modified or
+ any(d.modified for d in self._memory_dumps))
- process_map = {}
+ @property
+ def pid(self):
+ return self._pid
- # Android traces produced via 'chrome://inspect/?tracing#devices' are
- # just list of events.
- events = trace if isinstance(trace, list) else trace['traceEvents']
- for event in events:
- name = event.get('name')
- if not name:
- continue
+ @property
+ def name(self):
awong 2017/04/20 22:28:04 For these properties, having a docstring that expl
+ return self._name
+
+ @property
+ def unique_name(self):
+ name = self._name if self._name else 'UnnamedProcess'
+ return '{}({})'.format(name, self._pid)
+
+ @property
+ def memory_map(self):
+ return self._memory_map
+
+ @property
+ def memory_dumps(self):
awong 2017/04/20 22:28:04 Why is this one plural?
+ return self._memory_dumps
- pid = event['pid']
- process = process_map.get(pid)
- if process is None:
- process = Process(pid)
- process_map[pid] = process
+ @property
+ def stack_frame_map(self):
+ return self._stack_frame_map
+
+ @property
+ def type_name_map(self):
+ return self._type_name_map
+
+ def ApplyModifications(self):
+ if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
+ self._stack_frame_map.ApplyModifications(None)
+ else:
+ if self._stack_frame_map.modified or self._type_name_map.modified:
+ self._string_map.Clear()
+ self._stack_frame_map.ApplyModifications(self._string_map, force=True)
+ self._type_name_map.ApplyModifications(self._string_map, force=True)
+ self._string_map.ApplyModifications()
+ for dump in self._memory_dumps:
+ dump.ApplyModifications()
+
+ def __init__(self, trace_json):
+ self._trace_json = trace_json
+ self._processes = []
+ self._heap_dump_version = None
+
+ # Misc per-process information needed only during parsing.
+ class ProcessExt(object):
+ def __init__(self, pid):
+ self.process = Trace.Process(pid)
+ self.mapped_entry_names = set()
+ self.process_mmaps_json = None
+ self.seen_strings_json = False
+
+ process_ext_by_pid = {}
+
+ # Android traces produced via 'chrome://inspect/?tracing#devices' are
+ # just list of events.
+ events = trace_json if isinstance(trace_json, list) \
+ else trace_json['traceEvents']
+ for event in events:
+ name = event.get('name')
+ if not name:
+ continue
+
+ pid = event['pid']
+ process_ext = process_ext_by_pid.get(pid)
+ if process_ext is None:
+ process_ext = ProcessExt(pid)
+ process_ext_by_pid[pid] = process_ext
+ process = process_ext.process
+
+ phase = event['ph']
+ if phase == self._EVENT_PHASE_METADATA:
+ if name == 'process_name':
+ process._name = event['args']['name']
+ elif name == 'stackFrames':
+ process._stack_frame_map.ParseMore(
+ self._UseHeapDumpVersion(self.HEAP_DUMP_VERSION_LEGACY),
+ event['args']['stackFrames'],
+ process._string_map)
+ elif phase == self._EVENT_PHASE_MEMORY_DUMP:
+ dumps = event['args']['dumps']
+ process_mmaps = dumps.get('process_mmaps')
+ if process_mmaps:
+ # We want the most recent memory map, so parsing happens later
+ # once we finished reading all events.
+ process_ext.process_mmaps_json = process_mmaps
+ heaps = dumps.get('heaps_v2')
+ if heaps:
+ version = self._UseHeapDumpVersion(heaps['version'])
+ maps = heaps.get('maps')
+ if maps:
+ process_ext.mapped_entry_names.update(maps.iterkeys())
+ types = maps.get('types')
+ stack_frames = maps.get('nodes')
+ strings = maps.get('strings')
+ if (strings is None and (types or stack_frames)
+ and not process_ext.seen_strings_json):
+ # ApplyModifications() for TypeNameMap and StackFrameMap puts
+ # everything into the first node and depends on StringMap. So
+ # we need to make sure that 'strings' node is there if any of
+ # other two nodes present.
+ strings = []
+ maps['strings'] = strings
+ if strings is not None:
+ process_ext.seen_strings_json = True
+ process._string_map.ParseMore(version, strings)
+ if types:
+ process._type_name_map.ParseMore(
+ version, types, process._string_map)
+ if stack_frames:
+ process._stack_frame_map.ParseMore(
+ version, stack_frames, process._string_map)
+ allocators = heaps.get('allocators')
+ if allocators:
+ dump = MemoryDump(allocators, process_ext.mapped_entry_names)
+ process._memory_dumps.append(dump)
+
+ self._processes = []
+ for pe in process_ext_by_pid.itervalues():
+ pe.process._heap_dump_version = self._heap_dump_version
+ if pe.process_mmaps_json:
+ # Now parse the most recent memory map.
+ pe.process._memory_map = MemoryMap(pe.process_mmaps_json)
+ self._processes.append(pe.process)
- phase = event['ph']
- if phase == TRACE_EVENT_PHASE_METADATA:
- if name == 'process_name':
- process.name = event['args']['name']
- elif name == 'stackFrames':
- process.stack_frames = StackFrames(event['args']['stackFrames'])
- elif phase == TRACE_EVENT_PHASE_MEMORY_DUMP:
- process_mmaps = event['args']['dumps'].get('process_mmaps')
- if process_mmaps:
- # TODO(dskiba): this parses all process_mmaps, but retains only the
- # last one. We need to parse only once (lazy parsing?).
- process.mmaps = ProcessMemoryMaps(process_mmaps)
+ @property
+ def modified(self):
+ return any(p.modified for p in self._processes)
- return [p for p in process_map.itervalues() if p.mmaps and p.stack_frames]
+ @property
+ def processes(self):
+ return self._processes
+
+ @property
+ def heap_dump_version(self):
+ return self._heap_dump_version
+
+ def ApplyModifications(self):
+ for process in self._processes:
+ process.ApplyModifications()
+ assert not self.modified, 'still modified'
+
+ def Serialize(self):
+ return self._trace_json
+
+ # Relevant trace event phases from Chromium's
+ # src/base/trace_event/common/trace_event_common.h.
+ _EVENT_PHASE_METADATA = 'M'
+ _EVENT_PHASE_MEMORY_DUMP = 'v'
+
+ def _UseHeapDumpVersion(self, version):
+ if self._heap_dump_version is None:
+ self._heap_dump_version = version
+ return version
+ elif self._heap_dump_version != version:
+ raise Exception(
+ ("Inconsistent trace file: first saw '{}' heap dump version, "
+ "then '{}'.").format(self._heap_dump_version, version))
+ else:
+ return version
class SymbolizableFile(object):
@@ -381,8 +668,12 @@ def ResolveSymbolizableFiles(processes):
"""
symfile_by_path = {}
for process in processes:
- for frame in process.stack_frames.pc_frames:
- region = process.mmaps.FindRegion(frame.pc)
+ if not process.memory_map:
awong 2017/04/20 22:28:04 Comment explaining when this can occur?
+ continue
+ for frame in process.stack_frame_map.frame_by_id.itervalues():
+ if frame.pc is None:
+ continue
+ region = process.memory_map.FindRegion(frame.pc)
if region is None:
frame.name = '<unresolved>'
continue
@@ -397,15 +688,154 @@ def ResolveSymbolizableFiles(processes):
return symfile_by_path.values()
+def FindInSystemPath(binary_name):
+ paths = os.environ['PATH'].split(os.pathsep)
+ for path in paths:
+ binary_path = os.path.join(path, binary_name)
+ if os.path.isfile(binary_path):
+ return binary_path
+ return None
+
+
+class Symbolizer(object):
+ # Encapsulates platform-specific symbolization logic.
awong 2017/04/20 22:28:04 Turn into docstring.
+ def __init__(self):
+ self.is_mac = sys.platform == 'darwin'
+ self.is_win = sys.platform == 'win32'
+ if self.is_mac:
+ self.binary = 'atos'
+ self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher()
+ elif self.is_win:
+ self.binary = 'addr2line-pdb.exe'
+ else:
+ self.binary = 'addr2line'
+ self.symbolizer_path = FindInSystemPath(self.binary)
+
+ def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name):
+ def _SymbolizerCallback(sym_info, frames):
+ # Unwind inline chain to the top.
+ while sym_info.inlined_by:
+ sym_info = sym_info.inlined_by
+
+ symbolized_name = sym_info.name if sym_info.name else unsymbolized_name
+ for frame in frames:
+ frame.name = symbolized_name
+ frame.ext.source_path = sym_info.source_path
+
+ symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path,
+ self.symbolizer_path,
+ _SymbolizerCallback,
+ inlines=True)
+
+ for address, frames in symfile.frames_by_address.iteritems():
+ # SymbolizeAsync() asserts that the type of address is int. We operate
+ # on longs (since they are raw pointers possibly from 64-bit processes).
+ # It's OK to cast here because we're passing relative PC, which should
+ # always fit into int.
+ symbolizer.SymbolizeAsync(int(address), frames)
+
+ symbolizer.Join()
+
+
+ def _SymbolizeMac(self, symfile):
+ chars_max = int(subprocess.check_output("getconf ARG_MAX", shell=True))
+
+ # 16 for the address, 2 for "0x", 1 for the space
+ chars_per_address = 19
+
+ load_address = (symbolize_trace_macho_reader.
+ ReadMachOTextLoadAddress(symfile.symbolizable_path))
+ assert load_address is not None
+
+ cmd_base = [self.symbolizer_path, '-arch', 'x86_64', '-l',
+ '0x%x' % load_address, '-o',
+ symfile.symbolizable_path]
+ chars_for_other_arguments = len(' '.join(cmd_base)) + 1
+
+ # The maximum number of inputs that can be processed at once is limited by
+ # ARG_MAX. This currently evaluates to ~13000 on macOS.
+ max_inputs = (chars_max - chars_for_other_arguments) / chars_per_address
+
+ all_keys = symfile.frames_by_address.keys()
+ processed_keys_count = 0
+ while len(all_keys):
+ input_count = min(len(all_keys), max_inputs)
+ keys_to_process = all_keys[0:input_count]
+ cmd = list(cmd_base)
+ cmd.extend([hex(int(x) + load_address)
+ for x in keys_to_process])
+ output_array = subprocess.check_output(cmd).split('\n')
+ for i in range(len(keys_to_process)):
+ for frame in (symfile.frames_by_address.values()
+ [i + processed_keys_count]):
+ frame.name = self._matcher.Match(output_array[i])
+ processed_keys_count += len(keys_to_process)
+ all_keys = all_keys[input_count:]
+
+ def _SymbolizeWin(self, symfile):
+ """Invoke symbolizer binary on windows and write all input in one go.
+
+ Unlike linux, on windows, symbolization talks through a shared system
+ service that handles communication with the NT symbol servers. This
+ creates an explicit serialization (and therefore lock contention) of
+ any process using the symbol API for files that do not have a local PDB.
+
+ Thus, even though the windows symbolizer binary can be made command line
+ compatible with the POSIX addr2line interface, parallelizing the
+ symbolization does not yield the same performance effects. Running
+ just one symbolizer seems good enough for now. Can optimize later
+ if this becomes a bottleneck.
+ """
+ cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe',
+ symfile.symbolizable_path]
+
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE,
+ stderr=sys.stderr)
+ addrs = ["%x" % relative_pc for relative_pc in
+ symfile.frames_by_address.keys()]
+ (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs))
+ stdout_data = stdout_data.split('\n')
+
+ # This is known to be in the same order as stderr_data.
+ for i, addr in enumerate(addrs):
+ for frame in symfile.frames_by_address[int(addr, 16)]:
+ # Output of addr2line with --functions is always 2 outputs per
+ # symbol, function name followed by source line number. Only grab
+ # the function name as line info is not always available.
+ frame.name = stdout_data[i * 2]
+
+ def Symbolize(self, symfile, unsymbolized_name):
+ if self.is_mac:
+ self._SymbolizeMac(symfile)
+ elif self.is_win:
+ self._SymbolizeWin(symfile)
+ else:
+ self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name)
+
+ def IsSymbolizableFile(self, file_path):
+ if self.is_win:
+ extension = os.path.splitext(file_path)[1].lower()
+ return extension in ['.dll', '.exe']
+ else:
+ result = subprocess.check_output(['file', '-0', file_path])
+ type_string = result[result.find('\0') + 1:]
+ return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*',
+ type_string, re.DOTALL))
+
+
def SymbolizeFiles(symfiles, symbolizer):
"""Symbolizes each file in the given list of SymbolizableFiles
and updates stack frames with symbolization results."""
+
+ if not symfiles:
+ print 'Nothing to symbolize.'
+ return
+
print 'Symbolizing...'
def _SubPrintf(message, *args):
print (' ' + message).format(*args)
- symbolized = False
for symfile in symfiles:
unsymbolized_name = '<{}>'.format(
symfile.path if symfile.path else 'unnamed')
@@ -432,9 +862,20 @@ def SymbolizeFiles(symfiles, symbolizer):
symfile.path)
symbolizer.Symbolize(symfile, unsymbolized_name)
- symbolized = True
- return symbolized
+
+# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so)
+# as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available
+# via 'name' group.
+ANDROID_PATH_MATCHER = re.compile(
awong 2017/04/20 22:28:04 This is hardish to read and matching paths with re
+ r'^/data/(?:'
+ r'app/[^/]+/lib/[^/]+/|'
+ r'app-lib/[^/]+/|'
+ r'data/[^/]+/incremental-install-files/lib/'
+ r')(?P<name>.*\.so)')
+
+# Subpath of output path where unstripped libraries are stored.
+ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'
def HaveFilesFromAndroid(symfiles):
@@ -455,59 +896,87 @@ def RemapAndroidFiles(symfiles, output_path):
symfile.symbolizable_path = 'android://{}'.format(symfile.path)
+def Symbolize(options, trace, symbolizer):
+ symfiles = ResolveSymbolizableFiles(trace.processes)
+
+ # Android trace files don't have any indication they are from Android.
+ # So we're checking for Android-specific paths.
+ if HaveFilesFromAndroid(symfiles):
+ if not options.output_directory:
+ sys.exit('The trace file appears to be from Android. Please '
+ 'specify output directory to properly symbolize it.')
+ RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory))
+
+ SymbolizeFiles(symfiles, symbolizer)
+
+
+def OpenTraceFile(file_path, mode):
+ if file_path.endswith('.gz'):
+ return gzip.open(file_path, mode + 'b')
+ else:
+ return open(file_path, mode + 't')
+
+
# Suffix used for backup files.
BACKUP_FILE_TAG = '.BACKUP'
def main():
- parser = argparse.ArgumentParser()
- parser.add_argument('file',
- help='Trace file to symbolize (.json or .json.gz)')
- parser.add_argument('--no-backup',
- dest='backup', default='true', action='store_false',
- help="Don't create {} files".format(BACKUP_FILE_TAG))
- parser.add_argument('--output-directory',
- help='The path to the build output directory, such ' +
- 'as out/Debug. Only needed for Android.')
- options = parser.parse_args()
-
- trace_file_path = options.file
- def _OpenTraceFile(mode):
- if trace_file_path.endswith('.gz'):
- return gzip.open(trace_file_path, mode + 'b')
- else:
- return open(trace_file_path, mode + 't')
+ class MultilineHelpFormatter(argparse.HelpFormatter):
+ def _split_lines(self, text, width):
+ extra_lines = []
+ if '\n' in text:
+ lines = text.splitlines()
+ text = lines[0]
+ extra_lines = lines[1:]
+ return super(MultilineHelpFormatter, self)._split_lines(text, width) + \
+ extra_lines
+
+ parser = argparse.ArgumentParser(formatter_class=MultilineHelpFormatter)
+ parser.add_argument(
+ 'file',
+ help='Trace file to symbolize (.json or .json.gz)')
+
+ parser.add_argument(
+ '--no-backup', dest='backup', default='true', action='store_false',
+ help="Don't create {} files".format(BACKUP_FILE_TAG))
+
+ parser.add_argument(
+ '--output-directory',
+ help='The path to the build output directory, such as out/Debug.')
symbolizer = Symbolizer()
if symbolizer.symbolizer_path is None:
sys.exit("Can't symbolize - no %s in PATH." % symbolizer.binary)
+ options = parser.parse_args()
+
+ trace_file_path = options.file
+
print 'Reading trace file...'
- with _OpenTraceFile('r') as trace_file:
- trace = json.load(trace_file)
+ with OpenTraceFile(trace_file_path, 'r') as trace_file:
+ trace = Trace(json.load(trace_file))
- processes = CollectProcesses(trace)
- symfiles = ResolveSymbolizableFiles(processes)
+ Symbolize(options, trace, symbolizer)
- # Android trace files don't have any indication they are from Android.
- # So we're checking for Android-specific paths.
- if HaveFilesFromAndroid(symfiles):
- if not options.output_directory:
- parser.error('The trace file appears to be from Android. Please '
- "specify output directory (e.g. 'out/Debug') to properly "
- 'symbolize it.')
- RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory))
+ if trace.modified:
+ trace.ApplyModifications()
- if SymbolizeFiles(symfiles, symbolizer):
if options.backup:
backup_file_path = trace_file_path + BACKUP_FILE_TAG
- print 'Backing up trace file to {}...'.format(backup_file_path)
+ if os.path.exists(backup_file_path):
+ for i in itertools.count(1):
+ unique_file_path = '{}{}'.format(backup_file_path, i)
+ if not os.path.exists(unique_file_path):
+ backup_file_path = unique_file_path
+ break
+ print 'Backing up trace file to {}'.format(backup_file_path)
os.rename(trace_file_path, backup_file_path)
- print 'Updating trace file...'
- with _OpenTraceFile('w') as trace_file:
- json.dump(trace, trace_file)
+ print 'Updating the trace file...'
+ with OpenTraceFile(trace_file_path, 'w') as trace_file:
+ json.dump(trace.Serialize(), trace_file)
else:
- print 'No PCs symbolized - not updating trace file.'
+ print 'No modifications were made - not updating the trace file.'
if __name__ == '__main__':
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698