Chromium Code Reviews| Index: tracing/bin/symbolize_trace |
| diff --git a/tracing/bin/symbolize_trace b/tracing/bin/symbolize_trace |
| index 7c6f5a4e37b04144fc5bf255bb5509857023654a..a7416d2b54e0960143898ae75a693674da69d823 100755 |
| --- a/tracing/bin/symbolize_trace |
| +++ b/tracing/bin/symbolize_trace |
| @@ -7,6 +7,7 @@ import argparse |
| import bisect |
| import collections |
| import gzip |
| +import itertools |
| import json |
| import os |
| import re |
| @@ -26,165 +27,7 @@ import symbolize_trace_atos_regex |
| import symbolize_trace_macho_reader |
| -# Relevant trace event phases from Chromium's |
| -# src/base/trace_event/common/trace_event_common.h. |
| -TRACE_EVENT_PHASE_METADATA = 'M' |
| -TRACE_EVENT_PHASE_MEMORY_DUMP = 'v' |
| - |
| - |
| -# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so) |
| -# as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available |
| -# via 'name' group. |
| -ANDROID_PATH_MATCHER = re.compile( |
| - r'^/data/(?:' |
| - r'app/[^/]+/lib/[^/]+/|' |
| - r'app-lib/[^/]+/|' |
| - r'data/[^/]+/incremental-install-files/lib/' |
| - r')(?P<name>.*\.so)') |
| - |
| -# Subpath of output path where unstripped libraries are stored. |
| -ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped' |
| - |
| - |
| -def FindInSystemPath(binary_name): |
| - paths = os.environ['PATH'].split(os.pathsep) |
| - for path in paths: |
| - binary_path = os.path.join(path, binary_name) |
| - if os.path.isfile(binary_path): |
| - return binary_path |
| - return None |
| - |
| - |
| -class Symbolizer(object): |
| - # Encapsulates platform-specific symbolization logic. |
| - def __init__(self): |
| - self.is_mac = sys.platform == 'darwin' |
| - self.is_win = sys.platform == 'win32' |
| - if self.is_mac: |
| - self.binary = 'atos' |
| - self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher() |
| - elif self.is_win: |
| - self.binary = 'addr2line-pdb.exe' |
| - else: |
| - self.binary = 'addr2line' |
| - self.symbolizer_path = FindInSystemPath(self.binary) |
| - |
| - def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name): |
| - def _SymbolizerCallback(sym_info, frames): |
| - # Unwind inline chain to the top. |
| - while sym_info.inlined_by: |
| - sym_info = sym_info.inlined_by |
| - |
| - symbolized_name = sym_info.name if sym_info.name else unsymbolized_name |
| - for frame in frames: |
| - frame.name = symbolized_name |
| - |
| - symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path, |
| - self.symbolizer_path, |
| - _SymbolizerCallback, |
| - inlines=True) |
| - |
| - for address, frames in symfile.frames_by_address.iteritems(): |
| - # SymbolizeAsync() asserts that the type of address is int. We operate |
| - # on longs (since they are raw pointers possibly from 64-bit processes). |
| - # It's OK to cast here because we're passing relative PC, which should |
| - # always fit into int. |
| - symbolizer.SymbolizeAsync(int(address), frames) |
| - |
| - symbolizer.Join() |
| - |
| - |
| - def _SymbolizeMac(self, symfile): |
| - chars_max = int(subprocess.check_output("getconf ARG_MAX", shell=True)) |
| - |
| - # 16 for the address, 2 for "0x", 1 for the space |
| - chars_per_address = 19 |
| - |
| - load_address = (symbolize_trace_macho_reader. |
| - ReadMachOTextLoadAddress(symfile.symbolizable_path)) |
| - assert load_address is not None |
| - |
| - cmd_base = [self.symbolizer_path, '-arch', 'x86_64', '-l', |
| - '0x%x' % load_address, '-o', |
| - symfile.symbolizable_path] |
| - chars_for_other_arguments = len(' '.join(cmd_base)) + 1 |
| - |
| - # The maximum number of inputs that can be processed at once is limited by |
| - # ARG_MAX. This currently evaluates to ~13000 on macOS. |
| - max_inputs = (chars_max - chars_for_other_arguments) / chars_per_address |
| - |
| - all_keys = symfile.frames_by_address.keys() |
| - processed_keys_count = 0 |
| - while len(all_keys): |
| - input_count = min(len(all_keys), max_inputs) |
| - keys_to_process = all_keys[0:input_count] |
| - |
| - cmd = list(cmd_base) |
| - cmd.extend([hex(int(x) + load_address) |
| - for x in keys_to_process]) |
| - output_array = subprocess.check_output(cmd).split('\n') |
| - for i in range(len(keys_to_process)): |
| - for frame in (symfile.frames_by_address.values() |
| - [i + processed_keys_count]): |
| - frame.name = self._matcher.Match(output_array[i]) |
| - processed_keys_count += len(keys_to_process) |
| - all_keys = all_keys[input_count:] |
| - |
| - |
| - def _SymbolizeWin(self, symfile): |
| - """Invoke symbolizer binary on windows and write all input in one go. |
| - |
| - Unlike linux, on windows, symbolization talks through a shared system |
| - service that handles communication with the NT symbol servers. This |
| - creates an explicit serialization (and therefore lock contention) of |
| - any process using the symbol API for files that do not have a local PDB. |
| - |
| - Thus, even though the windows symbolizer binary can be made command-line |
| - compatible with the POSIX addr2line interface, parallelizing the |
| - symbolization does not yield the same performance effects. Running |
| - just one symbolizer seems good enough for now. Can optimize later |
| - if this becomes a bottleneck. |
| - """ |
| - cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe', |
| - symfile.symbolizable_path] |
| - |
| - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, |
| - stderr=sys.stderr) |
| - addrs = ["%x" % relative_pc for relative_pc in |
| - symfile.frames_by_address.keys()] |
| - (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs)) |
| - stdout_data = stdout_data.split('\n') |
| - |
| - # This is known to be in the same order as stderr_data. |
| - for i, addr in enumerate(addrs): |
| - for frame in symfile.frames_by_address[int(addr, 16)]: |
| - # Output of addr2line with --functions is always 2 outputs per |
| - # symbol, function name followed by source line number. Only grab |
| - # the function name as line info is not always available. |
| - frame.name = stdout_data[i * 2] |
| - |
| - |
| - def Symbolize(self, symfile, unsymbolized_name): |
| - if self.is_mac: |
| - self._SymbolizeMac(symfile) |
| - if self.is_win: |
| - self._SymbolizeWin(symfile) |
| - else: |
| - self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name) |
| - |
| - |
| - def IsSymbolizableFile(self, file_path): |
| - if self.is_win: |
| - extension = os.path.splitext(file_path)[1].lower() |
| - return extension in ['.dll', '.exe'] |
| - else: |
| - result = subprocess.check_output(['file', '-0', file_path]) |
| - type_string = result[result.find('\0') + 1:] |
| - return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*', |
| - type_string, re.DOTALL)) |
| - |
| - |
| -class ProcessMemoryMaps(object): |
| +class MemoryMap(object): |
| """Represents 'process_mmaps' trace file entry.""" |
| class Region(object): |
| @@ -221,15 +64,13 @@ class ProcessMemoryMaps(object): |
| return 'Region(0x{:X} - 0x{:X}, {})'.format( |
| self.start_address, self.end_address, self.file_path) |
| - def __init__(self, process_mmaps): |
| - """Parses 'process_mmaps' dictionary.""" |
| - |
| + def __init__(self, process_mmaps_json): |
| regions = [] |
| - for region_value in process_mmaps['vm_regions']: |
| + for region_json in process_mmaps_json['vm_regions']: |
| regions.append(self.Region( |
| - long(region_value['sa'], 16), |
| - long(region_value['sz'], 16), |
| - region_value['mf'])) |
| + long(region_json['sa'], 16), |
| + long(region_json['sz'], 16), |
| + region_json['mf'])) |
| regions.sort() |
| # Copy regions without duplicates and check for overlaps. |
| @@ -259,104 +100,550 @@ class ProcessMemoryMaps(object): |
| return None |
| -class StackFrames(object): |
| - """Represents 'stackFrames' trace file entry.""" |
| +class UnsupportedHeapDumpVersionError(Exception): |
| + def __init__(self, version): |
| + message = 'Unsupported heap dump version: {}'.format(version) |
| + super(UnsupportedHeapDumpVersionError, self).__init__(message) |
| + |
| + |
| +class StringMap(object): |
|
awong
2017/04/20 19:37:39
These classes should have doc strings explaining t
|
| + def __init__(self): |
| + self._modified = False |
| + self._string_jsons = [] |
| + self._string_by_id = {} |
| + self._id_by_string = {} |
| + self._max_string_id = 0 |
| + |
| + @property |
| + def modified(self): |
| + return self._modified |
| + |
| + @property |
| + def string_by_id(self): |
| + return self._string_by_id |
| + |
| + def ParseMore(self, heap_dump_version, strings_json): |
| + if heap_dump_version != Trace.HEAP_DUMP_VERSION_1: |
| + raise UnsupportedHeapDumpVersionError(heap_dump_version) |
| + |
| + self._string_jsons.append(strings_json) |
| + for string_json in strings_json: |
| + self._Insert(string_json['id'], string_json['string']) |
| + |
| + def Clear(self): |
| + if self._string_by_id: |
| + self._modified = True |
|
awong
2017/04/20 22:12:14
Is Clear() not reset?
This looks *almost* like __
|
| + self._string_by_id = {} |
| + self._id_by_string = {} |
| + self._Insert(0, '[null]') |
| + self._max_string_id = 0 |
| + |
| + def AddString(self, string): |
| + string_id = self._id_by_string.get(string) |
| + if string_id is None: |
| + string_id = self._max_string_id + 1 |
| + self._Insert(string_id, string) |
| + self._modified = True |
| + return string_id |
| + |
| + def ApplyModifications(self): |
|
awong
2017/04/20 22:12:14
What are such modifications? Can we use a less gen
|
| + if not self.modified: |
| + return |
| + |
| + assert self._string_jsons, 'no JSON nodes' |
| + |
| + # Serialize into first JSON node, and clear all others. |
|
awong
2017/04/20 22:12:14
Can we get a "why" in this comment?
As a reader,
|
| + |
| + for string_json in self._string_jsons: |
| + string_json[:] = [] |
|
awong
2017/04/20 22:12:14
string_json.clear()?
|
| + string_json = self._string_jsons[0] |
| + for string_id, string in self._string_by_id.iteritems(): |
| + string_json.append({'id': string_id, 'string': string}) |
| + |
| + self._modified = False |
|
awong
2017/04/20 22:12:14
This is confusing. Shouldn't it be true?
|
| + |
| + def _Insert(self, string_id, string): |
| + self._id_by_string[string] = string_id |
| + self._string_by_id[string_id] = string |
| + self._max_string_id = max(self._max_string_id, string_id) |
| + |
| + |
| +class TypeNameMap(object): |
| + def __init__(self): |
| + self._modified = False |
| + self._type_name_jsons = [] |
| + self._name_by_id = {} |
| + self._id_by_name = {} |
| + self._max_type_id = 0 |
| + |
| + @property |
| + def modified(self): |
| + return self._modified |
| + |
| + @property |
| + def name_by_id(self): |
| + return self._name_by_id |
| + |
| + def ParseMore(self, heap_dump_version, type_name_json, string_map): |
| + if heap_dump_version != Trace.HEAP_DUMP_VERSION_1: |
| + raise UnsupportedHeapDumpVersionError(heap_dump_version) |
| + |
| + self._type_name_jsons.append(type_name_json) |
| + for type_json in type_name_json: |
| + self._Insert(type_json['id'], |
| + string_map.string_by_id[type_json['name_sid']]) |
| + |
| + def AddType(self, type_name): |
| + type_id = self._id_by_name.get(type_name) |
| + if type_id is None: |
| + type_id = self._max_type_id + 1 |
| + self._Insert(type_id, type_name) |
| + self._modified = True |
| + return type_id |
| + |
| + def ApplyModifications(self, string_map, force=False): |
| + if not self.modified and not force: |
| + return |
| + |
| + assert self._type_name_jsons, 'no JSON nodes' |
| + |
| + # Serialize into first JSON node, and clear all others. |
| + |
| + for types_json in self._type_name_jsons: |
| + types_json[:] = [] |
|
awong
2017/04/20 22:12:14
types_json.clear()?
|
| + types_json = self._type_name_jsons[0] |
| + for type_id, type_name in self._name_by_id.iteritems(): |
| + types_json.append({ |
| + 'id': type_id, |
| + 'name_sid': string_map.AddString(type_name)}) |
| + |
| + self._modified = False |
|
awong
2017/04/20 22:12:14
Should this be true?
|
| + |
| + def _Insert(self, type_id, type_name): |
| + self._id_by_name[type_name] = type_id |
| + self._name_by_id[type_id] = type_name |
| + self._max_type_id = max(self._max_type_id, type_id) |
| - class PCFrame(object): |
| - def __init__(self, pc, frame): |
| + |
| +class StackFrameMap(object): |
| + class Frame(object): |
| + def __init__(self, frame_id, name, parent_frame_id): |
| self._modified = False |
| - self._pc = pc |
| - self._frame = frame |
| + self._id = frame_id |
| + self._name = name |
| + self._pc = self._ParsePC(name) |
| + self._parent_id = parent_frame_id |
| + self._ext = None |
| @property |
| def modified(self): |
| return self._modified |
| @property |
| + def id(self): |
| + return self._id |
| + |
| + @property |
| def pc(self): |
| return self._pc |
| @property |
| def name(self): |
| - return self._frame['name'] |
| + return self._name |
| @name.setter |
| def name(self, value): |
| self._modified = True |
| - self._frame['name'] = value |
| + self._name = value |
| + |
| + @property |
| + def parent_id(self): |
| + return self._parent_id |
| + |
| + _PC_TAG = 'pc:' |
| + |
| + def _ParsePC(self, name): |
| + if not name.startswith(self._PC_TAG): |
|
awong
2017/04/20 22:12:14
How about invert the logic to remove the not?
|
| + return None |
| + return long(name[len(self._PC_TAG):], 16) |
| + |
| + def _ClearModified(self): |
| + self._modified = False |
| + |
| + def __init__(self): |
|
awong
2017/04/20 22:12:14
Group the __init__?
|
| + self._modified = False |
| + self._heap_dump_version = None |
| + self._stack_frames_jsons = [] |
| + self._frame_by_id = {} |
| + |
| + @property |
| + def modified(self): |
| + return (self._modified or |
| + any(f.modified for f in self._frame_by_id.itervalues())) |
| + |
| + @property |
| + def frame_by_id(self): |
| + return self._frame_by_id |
| + |
| + def ParseMore(self, heap_dump_version, stack_frames_json, string_map): |
| + frame_by_id = {} |
| + if heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
| + if self._stack_frames_jsons: |
| + raise Exception('Legacy stack frames are expected only once.') |
| + for frame_id, frame_json in stack_frames_json.iteritems(): |
| + frame = self.Frame(frame_id, |
| + frame_json['name'], |
| + frame_json.get('parent')) |
| + frame_by_id[frame.id] = frame |
| + else: |
| + if heap_dump_version != Trace.HEAP_DUMP_VERSION_1: |
| + raise UnsupportedHeapDumpVersionError(heap_dump_version) |
| + for frame_json in stack_frames_json: |
| + frame = self.Frame(frame_json['id'], |
| + string_map.string_by_id[frame_json['name_sid']], |
| + frame_json.get('parent')) |
| + frame_by_id[frame.id] = frame |
| + |
| + self._heap_dump_version = heap_dump_version |
| + self._stack_frames_jsons.append(stack_frames_json) |
| + |
| + self._frame_by_id = frame_by_id |
| + |
| + def ApplyModifications(self, string_map, force=False): |
| + if not self.modified and not force: |
| + return |
| + |
| + assert self._stack_frames_jsons, 'no JSON nodes' |
| + if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
| + assert string_map is None, \ |
| + 'string_map should not be used with the legacy format' |
| + |
| + # Serialize frames into first JSON node, and clear all others. |
| + |
| + for frames_json in self._stack_frames_jsons: |
| + if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
| + frames_json.clear() |
| + else: |
| + frames_json[:] = [] |
| + |
| + frames_json = self._stack_frames_jsons[0] |
| + for frame in self._frame_by_id.itervalues(): |
| + if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
| + frame_json = {'name': frame.name} |
| + frames_json[frame.id] = frame_json |
| + else: |
| + frame_json = { |
| + 'id': frame.id, |
| + 'name_sid': string_map.AddString(frame.name) |
| + } |
| + frames_json.append(frame_json) |
| + if frame.parent_id is not None: |
| + frame_json['parent'] = frame.parent_id |
| + frame._ClearModified() |
| + |
| + self._modified = False |
| - def __init__(self, stack_frames): |
| - """Constructs object using 'stackFrames' dictionary.""" |
| - self._pc_frames = [] |
| - for frame in stack_frames.itervalues(): |
| - pc_frame = self._ParsePCFrame(frame) |
| - if pc_frame: |
| - self._pc_frames.append(pc_frame) |
| + |
| +class HeapProfile(object): |
| + EntryKey = collections.namedtuple( |
| + 'EntryKey', |
| + ['stack_frame_id', 'type_name_id']) |
| + |
| + class Entry(object): |
| + def __init__(self, key, mapped_value_by_name, numeric_value_by_name): |
| + self._key = key |
| + self._mapped_value_by_name = mapped_value_by_name |
| + self._numeric_value_by_name = numeric_value_by_name |
| + |
| + @property |
| + def key(self): |
| + return self._key |
| + |
| + @property |
| + def stack_frame_id(self): |
| + return self._key.stack_frame_id |
| + |
| + @property |
| + def type_name_id(self): |
| + return self._key.type_name_id |
| + |
| + def _AddValuesFrom(self, entry): |
| + self._mapped_value_by_name.clear() |
| + for name, value in entry._numeric_value_by_name.iteritems(): |
| + value += self._numeric_value_by_name.get(name, 0) |
| + self._numeric_value_by_name[name] = value |
| + |
| + def __init__(self, allocator_name, entries_json, mapped_entry_names): |
| + self._modified = False |
| + self._allocator_name = allocator_name |
| + self._entries_json = entries_json |
| + self._entries = [] |
| + for values in zip(*entries_json.itervalues()): |
| + stack_frame_id = None |
| + type_name_id = None |
| + mapped_value_by_name = {} |
| + numeric_value_by_name = {} |
| + for index, name in enumerate(entries_json.iterkeys()): |
| + value = values[index] |
| + if name == 'nodes': |
| + stack_frame_id = value |
| + elif name == 'types': |
| + type_name_id = value |
| + elif name in mapped_entry_names: |
| + mapped_value_by_name[name] = value |
| + else: |
| + numeric_value_by_name[name] = value |
| + entry = self.Entry(self.EntryKey(stack_frame_id, type_name_id), |
| + mapped_value_by_name, numeric_value_by_name) |
| + self._entries.append(entry) |
| + |
| + @property |
| + def modified(self): |
| + return self._modified |
| @property |
| - def pc_frames(self): |
| - return self._pc_frames |
| + def allocator_name(self): |
| + return self._allocator_name |
| + |
| + @property |
| + def entries(self): |
| + return self._entries |
| + |
| + def ApplyModifications(self): |
| + if not self.modified: |
| + return |
| + |
| + mapped_value_names = set() |
| + numeric_value_names = set() |
| + for entry in self._entries: |
| + mapped_value_names.update(entry._mapped_value_by_name.iterkeys()) |
| + numeric_value_names.update(entry._numeric_value_by_name.iterkeys()) |
| + |
| + def _AddJSONValue(name, value): |
| + values = self._entries_json.get(name) |
| + if values is None: |
| + values = [] |
| + self._entries_json[name] = values |
| + values.append(value) |
| + |
| + self._entries_json.clear() |
| + for entry in self._entries: |
| + _AddJSONValue('nodes', entry.stack_frame_id) |
| + _AddJSONValue('types', entry.type_name_id) |
| + for name in mapped_value_names: |
| + value = entry._mapped_value_by_name[name] |
| + _AddJSONValue(name, value) |
| + for name in numeric_value_names: |
| + value = entry._numeric_value_by_name[name] |
| + _AddJSONValue(name, value) |
| + |
| + self._modified = False |
| + |
| + |
| +class MemoryDump(object): |
| + def __init__(self, allocators_json, mapped_entry_names): |
| + self._profiles = [] |
| + for allocator_name, entries_json in allocators_json.iteritems(): |
| + profile = HeapProfile(allocator_name, entries_json, mapped_entry_names) |
| + self._profiles.append(profile) |
| @property |
| def modified(self): |
| - return any(f.modified for f in self._pc_frames) |
| + return any(p.modified for p in self.profiles) |
| - _PC_TAG = 'pc:' |
| + @property |
| + def profiles(self): |
| + return self._profiles |
| - @classmethod |
| - def _ParsePCFrame(self, frame): |
| - name = frame['name'] |
| - if not name.startswith(self._PC_TAG): |
| - return None |
| - pc = long(name[len(self._PC_TAG):], 16) |
| - return self.PCFrame(pc, frame) |
| + def ApplyModifications(self): |
| + for profile in self._profiles: |
| + profile.ApplyModifications() |
| -class Process(object): |
| - """Holds various bits of information about a process in a trace file.""" |
| +class Trace(object): |
| - def __init__(self, pid): |
| - self.pid = pid |
| - self.name = None |
| - self.mmaps = None |
| - self.stack_frames = None |
| + HEAP_DUMP_VERSION_LEGACY = 'Legacy' |
| + HEAP_DUMP_VERSION_1 = 1 |
| + class Process(object): |
| + def __init__(self, pid): |
| + self._pid = pid |
| + self._name = None |
| + self._memory_map = None |
| + self._memory_dumps = [] |
| + self._stack_frame_map = StackFrameMap() |
| + self._type_name_map = TypeNameMap() |
| + self._string_map = StringMap() |
| + self._heap_dump_version = None |
| -def CollectProcesses(trace): |
| - """Parses trace dictionary and returns pid->Process map of all processes |
| - suitable for symbolization (which have both mmaps and stack_frames). |
| - """ |
| + @property |
| + def modified(self): |
| + return (self._stack_frame_map.modified or |
| + self._type_name_map.modified or |
| + any(d.modified for d in self._memory_dumps)) |
| - process_map = {} |
| + @property |
| + def pid(self): |
| + return self._pid |
| - # Android traces produced via 'chrome://inspect/?tracing#devices' are |
| - # just list of events. |
| - events = trace if isinstance(trace, list) else trace['traceEvents'] |
| - for event in events: |
| - name = event.get('name') |
| - if not name: |
| - continue |
| + @property |
| + def name(self): |
|
awong
2017/04/20 22:28:04
For these properties, having a docstring that expl
|
| + return self._name |
| + |
| + @property |
| + def unique_name(self): |
| + name = self._name if self._name else 'UnnamedProcess' |
| + return '{}({})'.format(name, self._pid) |
| + |
| + @property |
| + def memory_map(self): |
| + return self._memory_map |
| + |
| + @property |
| + def memory_dumps(self): |
|
awong
2017/04/20 22:28:04
Why is this one plural?
|
| + return self._memory_dumps |
| - pid = event['pid'] |
| - process = process_map.get(pid) |
| - if process is None: |
| - process = Process(pid) |
| - process_map[pid] = process |
| + @property |
| + def stack_frame_map(self): |
| + return self._stack_frame_map |
| + |
| + @property |
| + def type_name_map(self): |
| + return self._type_name_map |
| + |
| + def ApplyModifications(self): |
| + if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
| + self._stack_frame_map.ApplyModifications(None) |
| + else: |
| + if self._stack_frame_map.modified or self._type_name_map.modified: |
| + self._string_map.Clear() |
| + self._stack_frame_map.ApplyModifications(self._string_map, force=True) |
| + self._type_name_map.ApplyModifications(self._string_map, force=True) |
| + self._string_map.ApplyModifications() |
| + for dump in self._memory_dumps: |
| + dump.ApplyModifications() |
| + |
| + def __init__(self, trace_json): |
| + self._trace_json = trace_json |
| + self._processes = [] |
| + self._heap_dump_version = None |
| + |
| + # Misc per-process information needed only during parsing. |
| + class ProcessExt(object): |
| + def __init__(self, pid): |
| + self.process = Trace.Process(pid) |
| + self.mapped_entry_names = set() |
| + self.process_mmaps_json = None |
| + self.seen_strings_json = False |
| + |
| + process_ext_by_pid = {} |
| + |
| + # Android traces produced via 'chrome://inspect/?tracing#devices' are |
| + # just list of events. |
| + events = trace_json if isinstance(trace_json, list) \ |
| + else trace_json['traceEvents'] |
| + for event in events: |
| + name = event.get('name') |
| + if not name: |
| + continue |
| + |
| + pid = event['pid'] |
| + process_ext = process_ext_by_pid.get(pid) |
| + if process_ext is None: |
| + process_ext = ProcessExt(pid) |
| + process_ext_by_pid[pid] = process_ext |
| + process = process_ext.process |
| + |
| + phase = event['ph'] |
| + if phase == self._EVENT_PHASE_METADATA: |
| + if name == 'process_name': |
| + process._name = event['args']['name'] |
| + elif name == 'stackFrames': |
| + process._stack_frame_map.ParseMore( |
| + self._UseHeapDumpVersion(self.HEAP_DUMP_VERSION_LEGACY), |
| + event['args']['stackFrames'], |
| + process._string_map) |
| + elif phase == self._EVENT_PHASE_MEMORY_DUMP: |
| + dumps = event['args']['dumps'] |
| + process_mmaps = dumps.get('process_mmaps') |
| + if process_mmaps: |
| + # We want the most recent memory map, so parsing happens later |
| + # once we finished reading all events. |
| + process_ext.process_mmaps_json = process_mmaps |
| + heaps = dumps.get('heaps_v2') |
| + if heaps: |
| + version = self._UseHeapDumpVersion(heaps['version']) |
| + maps = heaps.get('maps') |
| + if maps: |
| + process_ext.mapped_entry_names.update(maps.iterkeys()) |
| + types = maps.get('types') |
| + stack_frames = maps.get('nodes') |
| + strings = maps.get('strings') |
| + if (strings is None and (types or stack_frames) |
| + and not process_ext.seen_strings_json): |
| + # ApplyModifications() for TypeNameMap and StackFrameMap puts |
| + # everything into the first node and depends on StringMap. So |
| + # we need to make sure that 'strings' node is there if any of |
| + # other two nodes present. |
| + strings = [] |
| + maps['strings'] = strings |
| + if strings is not None: |
| + process_ext.seen_strings_json = True |
| + process._string_map.ParseMore(version, strings) |
| + if types: |
| + process._type_name_map.ParseMore( |
| + version, types, process._string_map) |
| + if stack_frames: |
| + process._stack_frame_map.ParseMore( |
| + version, stack_frames, process._string_map) |
| + allocators = heaps.get('allocators') |
| + if allocators: |
| + dump = MemoryDump(allocators, process_ext.mapped_entry_names) |
| + process._memory_dumps.append(dump) |
| + |
| + self._processes = [] |
| + for pe in process_ext_by_pid.itervalues(): |
| + pe.process._heap_dump_version = self._heap_dump_version |
| + if pe.process_mmaps_json: |
| + # Now parse the most recent memory map. |
| + pe.process._memory_map = MemoryMap(pe.process_mmaps_json) |
| + self._processes.append(pe.process) |
| - phase = event['ph'] |
| - if phase == TRACE_EVENT_PHASE_METADATA: |
| - if name == 'process_name': |
| - process.name = event['args']['name'] |
| - elif name == 'stackFrames': |
| - process.stack_frames = StackFrames(event['args']['stackFrames']) |
| - elif phase == TRACE_EVENT_PHASE_MEMORY_DUMP: |
| - process_mmaps = event['args']['dumps'].get('process_mmaps') |
| - if process_mmaps: |
| - # TODO(dskiba): this parses all process_mmaps, but retains only the |
| - # last one. We need to parse only once (lazy parsing?). |
| - process.mmaps = ProcessMemoryMaps(process_mmaps) |
| + @property |
| + def modified(self): |
| + return any(p.modified for p in self._processes) |
| - return [p for p in process_map.itervalues() if p.mmaps and p.stack_frames] |
| + @property |
| + def processes(self): |
| + return self._processes |
| + |
| + @property |
| + def heap_dump_version(self): |
| + return self._heap_dump_version |
| + |
| + def ApplyModifications(self): |
| + for process in self._processes: |
| + process.ApplyModifications() |
| + assert not self.modified, 'still modified' |
| + |
| + def Serialize(self): |
| + return self._trace_json |
| + |
| + # Relevant trace event phases from Chromium's |
| + # src/base/trace_event/common/trace_event_common.h. |
| + _EVENT_PHASE_METADATA = 'M' |
| + _EVENT_PHASE_MEMORY_DUMP = 'v' |
| + |
| + def _UseHeapDumpVersion(self, version): |
| + if self._heap_dump_version is None: |
| + self._heap_dump_version = version |
| + return version |
| + elif self._heap_dump_version != version: |
| + raise Exception( |
| + ("Inconsistent trace file: first saw '{}' heap dump version, " |
| + "then '{}'.").format(self._heap_dump_version, version)) |
| + else: |
| + return version |
| class SymbolizableFile(object): |
| @@ -381,8 +668,12 @@ def ResolveSymbolizableFiles(processes): |
| """ |
| symfile_by_path = {} |
| for process in processes: |
| - for frame in process.stack_frames.pc_frames: |
| - region = process.mmaps.FindRegion(frame.pc) |
| + if not process.memory_map: |
|
awong
2017/04/20 22:28:04
Comment explaining when this can occur?
|
| + continue |
| + for frame in process.stack_frame_map.frame_by_id.itervalues(): |
| + if frame.pc is None: |
| + continue |
| + region = process.memory_map.FindRegion(frame.pc) |
| if region is None: |
| frame.name = '<unresolved>' |
| continue |
| @@ -397,15 +688,154 @@ def ResolveSymbolizableFiles(processes): |
| return symfile_by_path.values() |
| +def FindInSystemPath(binary_name): |
| + paths = os.environ['PATH'].split(os.pathsep) |
| + for path in paths: |
| + binary_path = os.path.join(path, binary_name) |
| + if os.path.isfile(binary_path): |
| + return binary_path |
| + return None |
| + |
| + |
| +class Symbolizer(object): |
| + # Encapsulates platform-specific symbolization logic. |
|
awong
2017/04/20 22:28:04
Turn into docstring.
|
| + def __init__(self): |
| + self.is_mac = sys.platform == 'darwin' |
| + self.is_win = sys.platform == 'win32' |
| + if self.is_mac: |
| + self.binary = 'atos' |
| + self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher() |
| + elif self.is_win: |
| + self.binary = 'addr2line-pdb.exe' |
| + else: |
| + self.binary = 'addr2line' |
| + self.symbolizer_path = FindInSystemPath(self.binary) |
| + |
| + def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name): |
| + def _SymbolizerCallback(sym_info, frames): |
| + # Unwind inline chain to the top. |
| + while sym_info.inlined_by: |
| + sym_info = sym_info.inlined_by |
| + |
| + symbolized_name = sym_info.name if sym_info.name else unsymbolized_name |
| + for frame in frames: |
| + frame.name = symbolized_name |
| + frame.ext.source_path = sym_info.source_path |
| + |
| + symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path, |
| + self.symbolizer_path, |
| + _SymbolizerCallback, |
| + inlines=True) |
| + |
| + for address, frames in symfile.frames_by_address.iteritems(): |
| + # SymbolizeAsync() asserts that the type of address is int. We operate |
| + # on longs (since they are raw pointers possibly from 64-bit processes). |
| + # It's OK to cast here because we're passing relative PC, which should |
| + # always fit into int. |
| + symbolizer.SymbolizeAsync(int(address), frames) |
| + |
| + symbolizer.Join() |
| + |
| + |
| + def _SymbolizeMac(self, symfile): |
| + chars_max = int(subprocess.check_output("getconf ARG_MAX", shell=True)) |
| + |
| + # 16 for the address, 2 for "0x", 1 for the space |
| + chars_per_address = 19 |
| + |
| + load_address = (symbolize_trace_macho_reader. |
| + ReadMachOTextLoadAddress(symfile.symbolizable_path)) |
| + assert load_address is not None |
| + |
| + cmd_base = [self.symbolizer_path, '-arch', 'x86_64', '-l', |
| + '0x%x' % load_address, '-o', |
| + symfile.symbolizable_path] |
| + chars_for_other_arguments = len(' '.join(cmd_base)) + 1 |
| + |
| + # The maximum number of inputs that can be processed at once is limited by |
| + # ARG_MAX. This currently evaluates to ~13000 on macOS. |
| + max_inputs = (chars_max - chars_for_other_arguments) / chars_per_address |
| + |
| + all_keys = symfile.frames_by_address.keys() |
| + processed_keys_count = 0 |
| + while len(all_keys): |
| + input_count = min(len(all_keys), max_inputs) |
| + keys_to_process = all_keys[0:input_count] |
| + cmd = list(cmd_base) |
| + cmd.extend([hex(int(x) + load_address) |
| + for x in keys_to_process]) |
| + output_array = subprocess.check_output(cmd).split('\n') |
| + for i in range(len(keys_to_process)): |
| + for frame in (symfile.frames_by_address.values() |
| + [i + processed_keys_count]): |
| + frame.name = self._matcher.Match(output_array[i]) |
| + processed_keys_count += len(keys_to_process) |
| + all_keys = all_keys[input_count:] |
| + |
| + def _SymbolizeWin(self, symfile): |
| + """Invoke symbolizer binary on windows and write all input in one go. |
| + |
| + Unlike Linux, on Windows, symbolization talks through a shared system |
| + service that handles communication with the NT symbol servers. This |
| + creates an explicit serialization (and therefore lock contention) of |
| + any process using the symbol API for files that do not have a local PDB. |
| + |
| + Thus, even though the Windows symbolizer binary can be made command-line |
| + compatible with the POSIX addr2line interface, parallelizing the |
| + symbolization does not yield the same performance effects. Running |
| + just one symbolizer seems good enough for now. Can optimize later |
| + if this becomes a bottleneck. |
| + """ |
| + cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe', |
| + symfile.symbolizable_path] |
| + |
| + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, |
| + stderr=sys.stderr) |
| + addrs = ["%x" % relative_pc for relative_pc in |
| + symfile.frames_by_address.keys()] |
| + (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs)) |
| + stdout_data = stdout_data.split('\n') |
| + |
| + # This is known to be in the same order as stderr_data. |
| + for i, addr in enumerate(addrs): |
| + for frame in symfile.frames_by_address[int(addr, 16)]: |
| + # Output of addr2line with --functions is always 2 outputs per |
| + # symbol, function name followed by source line number. Only grab |
| + # the function name as line info is not always available. |
| + frame.name = stdout_data[i * 2] |
| + |
| + def Symbolize(self, symfile, unsymbolized_name): |
| + if self.is_mac: |
| + self._SymbolizeMac(symfile) |
| + elif self.is_win: |
| + self._SymbolizeWin(symfile) |
| + else: |
| + self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name) |
| + |
| + def IsSymbolizableFile(self, file_path): |
| + if self.is_win: |
| + extension = os.path.splitext(file_path)[1].lower() |
| + return extension in ['.dll', '.exe'] |
| + else: |
| + result = subprocess.check_output(['file', '-0', file_path]) |
| + type_string = result[result.find('\0') + 1:] |
| + return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*', |
| + type_string, re.DOTALL)) |
| + |
| + |
| def SymbolizeFiles(symfiles, symbolizer): |
| """Symbolizes each file in the given list of SymbolizableFiles |
| and updates stack frames with symbolization results.""" |
| + |
| + if not symfiles: |
| + print 'Nothing to symbolize.' |
| + return |
| + |
| print 'Symbolizing...' |
| def _SubPrintf(message, *args): |
| print (' ' + message).format(*args) |
| - symbolized = False |
| for symfile in symfiles: |
| unsymbolized_name = '<{}>'.format( |
| symfile.path if symfile.path else 'unnamed') |
| @@ -432,9 +862,20 @@ def SymbolizeFiles(symfiles, symbolizer): |
| symfile.path) |
| symbolizer.Symbolize(symfile, unsymbolized_name) |
| - symbolized = True |
| - return symbolized |
| + |
| +# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so) |
| +# as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available |
| +# via 'name' group. |
| +ANDROID_PATH_MATCHER = re.compile( |
|
awong
2017/04/20 22:28:04
This is hardish to read and matching paths with re
|
| + r'^/data/(?:' |
| + r'app/[^/]+/lib/[^/]+/|' |
| + r'app-lib/[^/]+/|' |
| + r'data/[^/]+/incremental-install-files/lib/' |
| + r')(?P<name>.*\.so)') |
| + |
| +# Subpath of output path where unstripped libraries are stored. |
| +ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped' |
| def HaveFilesFromAndroid(symfiles): |
| @@ -455,59 +896,87 @@ def RemapAndroidFiles(symfiles, output_path): |
| symfile.symbolizable_path = 'android://{}'.format(symfile.path) |
| +def Symbolize(options, trace, symbolizer): |
| + symfiles = ResolveSymbolizableFiles(trace.processes) |
| + |
| + # Android trace files don't have any indication they are from Android. |
| + # So we're checking for Android-specific paths. |
| + if HaveFilesFromAndroid(symfiles): |
| + if not options.output_directory: |
| + sys.exit('The trace file appears to be from Android. Please ' |
| + 'specify output directory to properly symbolize it.') |
| + RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory)) |
| + |
| + SymbolizeFiles(symfiles, symbolizer) |
| + |
| + |
| +def OpenTraceFile(file_path, mode): |
| + if file_path.endswith('.gz'): |
| + return gzip.open(file_path, mode + 'b') |
| + else: |
| + return open(file_path, mode + 't') |
| + |
| + |
| # Suffix used for backup files. |
| BACKUP_FILE_TAG = '.BACKUP' |
| def main(): |
| - parser = argparse.ArgumentParser() |
| - parser.add_argument('file', |
| - help='Trace file to symbolize (.json or .json.gz)') |
| - parser.add_argument('--no-backup', |
| - dest='backup', default='true', action='store_false', |
| - help="Don't create {} files".format(BACKUP_FILE_TAG)) |
| - parser.add_argument('--output-directory', |
| - help='The path to the build output directory, such ' + |
| - 'as out/Debug. Only needed for Android.') |
| - options = parser.parse_args() |
| - |
| - trace_file_path = options.file |
| - def _OpenTraceFile(mode): |
| - if trace_file_path.endswith('.gz'): |
| - return gzip.open(trace_file_path, mode + 'b') |
| - else: |
| - return open(trace_file_path, mode + 't') |
| + class MultilineHelpFormatter(argparse.HelpFormatter): |
| + def _split_lines(self, text, width): |
| + extra_lines = [] |
| + if '\n' in text: |
| + lines = text.splitlines() |
| + text = lines[0] |
| + extra_lines = lines[1:] |
| + return super(MultilineHelpFormatter, self)._split_lines(text, width) + \ |
| + extra_lines |
| + |
| + parser = argparse.ArgumentParser(formatter_class=MultilineHelpFormatter) |
| + parser.add_argument( |
| + 'file', |
| + help='Trace file to symbolize (.json or .json.gz)') |
| + |
| + parser.add_argument( |
| + '--no-backup', dest='backup', default='true', action='store_false', |
| + help="Don't create {} files".format(BACKUP_FILE_TAG)) |
| + |
| + parser.add_argument( |
| + '--output-directory', |
| + help='The path to the build output directory, such as out/Debug.') |
| symbolizer = Symbolizer() |
| if symbolizer.symbolizer_path is None: |
| sys.exit("Can't symbolize - no %s in PATH." % symbolizer.binary) |
| + options = parser.parse_args() |
| + |
| + trace_file_path = options.file |
| + |
| print 'Reading trace file...' |
| - with _OpenTraceFile('r') as trace_file: |
| - trace = json.load(trace_file) |
| + with OpenTraceFile(trace_file_path, 'r') as trace_file: |
| + trace = Trace(json.load(trace_file)) |
| - processes = CollectProcesses(trace) |
| - symfiles = ResolveSymbolizableFiles(processes) |
| + Symbolize(options, trace, symbolizer) |
| - # Android trace files don't have any indication they are from Android. |
| - # So we're checking for Android-specific paths. |
| - if HaveFilesFromAndroid(symfiles): |
| - if not options.output_directory: |
| - parser.error('The trace file appears to be from Android. Please ' |
| - "specify output directory (e.g. 'out/Debug') to properly " |
| - 'symbolize it.') |
| - RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory)) |
| + if trace.modified: |
| + trace.ApplyModifications() |
| - if SymbolizeFiles(symfiles, symbolizer): |
| if options.backup: |
| backup_file_path = trace_file_path + BACKUP_FILE_TAG |
| - print 'Backing up trace file to {}...'.format(backup_file_path) |
| + if os.path.exists(backup_file_path): |
| + for i in itertools.count(1): |
| + unique_file_path = '{}{}'.format(backup_file_path, i) |
| + if not os.path.exists(unique_file_path): |
| + backup_file_path = unique_file_path |
| + break |
| + print 'Backing up trace file to {}'.format(backup_file_path) |
| os.rename(trace_file_path, backup_file_path) |
| - print 'Updating trace file...' |
| - with _OpenTraceFile('w') as trace_file: |
| - json.dump(trace, trace_file) |
| + print 'Updating the trace file...' |
| + with OpenTraceFile(trace_file_path, 'w') as trace_file: |
| + json.dump(trace.Serialize(), trace_file) |
| else: |
| - print 'No PCs symbolized - not updating trace file.' |
| + print 'No modifications were made - not updating the trace file.' |
| if __name__ == '__main__': |