Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright 2016 The Chromium Authors. All rights reserved. | 2 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 import argparse | 6 import argparse |
| 7 import bisect | 7 import bisect |
| 8 import collections | 8 import collections |
| 9 import gzip | 9 import gzip |
| 10 import itertools | |
| 10 import json | 11 import json |
| 11 import os | 12 import os |
| 12 import re | 13 import re |
| 13 import subprocess | 14 import subprocess |
| 14 import sys | 15 import sys |
| 15 | 16 |
| 16 _SYMBOLS_PATH = os.path.abspath(os.path.join( | 17 _SYMBOLS_PATH = os.path.abspath(os.path.join( |
| 17 os.path.dirname(os.path.realpath(__file__)), | 18 os.path.dirname(os.path.realpath(__file__)), |
| 18 '..', | 19 '..', |
| 19 'third_party', | 20 'third_party', |
| 20 'symbols')) | 21 'symbols')) |
| 21 sys.path.append(_SYMBOLS_PATH) | 22 sys.path.append(_SYMBOLS_PATH) |
| 22 # pylint: disable=import-error | 23 # pylint: disable=import-error |
| 23 import symbols.elf_symbolizer as elf_symbolizer | 24 import symbols.elf_symbolizer as elf_symbolizer |
| 24 | 25 |
| 25 import symbolize_trace_atos_regex | 26 import symbolize_trace_atos_regex |
| 26 import symbolize_trace_macho_reader | 27 import symbolize_trace_macho_reader |
| 27 | 28 |
| 28 | 29 |
| 29 # Relevant trace event phases from Chromium's | 30 class MemoryMap(object): |
| 30 # src/base/trace_event/common/trace_event_common.h. | |
| 31 TRACE_EVENT_PHASE_METADATA = 'M' | |
| 32 TRACE_EVENT_PHASE_MEMORY_DUMP = 'v' | |
| 33 | |
| 34 | |
| 35 # Matches Android library paths, supports both K (/data/app-lib/<>/lib.so) | |
| 36 # as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available | |
| 37 # via 'name' group. | |
| 38 ANDROID_PATH_MATCHER = re.compile( | |
| 39 r'^/data/(?:' | |
| 40 r'app/[^/]+/lib/[^/]+/|' | |
| 41 r'app-lib/[^/]+/|' | |
| 42 r'data/[^/]+/incremental-install-files/lib/' | |
| 43 r')(?P<name>.*\.so)') | |
| 44 | |
| 45 # Subpath of output path where unstripped libraries are stored. | |
| 46 ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped' | |
| 47 | |
| 48 | |
| 49 def FindInSystemPath(binary_name): | |
| 50 paths = os.environ['PATH'].split(os.pathsep) | |
| 51 for path in paths: | |
| 52 binary_path = os.path.join(path, binary_name) | |
| 53 if os.path.isfile(binary_path): | |
| 54 return binary_path | |
| 55 return None | |
| 56 | |
| 57 | |
| 58 class Symbolizer(object): | |
| 59 # Encapsulates platform-specific symbolization logic. | |
| 60 def __init__(self): | |
| 61 self.is_mac = sys.platform == 'darwin' | |
| 62 self.is_win = sys.platform == 'win32' | |
| 63 if self.is_mac: | |
| 64 self.binary = 'atos' | |
| 65 self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher() | |
| 66 elif self.is_win: | |
| 67 self.binary = 'addr2line-pdb.exe' | |
| 68 else: | |
| 69 self.binary = 'addr2line' | |
| 70 self.symbolizer_path = FindInSystemPath(self.binary) | |
| 71 | |
| 72 def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name): | |
| 73 def _SymbolizerCallback(sym_info, frames): | |
| 74 # Unwind inline chain to the top. | |
| 75 while sym_info.inlined_by: | |
| 76 sym_info = sym_info.inlined_by | |
| 77 | |
| 78 symbolized_name = sym_info.name if sym_info.name else unsymbolized_name | |
| 79 for frame in frames: | |
| 80 frame.name = symbolized_name | |
| 81 | |
| 82 symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path, | |
| 83 self.symbolizer_path, | |
| 84 _SymbolizerCallback, | |
| 85 inlines=True) | |
| 86 | |
| 87 for address, frames in symfile.frames_by_address.iteritems(): | |
| 88 # SymbolizeAsync() asserts that the type of address is int. We operate | |
| 89 # on longs (since they are raw pointers possibly from 64-bit processes). | |
| 90 # It's OK to cast here because we're passing relative PC, which should | |
| 91 # always fit into int. | |
| 92 symbolizer.SymbolizeAsync(int(address), frames) | |
| 93 | |
| 94 symbolizer.Join() | |
| 95 | |
| 96 | |
| 97 def _SymbolizeMac(self, symfile): | |
| 98 chars_max = int(subprocess.check_output("getconf ARG_MAX", shell=True)) | |
| 99 | |
| 100 # 16 for the address, 2 for "0x", 1 for the space | |
| 101 chars_per_address = 19 | |
| 102 | |
| 103 load_address = (symbolize_trace_macho_reader. | |
| 104 ReadMachOTextLoadAddress(symfile.symbolizable_path)) | |
| 105 assert load_address is not None | |
| 106 | |
| 107 cmd_base = [self.symbolizer_path, '-arch', 'x86_64', '-l', | |
| 108 '0x%x' % load_address, '-o', | |
| 109 symfile.symbolizable_path] | |
| 110 chars_for_other_arguments = len(' '.join(cmd_base)) + 1 | |
| 111 | |
| 112 # The maximum number of inputs that can be processed at once is limited by | |
| 113 # ARG_MAX. This currently evaluates to ~13000 on macOS. | |
| 114 max_inputs = (chars_max - chars_for_other_arguments) / chars_per_address | |
| 115 | |
| 116 all_keys = symfile.frames_by_address.keys() | |
| 117 processed_keys_count = 0 | |
| 118 while len(all_keys): | |
| 119 input_count = min(len(all_keys), max_inputs) | |
| 120 keys_to_process = all_keys[0:input_count] | |
| 121 | |
| 122 cmd = list(cmd_base) | |
| 123 cmd.extend([hex(int(x) + load_address) | |
| 124 for x in keys_to_process]) | |
| 125 output_array = subprocess.check_output(cmd).split('\n') | |
| 126 for i in range(len(keys_to_process)): | |
| 127 for frame in (symfile.frames_by_address.values() | |
| 128 [i + processed_keys_count]): | |
| 129 frame.name = self._matcher.Match(output_array[i]) | |
| 130 processed_keys_count += len(keys_to_process) | |
| 131 all_keys = all_keys[input_count:] | |
| 132 | |
| 133 | |
| 134 def _SymbolizeWin(self, symfile): | |
| 135 """Invoke symbolizer binary on windows and write all input in one go. | |
| 136 | |
| 137 Unlike linux, on windows, symbolization talks through a shared system | |
| 138 service that handles communication with the NT symbol servers. This | |
| 139 creates an explicit serialization (and therefore lock contention) of | |
| 140 any process using the symbol API for files that do not have a local PDB. | |
| 141 | |
| 142 Thus, even though the windows symbolizer binary can be made command line | |
| 143 compatible with the POSIX addr2line interface, parallelizing the | |
| 144 symbolization does not yield the same performance effects. Running | |
| 145 just one symbolizer seems good enough for now. Can optimize later | |
| 146 if this becomes a bottleneck. | |
| 147 """ | |
| 148 cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe', | |
| 149 symfile.symbolizable_path] | |
| 150 | |
| 151 proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, | |
| 152 stderr=sys.stderr) | |
| 153 addrs = ["%x" % relative_pc for relative_pc in | |
| 154 symfile.frames_by_address.keys()] | |
| 155 (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs)) | |
| 156 stdout_data = stdout_data.split('\n') | |
| 157 | |
| 158 # This is known to be in the same order as stderr_data. | |
| 159 for i, addr in enumerate(addrs): | |
| 160 for frame in symfile.frames_by_address[int(addr, 16)]: | |
| 161 # Output of addr2line with --functions is always 2 outputs per | |
| 162 # symbol, function name followed by source line number. Only grab | |
| 163 # the function name as line info is not always available. | |
| 164 frame.name = stdout_data[i * 2] | |
| 165 | |
| 166 | |
| 167 def Symbolize(self, symfile, unsymbolized_name): | |
| 168 if self.is_mac: | |
| 169 self._SymbolizeMac(symfile) | |
| 170 if self.is_win: | |
| 171 self._SymbolizeWin(symfile) | |
| 172 else: | |
| 173 self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name) | |
| 174 | |
| 175 | |
| 176 def IsSymbolizableFile(self, file_path): | |
| 177 if self.is_win: | |
| 178 extension = os.path.splitext(file_path)[1].lower() | |
| 179 return extension in ['.dll', '.exe'] | |
| 180 else: | |
| 181 result = subprocess.check_output(['file', '-0', file_path]) | |
| 182 type_string = result[result.find('\0') + 1:] | |
| 183 return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*', | |
| 184 type_string, re.DOTALL)) | |
| 185 | |
| 186 | |
| 187 class ProcessMemoryMaps(object): | |
| 188 """Represents 'process_mmaps' trace file entry.""" | 31 """Represents 'process_mmaps' trace file entry.""" |
| 189 | 32 |
| 190 class Region(object): | 33 class Region(object): |
| 191 def __init__(self, start_address, size, file_path): | 34 def __init__(self, start_address, size, file_path): |
| 192 self._start_address = start_address | 35 self._start_address = start_address |
| 193 self._size = size | 36 self._size = size |
| 194 self._file_path = file_path | 37 self._file_path = file_path |
| 195 | 38 |
| 196 @property | 39 @property |
| 197 def start_address(self): | 40 def start_address(self): |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 214 return long(self._start_address).__cmp__(long(other._start_address)) | 57 return long(self._start_address).__cmp__(long(other._start_address)) |
| 215 elif isinstance(other, (long, int)): | 58 elif isinstance(other, (long, int)): |
| 216 return long(self._start_address).__cmp__(long(other)) | 59 return long(self._start_address).__cmp__(long(other)) |
| 217 else: | 60 else: |
| 218 raise Exception('Cannot compare with %s' % type(other)) | 61 raise Exception('Cannot compare with %s' % type(other)) |
| 219 | 62 |
| 220 def __repr__(self): | 63 def __repr__(self): |
| 221 return 'Region(0x{:X} - 0x{:X}, {})'.format( | 64 return 'Region(0x{:X} - 0x{:X}, {})'.format( |
| 222 self.start_address, self.end_address, self.file_path) | 65 self.start_address, self.end_address, self.file_path) |
| 223 | 66 |
| 224 def __init__(self, process_mmaps): | 67 def __init__(self, process_mmaps_json): |
| 225 """Parses 'process_mmaps' dictionary.""" | |
| 226 | |
| 227 regions = [] | 68 regions = [] |
| 228 for region_value in process_mmaps['vm_regions']: | 69 for region_json in process_mmaps_json['vm_regions']: |
| 229 regions.append(self.Region( | 70 regions.append(self.Region( |
| 230 long(region_value['sa'], 16), | 71 long(region_json['sa'], 16), |
| 231 long(region_value['sz'], 16), | 72 long(region_json['sz'], 16), |
| 232 region_value['mf'])) | 73 region_json['mf'])) |
| 233 regions.sort() | 74 regions.sort() |
| 234 | 75 |
| 235 # Copy regions without duplicates and check for overlaps. | 76 # Copy regions without duplicates and check for overlaps. |
| 236 self._regions = [] | 77 self._regions = [] |
| 237 previous_region = None | 78 previous_region = None |
| 238 for region in regions: | 79 for region in regions: |
| 239 if previous_region is not None: | 80 if previous_region is not None: |
| 240 if region == previous_region: | 81 if region == previous_region: |
| 241 continue | 82 continue |
| 242 assert region.start_address >= previous_region.end_address, \ | 83 assert region.start_address >= previous_region.end_address, \ |
| 243 'Regions {} and {} overlap.'.format(previous_region, region) | 84 'Regions {} and {} overlap.'.format(previous_region, region) |
| 244 previous_region = region | 85 previous_region = region |
| 245 self._regions.append(region) | 86 self._regions.append(region) |
| 246 | 87 |
| 247 @property | 88 @property |
| 248 def regions(self): | 89 def regions(self): |
| 249 return self._regions | 90 return self._regions |
| 250 | 91 |
| 251 def FindRegion(self, address): | 92 def FindRegion(self, address): |
| 252 """Finds region containing |address|. Returns None if none found.""" | 93 """Finds region containing |address|. Returns None if none found.""" |
| 253 | 94 |
| 254 region_index = bisect.bisect_right(self._regions, address) - 1 | 95 region_index = bisect.bisect_right(self._regions, address) - 1 |
| 255 if region_index >= 0: | 96 if region_index >= 0: |
| 256 region = self._regions[region_index] | 97 region = self._regions[region_index] |
| 257 if address >= region.start_address and address < region.end_address: | 98 if address >= region.start_address and address < region.end_address: |
| 258 return region | 99 return region |
| 259 return None | 100 return None |
| 260 | 101 |
| 261 | 102 |
| 262 class StackFrames(object): | 103 class UnsupportedHeapDumpVersionError(Exception): |
| 263 """Represents 'stackFrames' trace file entry.""" | 104 def __init__(self, version): |
| 264 | 105 message = 'Unsupported heap dump version: {}'.format(version) |
| 265 class PCFrame(object): | 106 super(UnsupportedHeapDumpVersionError, self).__init__(message) |
| 266 def __init__(self, pc, frame): | 107 |
| 108 | |
| 109 class StringMap(object): | |
|
awong
2017/04/20 19:37:39
These classes should have doc strings explaining t
| |
| 110 def __init__(self): | |
| 111 self._modified = False | |
| 112 self._string_jsons = [] | |
| 113 self._string_by_id = {} | |
| 114 self._id_by_string = {} | |
| 115 self._max_string_id = 0 | |
| 116 | |
| 117 @property | |
| 118 def modified(self): | |
| 119 return self._modified | |
| 120 | |
| 121 @property | |
| 122 def string_by_id(self): | |
| 123 return self._string_by_id | |
| 124 | |
| 125 def ParseMore(self, heap_dump_version, strings_json): | |
| 126 if heap_dump_version != Trace.HEAP_DUMP_VERSION_1: | |
| 127 raise UnsupportedHeapDumpVersionError(heap_dump_version) | |
| 128 | |
| 129 self._string_jsons.append(strings_json) | |
| 130 for string_json in strings_json: | |
| 131 self._Insert(string_json['id'], string_json['string']) | |
| 132 | |
| 133 def Clear(self): | |
| 134 if self._string_by_id: | |
| 135 self._modified = True | |
|
awong
2017/04/20 22:12:14
Is Clear() not reset?
This looks *almost* like __
| |
| 136 self._string_by_id = {} | |
| 137 self._id_by_string = {} | |
| 138 self._Insert(0, '[null]') | |
| 139 self._max_string_id = 0 | |
| 140 | |
| 141 def AddString(self, string): | |
| 142 string_id = self._id_by_string.get(string) | |
| 143 if string_id is None: | |
| 144 string_id = self._max_string_id + 1 | |
| 145 self._Insert(string_id, string) | |
| 146 self._modified = True | |
| 147 return string_id | |
| 148 | |
| 149 def ApplyModifications(self): | |
|
awong
2017/04/20 22:12:14
What are such modifications? Can we use a less gen
| |
| 150 if not self.modified: | |
| 151 return | |
| 152 | |
| 153 assert self._string_jsons, 'no JSON nodes' | |
| 154 | |
| 155 # Serialize into first JSON node, and clear all others. | |
|
awong
2017/04/20 22:12:14
Can we get a "why" in this comment?
As a reader,
| |
| 156 | |
| 157 for string_json in self._string_jsons: | |
| 158 string_json[:] = [] | |
|
awong
2017/04/20 22:12:14
string_json.clear()?
| |
| 159 string_json = self._string_jsons[0] | |
| 160 for string_id, string in self._string_by_id.iteritems(): | |
| 161 string_json.append({'id': string_id, 'string': string}) | |
| 162 | |
| 163 self._modified = False | |
|
awong
2017/04/20 22:12:14
This is confusing. Shouldn't it be true?
| |
| 164 | |
| 165 def _Insert(self, string_id, string): | |
| 166 self._id_by_string[string] = string_id | |
| 167 self._string_by_id[string_id] = string | |
| 168 self._max_string_id = max(self._max_string_id, string_id) | |
| 169 | |
| 170 | |
| 171 class TypeNameMap(object): | |
| 172 def __init__(self): | |
| 173 self._modified = False | |
| 174 self._type_name_jsons = [] | |
| 175 self._name_by_id = {} | |
| 176 self._id_by_name = {} | |
| 177 self._max_type_id = 0 | |
| 178 | |
| 179 @property | |
| 180 def modified(self): | |
| 181 return self._modified | |
| 182 | |
| 183 @property | |
| 184 def name_by_id(self): | |
| 185 return self._name_by_id | |
| 186 | |
| 187 def ParseMore(self, heap_dump_version, type_name_json, string_map): | |
| 188 if heap_dump_version != Trace.HEAP_DUMP_VERSION_1: | |
| 189 raise UnsupportedHeapDumpVersionError(heap_dump_version) | |
| 190 | |
| 191 self._type_name_jsons.append(type_name_json) | |
| 192 for type_json in type_name_json: | |
| 193 self._Insert(type_json['id'], | |
| 194 string_map.string_by_id[type_json['name_sid']]) | |
| 195 | |
| 196 def AddType(self, type_name): | |
| 197 type_id = self._id_by_name.get(type_name) | |
| 198 if type_id is None: | |
| 199 type_id = self._max_type_id + 1 | |
| 200 self._Insert(type_id, type_name) | |
| 201 self._modified = True | |
| 202 return type_id | |
| 203 | |
| 204 def ApplyModifications(self, string_map, force=False): | |
| 205 if not self.modified and not force: | |
| 206 return | |
| 207 | |
| 208 assert self._type_name_jsons, 'no JSON nodes' | |
| 209 | |
| 210 # Serialize into first JSON node, and clear all others. | |
| 211 | |
| 212 for types_json in self._type_name_jsons: | |
| 213 types_json[:] = [] | |
|
awong
2017/04/20 22:12:14
types_json.clear()?
| |
| 214 types_json = self._type_name_jsons[0] | |
| 215 for type_id, type_name in self._name_by_id.iteritems(): | |
| 216 types_json.append({ | |
| 217 'id': type_id, | |
| 218 'name_sid': string_map.AddString(type_name)}) | |
| 219 | |
| 220 self._modified = False | |
|
awong
2017/04/20 22:12:14
Should this be true?
| |
| 221 | |
| 222 def _Insert(self, type_id, type_name): | |
| 223 self._id_by_name[type_name] = type_id | |
| 224 self._name_by_id[type_id] = type_name | |
| 225 self._max_type_id = max(self._max_type_id, type_id) | |
| 226 | |
| 227 | |
| 228 class StackFrameMap(object): | |
| 229 class Frame(object): | |
| 230 def __init__(self, frame_id, name, parent_frame_id): | |
| 267 self._modified = False | 231 self._modified = False |
| 268 self._pc = pc | 232 self._id = frame_id |
| 269 self._frame = frame | 233 self._name = name |
| 234 self._pc = self._ParsePC(name) | |
| 235 self._parent_id = parent_frame_id | |
| 236 self._ext = None | |
| 270 | 237 |
| 271 @property | 238 @property |
| 272 def modified(self): | 239 def modified(self): |
| 273 return self._modified | 240 return self._modified |
| 274 | 241 |
| 275 @property | 242 @property |
| 243 def id(self): | |
| 244 return self._id | |
| 245 | |
| 246 @property | |
| 276 def pc(self): | 247 def pc(self): |
| 277 return self._pc | 248 return self._pc |
| 278 | 249 |
| 279 @property | 250 @property |
| 280 def name(self): | 251 def name(self): |
| 281 return self._frame['name'] | 252 return self._name |
| 282 | 253 |
| 283 @name.setter | 254 @name.setter |
| 284 def name(self, value): | 255 def name(self, value): |
| 285 self._modified = True | 256 self._modified = True |
| 286 self._frame['name'] = value | 257 self._name = value |
| 287 | 258 |
| 288 def __init__(self, stack_frames): | 259 @property |
| 289 """Constructs object using 'stackFrames' dictionary.""" | 260 def parent_id(self): |
| 290 self._pc_frames = [] | 261 return self._parent_id |
| 291 for frame in stack_frames.itervalues(): | 262 |
| 292 pc_frame = self._ParsePCFrame(frame) | 263 _PC_TAG = 'pc:' |
| 293 if pc_frame: | 264 |
| 294 self._pc_frames.append(pc_frame) | 265 def _ParsePC(self, name): |
| 295 | 266 if not name.startswith(self._PC_TAG): |
|
awong
2017/04/20 22:12:14
How about invert the logic to remove the not?
| |
| 296 @property | 267 return None |
| 297 def pc_frames(self): | 268 return long(name[len(self._PC_TAG):], 16) |
| 298 return self._pc_frames | 269 |
| 270 def _ClearModified(self): | |
| 271 self._modified = False | |
| 272 | |
| 273 def __init__(self): | |
|
awong
2017/04/20 22:12:14
Group the __init__?
| |
| 274 self._modified = False | |
| 275 self._heap_dump_version = None | |
| 276 self._stack_frames_jsons = [] | |
| 277 self._frame_by_id = {} | |
| 299 | 278 |
| 300 @property | 279 @property |
| 301 def modified(self): | 280 def modified(self): |
| 302 return any(f.modified for f in self._pc_frames) | 281 return (self._modified or |
| 303 | 282 any(f.modified for f in self._frame_by_id.itervalues())) |
| 304 _PC_TAG = 'pc:' | 283 |
| 305 | 284 @property |
| 306 @classmethod | 285 def frame_by_id(self): |
| 307 def _ParsePCFrame(self, frame): | 286 return self._frame_by_id |
| 308 name = frame['name'] | 287 |
| 309 if not name.startswith(self._PC_TAG): | 288 def ParseMore(self, heap_dump_version, stack_frames_json, string_map): |
| 310 return None | 289 frame_by_id = {} |
| 311 pc = long(name[len(self._PC_TAG):], 16) | 290 if heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
| 312 return self.PCFrame(pc, frame) | 291 if self._stack_frames_jsons: |
| 313 | 292 raise Exception('Legacy stack frames are expected only once.') |
| 314 | 293 for frame_id, frame_json in stack_frames_json.iteritems(): |
| 315 class Process(object): | 294 frame = self.Frame(frame_id, |
| 316 """Holds various bits of information about a process in a trace file.""" | 295 frame_json['name'], |
| 317 | 296 frame_json.get('parent')) |
| 318 def __init__(self, pid): | 297 frame_by_id[frame.id] = frame |
| 319 self.pid = pid | 298 else: |
| 320 self.name = None | 299 if heap_dump_version != Trace.HEAP_DUMP_VERSION_1: |
| 321 self.mmaps = None | 300 raise UnsupportedHeapDumpVersionError(heap_dump_version) |
| 322 self.stack_frames = None | 301 for frame_json in stack_frames_json: |
| 323 | 302 frame = self.Frame(frame_json['id'], |
| 324 | 303 string_map.string_by_id[frame_json['name_sid']], |
| 325 def CollectProcesses(trace): | 304 frame_json.get('parent')) |
| 326 """Parses trace dictionary and returns pid->Process map of all processes | 305 frame_by_id[frame.id] = frame |
| 327 suitable for symbolization (which have both mmaps and stack_frames). | 306 |
| 328 """ | 307 self._heap_dump_version = heap_dump_version |
| 329 | 308 self._stack_frames_jsons.append(stack_frames_json) |
| 330 process_map = {} | 309 |
| 331 | 310 self._frame_by_id = frame_by_id |
| 332 # Android traces produced via 'chrome://inspect/?tracing#devices' are | 311 |
| 333 # just list of events. | 312 def ApplyModifications(self, string_map, force=False): |
| 334 events = trace if isinstance(trace, list) else trace['traceEvents'] | 313 if not self.modified and not force: |
| 335 for event in events: | 314 return |
| 336 name = event.get('name') | 315 |
| 337 if not name: | 316 assert self._stack_frames_jsons, 'no JSON nodes' |
| 338 continue | 317 if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
| 339 | 318 assert string_map is None, \ |
| 340 pid = event['pid'] | 319 'string_map should not be used with the legacy format' |
| 341 process = process_map.get(pid) | 320 |
| 342 if process is None: | 321 # Serialize frames into first JSON node, and clear all others. |
| 343 process = Process(pid) | 322 |
| 344 process_map[pid] = process | 323 for frames_json in self._stack_frames_jsons: |
| 345 | 324 if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
| 346 phase = event['ph'] | 325 frames_json.clear() |
| 347 if phase == TRACE_EVENT_PHASE_METADATA: | 326 else: |
| 348 if name == 'process_name': | 327 frames_json[:] = [] |
| 349 process.name = event['args']['name'] | 328 |
| 350 elif name == 'stackFrames': | 329 frames_json = self._stack_frames_jsons[0] |
| 351 process.stack_frames = StackFrames(event['args']['stackFrames']) | 330 for frame in self._frame_by_id.itervalues(): |
| 352 elif phase == TRACE_EVENT_PHASE_MEMORY_DUMP: | 331 if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
| 353 process_mmaps = event['args']['dumps'].get('process_mmaps') | 332 frame_json = {'name': frame.name} |
| 354 if process_mmaps: | 333 frames_json[frame.id] = frame_json |
| 355 # TODO(dskiba): this parses all process_mmaps, but retains only the | 334 else: |
| 356 # last one. We need to parse only once (lazy parsing?). | 335 frame_json = { |
| 357 process.mmaps = ProcessMemoryMaps(process_mmaps) | 336 'id': frame.id, |
| 358 | 337 'name_sid': string_map.AddString(frame.name) |
| 359 return [p for p in process_map.itervalues() if p.mmaps and p.stack_frames] | 338 } |
| 339 frames_json.append(frame_json) | |
| 340 if frame.parent_id is not None: | |
| 341 frame_json['parent'] = frame.parent_id | |
| 342 frame._ClearModified() | |
| 343 | |
| 344 self._modified = False | |
| 345 | |
| 346 | |
| 347 class HeapProfile(object): | |
| 348 EntryKey = collections.namedtuple( | |
| 349 'EntryKey', | |
| 350 ['stack_frame_id', 'type_name_id']) | |
| 351 | |
| 352 class Entry(object): | |
| 353 def __init__(self, key, mapped_value_by_name, numeric_value_by_name): | |
| 354 self._key = key | |
| 355 self._mapped_value_by_name = mapped_value_by_name | |
| 356 self._numeric_value_by_name = numeric_value_by_name | |
| 357 | |
| 358 @property | |
| 359 def key(self): | |
| 360 return self._key | |
| 361 | |
| 362 @property | |
| 363 def stack_frame_id(self): | |
| 364 return self._key.stack_frame_id | |
| 365 | |
| 366 @property | |
| 367 def type_name_id(self): | |
| 368 return self._key.type_name_id | |
| 369 | |
| 370 def _AddValuesFrom(self, entry): | |
| 371 self._mapped_value_by_name.clear() | |
| 372 for name, value in entry._numeric_value_by_name.iteritems(): | |
| 373 value += self._numeric_value_by_name.get(name, 0) | |
| 374 self._numeric_value_by_name[name] = value | |
| 375 | |
| 376 def __init__(self, allocator_name, entries_json, mapped_entry_names): | |
| 377 self._modified = False | |
| 378 self._allocator_name = allocator_name | |
| 379 self._entries_json = entries_json | |
| 380 self._entries = [] | |
| 381 for values in zip(*entries_json.itervalues()): | |
| 382 stack_frame_id = None | |
| 383 type_name_id = None | |
| 384 mapped_value_by_name = {} | |
| 385 numeric_value_by_name = {} | |
| 386 for index, name in enumerate(entries_json.iterkeys()): | |
| 387 value = values[index] | |
| 388 if name == 'nodes': | |
| 389 stack_frame_id = value | |
| 390 elif name == 'types': | |
| 391 type_name_id = value | |
| 392 elif name in mapped_entry_names: | |
| 393 mapped_value_by_name[name] = value | |
| 394 else: | |
| 395 numeric_value_by_name[name] = value | |
| 396 entry = self.Entry(self.EntryKey(stack_frame_id, type_name_id), | |
| 397 mapped_value_by_name, numeric_value_by_name) | |
| 398 self._entries.append(entry) | |
| 399 | |
| 400 @property | |
| 401 def modified(self): | |
| 402 return self._modified | |
| 403 | |
| 404 @property | |
| 405 def allocator_name(self): | |
| 406 return self._allocator_name | |
| 407 | |
| 408 @property | |
| 409 def entries(self): | |
| 410 return self._entries | |
| 411 | |
| 412 def ApplyModifications(self): | |
| 413 if not self.modified: | |
| 414 return | |
| 415 | |
| 416 mapped_value_names = set() | |
| 417 numeric_value_names = set() | |
| 418 for entry in self._entries: | |
| 419 mapped_value_names.update(entry._mapped_value_by_name.iterkeys()) | |
| 420 numeric_value_names.update(entry._numeric_value_by_name.iterkeys()) | |
| 421 | |
| 422 def _AddJSONValue(name, value): | |
| 423 values = self._entries_json.get(name) | |
| 424 if values is None: | |
| 425 values = [] | |
| 426 self._entries_json[name] = values | |
| 427 values.append(value) | |
| 428 | |
| 429 self._entries_json.clear() | |
| 430 for entry in self._entries: | |
| 431 _AddJSONValue('nodes', entry.stack_frame_id) | |
| 432 _AddJSONValue('types', entry.type_name_id) | |
| 433 for name in mapped_value_names: | |
| 434 value = entry._mapped_value_by_name[name] | |
| 435 _AddJSONValue(name, value) | |
| 436 for name in numeric_value_names: | |
| 437 value = entry._numeric_value_by_name[name] | |
| 438 _AddJSONValue(name, value) | |
| 439 | |
| 440 self._modified = False | |
| 441 | |
| 442 | |
| 443 class MemoryDump(object): | |
| 444 def __init__(self, allocators_json, mapped_entry_names): | |
| 445 self._profiles = [] | |
| 446 for allocator_name, entries_json in allocators_json.iteritems(): | |
| 447 profile = HeapProfile(allocator_name, entries_json, mapped_entry_names) | |
| 448 self._profiles.append(profile) | |
| 449 | |
| 450 @property | |
| 451 def modified(self): | |
| 452 return any(p.modified for p in self.profiles) | |
| 453 | |
| 454 @property | |
| 455 def profiles(self): | |
| 456 return self._profiles | |
| 457 | |
| 458 def ApplyModifications(self): | |
| 459 for profile in self._profiles: | |
| 460 profile.ApplyModifications() | |
| 461 | |
| 462 | |
| 463 class Trace(object): | |
| 464 | |
| 465 HEAP_DUMP_VERSION_LEGACY = 'Legacy' | |
| 466 HEAP_DUMP_VERSION_1 = 1 | |
| 467 | |
| 468 class Process(object): | |
| 469 def __init__(self, pid): | |
| 470 self._pid = pid | |
| 471 self._name = None | |
| 472 self._memory_map = None | |
| 473 self._memory_dumps = [] | |
| 474 self._stack_frame_map = StackFrameMap() | |
| 475 self._type_name_map = TypeNameMap() | |
| 476 self._string_map = StringMap() | |
| 477 self._heap_dump_version = None | |
| 478 | |
| 479 @property | |
| 480 def modified(self): | |
| 481 return (self._stack_frame_map.modified or | |
| 482 self._type_name_map.modified or | |
| 483 any(d.modified for d in self._memory_dumps)) | |
| 484 | |
| 485 @property | |
| 486 def pid(self): | |
| 487 return self._pid | |
| 488 | |
| 489 @property | |
| 490 def name(self): | |
|
awong
2017/04/20 22:28:04
For these properties, having a docstring that expl
| |
| 491 return self._name | |
| 492 | |
| 493 @property | |
| 494 def unique_name(self): | |
| 495 name = self._name if self._name else 'UnnamedProcess' | |
| 496 return '{}({})'.format(name, self._pid) | |
| 497 | |
| 498 @property | |
| 499 def memory_map(self): | |
| 500 return self._memory_map | |
| 501 | |
| 502 @property | |
| 503 def memory_dumps(self): | |
|
awong
2017/04/20 22:28:04
Why is this one plural?
| |
| 504 return self._memory_dumps | |
| 505 | |
| 506 @property | |
| 507 def stack_frame_map(self): | |
| 508 return self._stack_frame_map | |
| 509 | |
| 510 @property | |
| 511 def type_name_map(self): | |
| 512 return self._type_name_map | |
| 513 | |
| 514 def ApplyModifications(self): | |
| 515 if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: | |
| 516 self._stack_frame_map.ApplyModifications(None) | |
| 517 else: | |
| 518 if self._stack_frame_map.modified or self._type_name_map.modified: | |
| 519 self._string_map.Clear() | |
| 520 self._stack_frame_map.ApplyModifications(self._string_map, force=True) | |
| 521 self._type_name_map.ApplyModifications(self._string_map, force=True) | |
| 522 self._string_map.ApplyModifications() | |
| 523 for dump in self._memory_dumps: | |
| 524 dump.ApplyModifications() | |
| 525 | |
| 526 def __init__(self, trace_json): | |
| 527 self._trace_json = trace_json | |
| 528 self._processes = [] | |
| 529 self._heap_dump_version = None | |
| 530 | |
| 531 # Misc per-process information needed only during parsing. | |
| 532 class ProcessExt(object): | |
| 533 def __init__(self, pid): | |
| 534 self.process = Trace.Process(pid) | |
| 535 self.mapped_entry_names = set() | |
| 536 self.process_mmaps_json = None | |
| 537 self.seen_strings_json = False | |
| 538 | |
| 539 process_ext_by_pid = {} | |
| 540 | |
| 541 # Android traces produced via 'chrome://inspect/?tracing#devices' are | |
| 542 # just list of events. | |
| 543 events = trace_json if isinstance(trace_json, list) \ | |
| 544 else trace_json['traceEvents'] | |
| 545 for event in events: | |
| 546 name = event.get('name') | |
| 547 if not name: | |
| 548 continue | |
| 549 | |
| 550 pid = event['pid'] | |
| 551 process_ext = process_ext_by_pid.get(pid) | |
| 552 if process_ext is None: | |
| 553 process_ext = ProcessExt(pid) | |
| 554 process_ext_by_pid[pid] = process_ext | |
| 555 process = process_ext.process | |
| 556 | |
| 557 phase = event['ph'] | |
| 558 if phase == self._EVENT_PHASE_METADATA: | |
| 559 if name == 'process_name': | |
| 560 process._name = event['args']['name'] | |
| 561 elif name == 'stackFrames': | |
| 562 process._stack_frame_map.ParseMore( | |
| 563 self._UseHeapDumpVersion(self.HEAP_DUMP_VERSION_LEGACY), | |
| 564 event['args']['stackFrames'], | |
| 565 process._string_map) | |
| 566 elif phase == self._EVENT_PHASE_MEMORY_DUMP: | |
| 567 dumps = event['args']['dumps'] | |
| 568 process_mmaps = dumps.get('process_mmaps') | |
| 569 if process_mmaps: | |
| 570 # We want the most recent memory map, so parsing happens later | |
| 571 # once we have finished reading all events. | |
| 572 process_ext.process_mmaps_json = process_mmaps | |
| 573 heaps = dumps.get('heaps_v2') | |
| 574 if heaps: | |
| 575 version = self._UseHeapDumpVersion(heaps['version']) | |
| 576 maps = heaps.get('maps') | |
| 577 if maps: | |
| 578 process_ext.mapped_entry_names.update(maps.iterkeys()) | |
| 579 types = maps.get('types') | |
| 580 stack_frames = maps.get('nodes') | |
| 581 strings = maps.get('strings') | |
| 582 if (strings is None and (types or stack_frames) | |
| 583 and not process_ext.seen_strings_json): | |
| 584 # ApplyModifications() for TypeNameMap and StackFrameMap puts | |
| 585 # everything into the first node and depends on StringMap. So | |
| 586 # we need to make sure that the 'strings' node is there if either | |
| 587 # of the other two nodes is present. | |
| 588 strings = [] | |
| 589 maps['strings'] = strings | |
| 590 if strings is not None: | |
| 591 process_ext.seen_strings_json = True | |
| 592 process._string_map.ParseMore(version, strings) | |
| 593 if types: | |
| 594 process._type_name_map.ParseMore( | |
| 595 version, types, process._string_map) | |
| 596 if stack_frames: | |
| 597 process._stack_frame_map.ParseMore( | |
| 598 version, stack_frames, process._string_map) | |
| 599 allocators = heaps.get('allocators') | |
| 600 if allocators: | |
| 601 dump = MemoryDump(allocators, process_ext.mapped_entry_names) | |
| 602 process._memory_dumps.append(dump) | |
| 603 | |
| 604 self._processes = [] | |
| 605 for pe in process_ext_by_pid.itervalues(): | |
| 606 pe.process._heap_dump_version = self._heap_dump_version | |
| 607 if pe.process_mmaps_json: | |
| 608 # Now parse the most recent memory map. | |
| 609 pe.process._memory_map = MemoryMap(pe.process_mmaps_json) | |
| 610 self._processes.append(pe.process) | |
| 611 | |
| 612 @property | |
| 613 def modified(self): | |
| 614 return any(p.modified for p in self._processes) | |
| 615 | |
| 616 @property | |
| 617 def processes(self): | |
| 618 return self._processes | |
| 619 | |
| 620 @property | |
| 621 def heap_dump_version(self): | |
| 622 return self._heap_dump_version | |
| 623 | |
| 624 def ApplyModifications(self): | |
| 625 for process in self._processes: | |
| 626 process.ApplyModifications() | |
| 627 assert not self.modified, 'still modified' | |
| 628 | |
| 629 def Serialize(self): | |
| 630 return self._trace_json | |
| 631 | |
| 632 # Relevant trace event phases from Chromium's | |
| 633 # src/base/trace_event/common/trace_event_common.h. | |
| 634 _EVENT_PHASE_METADATA = 'M' | |
| 635 _EVENT_PHASE_MEMORY_DUMP = 'v' | |
| 636 | |
| 637 def _UseHeapDumpVersion(self, version): | |
| 638 if self._heap_dump_version is None: | |
| 639 self._heap_dump_version = version | |
| 640 return version | |
| 641 elif self._heap_dump_version != version: | |
| 642 raise Exception( | |
| 643 ("Inconsistent trace file: first saw '{}' heap dump version, " | |
| 644 "then '{}'.").format(self._heap_dump_version, version)) | |
| 645 else: | |
| 646 return version | |
| 360 | 647 |
| 361 | 648 |
| 362 class SymbolizableFile(object): | 649 class SymbolizableFile(object): |
| 363 """Holds file path, addresses to symbolize and stack frames to update. | 650 """Holds file path, addresses to symbolize and stack frames to update. |
| 364 | 651 |
| 365 This class is a link between ELFSymbolizer and a trace file: it specifies | 652 This class is a link between ELFSymbolizer and a trace file: it specifies |
| 366 what to symbolize (addresses) and what to update with the symbolization | 653 what to symbolize (addresses) and what to update with the symbolization |
| 367 result (frames). | 654 result (frames). |
| 368 """ | 655 """ |
| 369 def __init__(self, file_path): | 656 def __init__(self, file_path): |
| 370 self.path = file_path | 657 self.path = file_path |
| 371 self.symbolizable_path = file_path # path to use for symbolization | 658 self.symbolizable_path = file_path # path to use for symbolization |
| 372 self.frames_by_address = collections.defaultdict(list) | 659 self.frames_by_address = collections.defaultdict(list) |
| 373 | 660 |
| 374 | 661 |
| 375 def ResolveSymbolizableFiles(processes): | 662 def ResolveSymbolizableFiles(processes): |
| 376 """Resolves and groups PCs into list of SymbolizableFiles. | 663 """Resolves and groups PCs into list of SymbolizableFiles. |
| 377 | 664 |
| 378 As part of the grouping process, this function resolves PC from each stack | 665 As part of the grouping process, this function resolves PC from each stack |
| 379 frame to the corresponding mmap region. Stack frames that failed to resolve | 666 frame to the corresponding mmap region. Stack frames that failed to resolve |
| 380 are symbolized with '<unresolved>'. | 667 are symbolized with '<unresolved>'. |
| 381 """ | 668 """ |
| 382 symfile_by_path = {} | 669 symfile_by_path = {} |
| 383 for process in processes: | 670 for process in processes: |
| 384 for frame in process.stack_frames.pc_frames: | 671 if not process.memory_map: |
|
awong
2017/04/20 22:28:04
Comment explaining when this can occur?
| |
| 385 region = process.mmaps.FindRegion(frame.pc) | 672 continue |
| 673 for frame in process.stack_frame_map.frame_by_id.itervalues(): | |
| 674 if frame.pc is None: | |
| 675 continue | |
| 676 region = process.memory_map.FindRegion(frame.pc) | |
| 386 if region is None: | 677 if region is None: |
| 387 frame.name = '<unresolved>' | 678 frame.name = '<unresolved>' |
| 388 continue | 679 continue |
| 389 | 680 |
| 390 symfile = symfile_by_path.get(region.file_path) | 681 symfile = symfile_by_path.get(region.file_path) |
| 391 if symfile is None: | 682 if symfile is None: |
| 392 symfile = SymbolizableFile(region.file_path) | 683 symfile = SymbolizableFile(region.file_path) |
| 393 symfile_by_path[symfile.path] = symfile | 684 symfile_by_path[symfile.path] = symfile |
| 394 | 685 |
| 395 relative_pc = frame.pc - region.start_address | 686 relative_pc = frame.pc - region.start_address |
| 396 symfile.frames_by_address[relative_pc].append(frame) | 687 symfile.frames_by_address[relative_pc].append(frame) |
| 397 return symfile_by_path.values() | 688 return symfile_by_path.values() |
| 398 | 689 |
| 399 | 690 |
| 691 def FindInSystemPath(binary_name): | |
| 692 paths = os.environ['PATH'].split(os.pathsep) | |
| 693 for path in paths: | |
| 694 binary_path = os.path.join(path, binary_name) | |
| 695 if os.path.isfile(binary_path): | |
| 696 return binary_path | |
| 697 return None | |
| 698 | |
| 699 | |
| 700 class Symbolizer(object): | |
| 701 # Encapsulates platform-specific symbolization logic. | |
|
awong
2017/04/20 22:28:04
Turn into docstring.
| |
| 702 def __init__(self): | |
| 703 self.is_mac = sys.platform == 'darwin' | |
| 704 self.is_win = sys.platform == 'win32' | |
| 705 if self.is_mac: | |
| 706 self.binary = 'atos' | |
| 707 self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher() | |
| 708 elif self.is_win: | |
| 709 self.binary = 'addr2line-pdb.exe' | |
| 710 else: | |
| 711 self.binary = 'addr2line' | |
| 712 self.symbolizer_path = FindInSystemPath(self.binary) | |
| 713 | |
| 714 def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name): | |
| 715 def _SymbolizerCallback(sym_info, frames): | |
| 716 # Unwind inline chain to the top. | |
| 717 while sym_info.inlined_by: | |
| 718 sym_info = sym_info.inlined_by | |
| 719 | |
| 720 symbolized_name = sym_info.name if sym_info.name else unsymbolized_name | |
| 721 for frame in frames: | |
| 722 frame.name = symbolized_name | |
| 723 frame.ext.source_path = sym_info.source_path | |
| 724 | |
| 725 symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path, | |
| 726 self.symbolizer_path, | |
| 727 _SymbolizerCallback, | |
| 728 inlines=True) | |
| 729 | |
| 730 for address, frames in symfile.frames_by_address.iteritems(): | |
| 731 # SymbolizeAsync() asserts that the type of address is int. We operate | |
| 732 # on longs (since they are raw pointers possibly from 64-bit processes). | |
| 733 # It's OK to cast here because we're passing relative PC, which should | |
| 734 # always fit into int. | |
| 735 symbolizer.SymbolizeAsync(int(address), frames) | |
| 736 | |
| 737 symbolizer.Join() | |
| 738 | |
| 739 | |
| 740 def _SymbolizeMac(self, symfile): | |
| 741 chars_max = int(subprocess.check_output("getconf ARG_MAX", shell=True)) | |
| 742 | |
| 743 # 16 for the address, 2 for "0x", 1 for the space | |
| 744 chars_per_address = 19 | |
| 745 | |
| 746 load_address = (symbolize_trace_macho_reader. | |
| 747 ReadMachOTextLoadAddress(symfile.symbolizable_path)) | |
| 748 assert load_address is not None | |
| 749 | |
| 750 cmd_base = [self.symbolizer_path, '-arch', 'x86_64', '-l', | |
| 751 '0x%x' % load_address, '-o', | |
| 752 symfile.symbolizable_path] | |
| 753 chars_for_other_arguments = len(' '.join(cmd_base)) + 1 | |
| 754 | |
| 755 # The maximum number of inputs that can be processed at once is limited by | |
| 756 # ARG_MAX. This currently evaluates to ~13000 on macOS. | |
| 757 max_inputs = (chars_max - chars_for_other_arguments) / chars_per_address | |
| 758 | |
| 759 all_keys = symfile.frames_by_address.keys() | |
| 760 processed_keys_count = 0 | |
| 761 while len(all_keys): | |
| 762 input_count = min(len(all_keys), max_inputs) | |
| 763 keys_to_process = all_keys[0:input_count] | |
| 764 cmd = list(cmd_base) | |
| 765 cmd.extend([hex(int(x) + load_address) | |
| 766 for x in keys_to_process]) | |
| 767 output_array = subprocess.check_output(cmd).split('\n') | |
| 768 for i in range(len(keys_to_process)): | |
| 769 for frame in (symfile.frames_by_address.values() | |
| 770 [i + processed_keys_count]): | |
| 771 frame.name = self._matcher.Match(output_array[i]) | |
| 772 processed_keys_count += len(keys_to_process) | |
| 773 all_keys = all_keys[input_count:] | |
| 774 | |
| 775 def _SymbolizeWin(self, symfile): | |
| 776 """Invoke symbolizer binary on windows and write all input in one go. | |
| 777 | |
| 778 Unlike linux, on windows, symbolization talks through a shared system | |
| 779 service that handles communication with the NT symbol servers. This | |
| 780 creates an explicit serialization (and therefore lock contention) of | |
| 781 any process using the symbol API for files that do not have a local PDB. | |
| 782 | |
| 783 Thus, even though the windows symbolizer binary can be made command line | |
| 784 compatible with the POSIX addr2line interface, parallelizing the | |
| 785 symbolization does not yield the same performance effects. Running | |
| 786 just one symbolizer seems good enough for now. Can optimize later | |
| 787 if this becomes a bottleneck. | |
| 788 """ | |
| 789 cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe', | |
| 790 symfile.symbolizable_path] | |
| 791 | |
| 792 proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, | |
| 793 stderr=sys.stderr) | |
| 794 addrs = ["%x" % relative_pc for relative_pc in | |
| 795 symfile.frames_by_address.keys()] | |
| 796 (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs)) | |
| 797 stdout_data = stdout_data.split('\n') | |
| 798 | |
| 799 # This is known to be in the same order as stderr_data. | |
| 800 for i, addr in enumerate(addrs): | |
| 801 for frame in symfile.frames_by_address[int(addr, 16)]: | |
| 802 # Output of addr2line with --functions is always 2 outputs per | |
| 803 # symbol, function name followed by source line number. Only grab | |
| 804 # the function name as line info is not always available. | |
| 805 frame.name = stdout_data[i * 2] | |
| 806 | |
| 807 def Symbolize(self, symfile, unsymbolized_name): | |
| 808 if self.is_mac: | |
| 809 self._SymbolizeMac(symfile) | |
| 810 elif self.is_win: | |
| 811 self._SymbolizeWin(symfile) | |
| 812 else: | |
| 813 self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name) | |
| 814 | |
| 815 def IsSymbolizableFile(self, file_path): | |
| 816 if self.is_win: | |
| 817 extension = os.path.splitext(file_path)[1].lower() | |
| 818 return extension in ['.dll', '.exe'] | |
| 819 else: | |
| 820 result = subprocess.check_output(['file', '-0', file_path]) | |
| 821 type_string = result[result.find('\0') + 1:] | |
| 822 return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*', | |
| 823 type_string, re.DOTALL)) | |
| 824 | |
| 825 | |
| 400 def SymbolizeFiles(symfiles, symbolizer): | 826 def SymbolizeFiles(symfiles, symbolizer): |
| 401 """Symbolizes each file in the given list of SymbolizableFiles | 827 """Symbolizes each file in the given list of SymbolizableFiles |
| 402 and updates stack frames with symbolization results.""" | 828 and updates stack frames with symbolization results.""" |
| 829 | |
| 830 if not symfiles: | |
| 831 print 'Nothing to symbolize.' | |
| 832 return | |
| 833 | |
| 403 print 'Symbolizing...' | 834 print 'Symbolizing...' |
| 404 | 835 |
| 405 def _SubPrintf(message, *args): | 836 def _SubPrintf(message, *args): |
| 406 print (' ' + message).format(*args) | 837 print (' ' + message).format(*args) |
| 407 | 838 |
| 408 symbolized = False | |
| 409 for symfile in symfiles: | 839 for symfile in symfiles: |
| 410 unsymbolized_name = '<{}>'.format( | 840 unsymbolized_name = '<{}>'.format( |
| 411 symfile.path if symfile.path else 'unnamed') | 841 symfile.path if symfile.path else 'unnamed') |
| 412 | 842 |
| 413 problem = None | 843 problem = None |
| 414 if not os.path.isabs(symfile.symbolizable_path): | 844 if not os.path.isabs(symfile.symbolizable_path): |
| 415 problem = 'not a file' | 845 problem = 'not a file' |
| 416 elif not os.path.isfile(symfile.symbolizable_path): | 846 elif not os.path.isfile(symfile.symbolizable_path): |
| 417 problem = "file doesn't exist" | 847 problem = "file doesn't exist" |
| 418 elif not symbolizer.IsSymbolizableFile(symfile.symbolizable_path): | 848 elif not symbolizer.IsSymbolizableFile(symfile.symbolizable_path): |
| 419 problem = 'file is not symbolizable' | 849 problem = 'file is not symbolizable' |
| 420 if problem: | 850 if problem: |
| 421 _SubPrintf("Won't symbolize {} PCs for '{}': {}.", | 851 _SubPrintf("Won't symbolize {} PCs for '{}': {}.", |
| 422 len(symfile.frames_by_address), | 852 len(symfile.frames_by_address), |
| 423 symfile.symbolizable_path, | 853 symfile.symbolizable_path, |
| 424 problem) | 854 problem) |
| 425 for frames in symfile.frames_by_address.itervalues(): | 855 for frames in symfile.frames_by_address.itervalues(): |
| 426 for frame in frames: | 856 for frame in frames: |
| 427 frame.name = unsymbolized_name | 857 frame.name = unsymbolized_name |
| 428 continue | 858 continue |
| 429 | 859 |
| 430 _SubPrintf('Symbolizing {} PCs from {}...', | 860 _SubPrintf('Symbolizing {} PCs from {}...', |
| 431 len(symfile.frames_by_address), | 861 len(symfile.frames_by_address), |
| 432 symfile.path) | 862 symfile.path) |
| 433 | 863 |
| 434 symbolizer.Symbolize(symfile, unsymbolized_name) | 864 symbolizer.Symbolize(symfile, unsymbolized_name) |
| 435 symbolized = True | |
| 436 | 865 |
| 437 return symbolized | 866 |
| 867 # Matches Android library paths, supports both K (/data/app-lib/<>/lib.so) | |
| 868 # as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available | |
| 869 # via 'name' group. | |
| 870 ANDROID_PATH_MATCHER = re.compile( | |
|
awong
2017/04/20 22:28:04
This is hardish to read and matching paths with re
| |
| 871 r'^/data/(?:' | |
| 872 r'app/[^/]+/lib/[^/]+/|' | |
| 873 r'app-lib/[^/]+/|' | |
| 874 r'data/[^/]+/incremental-install-files/lib/' | |
| 875 r')(?P<name>.*\.so)') | |
| 876 | |
| 877 # Subpath of output path where unstripped libraries are stored. | |
| 878 ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped' | |
| 438 | 879 |
| 439 | 880 |
| 440 def HaveFilesFromAndroid(symfiles): | 881 def HaveFilesFromAndroid(symfiles): |
| 441 return any(ANDROID_PATH_MATCHER.match(f.path) for f in symfiles) | 882 return any(ANDROID_PATH_MATCHER.match(f.path) for f in symfiles) |
| 442 | 883 |
| 443 | 884 |
| 444 def RemapAndroidFiles(symfiles, output_path): | 885 def RemapAndroidFiles(symfiles, output_path): |
| 445 for symfile in symfiles: | 886 for symfile in symfiles: |
| 446 match = ANDROID_PATH_MATCHER.match(symfile.path) | 887 match = ANDROID_PATH_MATCHER.match(symfile.path) |
| 447 if match: | 888 if match: |
| 448 name = match.group('name') | 889 name = match.group('name') |
| 449 symfile.symbolizable_path = os.path.join( | 890 symfile.symbolizable_path = os.path.join( |
| 450 output_path, ANDROID_UNSTRIPPED_SUBPATH, name) | 891 output_path, ANDROID_UNSTRIPPED_SUBPATH, name) |
| 451 else: | 892 else: |
| 452 # Clobber file path to trigger "not a file" problem in SymbolizeFiles(). | 893 # Clobber file path to trigger "not a file" problem in SymbolizeFiles(). |
| 453 # Without this, files won't be symbolized with "file not found" problem, | 894 # Without this, files won't be symbolized with "file not found" problem, |
| 454 # which is not accurate. | 895 # which is not accurate. |
| 455 symfile.symbolizable_path = 'android://{}'.format(symfile.path) | 896 symfile.symbolizable_path = 'android://{}'.format(symfile.path) |
| 456 | 897 |
| 457 | 898 |
| 899 def Symbolize(options, trace, symbolizer): | |
| 900 symfiles = ResolveSymbolizableFiles(trace.processes) | |
| 901 | |
| 902 # Android trace files don't have any indication they are from Android. | |
| 903 # So we're checking for Android-specific paths. | |
| 904 if HaveFilesFromAndroid(symfiles): | |
| 905 if not options.output_directory: | |
| 906 sys.exit('The trace file appears to be from Android. Please ' | |
| 907 'specify output directory to properly symbolize it.') | |
| 908 RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory)) | |
| 909 | |
| 910 SymbolizeFiles(symfiles, symbolizer) | |
| 911 | |
| 912 | |
| 913 def OpenTraceFile(file_path, mode): | |
| 914 if file_path.endswith('.gz'): | |
| 915 return gzip.open(file_path, mode + 'b') | |
| 916 else: | |
| 917 return open(file_path, mode + 't') | |
| 918 | |
| 919 | |
| 458 # Suffix used for backup files. | 920 # Suffix used for backup files. |
| 459 BACKUP_FILE_TAG = '.BACKUP' | 921 BACKUP_FILE_TAG = '.BACKUP' |
| 460 | 922 |
| 461 def main(): | 923 def main(): |
| 462 parser = argparse.ArgumentParser() | 924 class MultilineHelpFormatter(argparse.HelpFormatter): |
| 463 parser.add_argument('file', | 925 def _split_lines(self, text, width): |
| 464 help='Trace file to symbolize (.json or .json.gz)') | 926 extra_lines = [] |
| 465 parser.add_argument('--no-backup', | 927 if '\n' in text: |
| 466 dest='backup', default='true', action='store_false', | 928 lines = text.splitlines() |
| 467 help="Don't create {} files".format(BACKUP_FILE_TAG)) | 929 text = lines[0] |
| 468 parser.add_argument('--output-directory', | 930 extra_lines = lines[1:] |
| 469 help='The path to the build output directory, such ' + | 931 return super(MultilineHelpFormatter, self)._split_lines(text, width) + \ |
| 470 'as out/Debug. Only needed for Android.') | 932 extra_lines |
| 471 options = parser.parse_args() | |
| 472 | 933 |
| 473 trace_file_path = options.file | 934 parser = argparse.ArgumentParser(formatter_class=MultilineHelpFormatter) |
| 474 def _OpenTraceFile(mode): | 935 parser.add_argument( |
| 475 if trace_file_path.endswith('.gz'): | 936 'file', |
| 476 return gzip.open(trace_file_path, mode + 'b') | 937 help='Trace file to symbolize (.json or .json.gz)') |
| 477 else: | 938 |
| 478 return open(trace_file_path, mode + 't') | 939 parser.add_argument( |
| 940 '--no-backup', dest='backup', default='true', action='store_false', | |
| 941 help="Don't create {} files".format(BACKUP_FILE_TAG)) | |
| 942 | |
| 943 parser.add_argument( | |
| 944 '--output-directory', | |
| 945 help='The path to the build output directory, such as out/Debug.') | |
| 479 | 946 |
| 480 symbolizer = Symbolizer() | 947 symbolizer = Symbolizer() |
| 481 if symbolizer.symbolizer_path is None: | 948 if symbolizer.symbolizer_path is None: |
| 482 sys.exit("Can't symbolize - no %s in PATH." % symbolizer.binary) | 949 sys.exit("Can't symbolize - no %s in PATH." % symbolizer.binary) |
| 483 | 950 |
| 951 options = parser.parse_args() | |
| 952 | |
| 953 trace_file_path = options.file | |
| 954 | |
| 484 print 'Reading trace file...' | 955 print 'Reading trace file...' |
| 485 with _OpenTraceFile('r') as trace_file: | 956 with OpenTraceFile(trace_file_path, 'r') as trace_file: |
| 486 trace = json.load(trace_file) | 957 trace = Trace(json.load(trace_file)) |
| 487 | 958 |
| 488 processes = CollectProcesses(trace) | 959 Symbolize(options, trace, symbolizer) |
| 489 symfiles = ResolveSymbolizableFiles(processes) | |
| 490 | 960 |
| 491 # Android trace files don't have any indication they are from Android. | 961 if trace.modified: |
| 492 # So we're checking for Android-specific paths. | 962 trace.ApplyModifications() |
| 493 if HaveFilesFromAndroid(symfiles): | |
| 494 if not options.output_directory: | |
| 495 parser.error('The trace file appears to be from Android. Please ' | |
| 496 "specify output directory (e.g. 'out/Debug') to properly " | |
| 497 'symbolize it.') | |
| 498 RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory)) | |
| 499 | 963 |
| 500 if SymbolizeFiles(symfiles, symbolizer): | |
| 501 if options.backup: | 964 if options.backup: |
| 502 backup_file_path = trace_file_path + BACKUP_FILE_TAG | 965 backup_file_path = trace_file_path + BACKUP_FILE_TAG |
| 503 print 'Backing up trace file to {}...'.format(backup_file_path) | 966 if os.path.exists(backup_file_path): |
| 967 for i in itertools.count(1): | |
| 968 unique_file_path = '{}{}'.format(backup_file_path, i) | |
| 969 if not os.path.exists(unique_file_path): | |
| 970 backup_file_path = unique_file_path | |
| 971 break | |
| 972 print 'Backing up trace file to {}'.format(backup_file_path) | |
| 504 os.rename(trace_file_path, backup_file_path) | 973 os.rename(trace_file_path, backup_file_path) |
| 505 | 974 |
| 506 print 'Updating trace file...' | 975 print 'Updating the trace file...' |
| 507 with _OpenTraceFile('w') as trace_file: | 976 with OpenTraceFile(trace_file_path, 'w') as trace_file: |
| 508 json.dump(trace, trace_file) | 977 json.dump(trace.Serialize(), trace_file) |
| 509 else: | 978 else: |
| 510 print 'No PCs symbolized - not updating trace file.' | 979 print 'No modifications were made - not updating the trace file.' |
| 511 | 980 |
| 512 | 981 |
| 513 if __name__ == '__main__': | 982 if __name__ == '__main__': |
| 514 main() | 983 main() |
| OLD | NEW |