| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright 2016 The Chromium Authors. All rights reserved. | 2 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """ |
| 7 This script processes trace files and symbolizes stack frames generated by |
| 8 Chrome's native heap profiler. |
| 9 |
| 10 === Overview === |
| 11 |
| 12 Trace file is essentially a giant JSON array of dictionaries (events). |
| 13 Events have some predefined keys (e.g. 'pid'), but otherwise are free to |
| 14 have anything inside. Trace file contains events from all Chrome processes |
| 15 that were sampled during tracing period. |
| 16 |
| 17 This script cares only about memory dump events generated with memory-infra |
| 18 category enabled. |
| 19 |
| 20 When Chrome native heap profiling is enabled, some memory dump events |
| 21 include the following extra information: |
| 22 |
| 23 * (Per allocator) Information about live allocations at the moment of the |
| 24 memory dump (the information includes backtraces, types / categories, |
| 25 sizes, and counts of allocations). There are several allocators in |
| 26 Chrome: e.g. malloc, blink_gc, partition_alloc. |
| 27 |
| 28 * (Per process) Stack frame tree of all functions that called allocators |
| 29 above. |
| 30 |
| 31 This script does the following: |
| 32 |
| 33 1. Parses the given trace file (loads JSON). |
| 34 2. Finds memory dump events and parses stack frame tree for each process. |
| 35 3. Finds stack frames that have PC addresses instead of function names. |
| 36 4. Symbolizes PCs and modifies loaded JSON. |
| 37 5. Writes modified JSON back to the file. |
| 38 |
| 39 The script supports trace files from the following platforms: |
| 40 * Android (the script itself must be run on Linux) |
| 41 * Linux |
| 42 * macOS |
| 43 * Windows |
| 44 |
| 45 Important note - the script doesn't check that it symbolizes the same binaries |
| 46 that were used at the time trace was taken. I.e. if you take a trace, change |
| 47 and rebuild Chrome binaries, the script will blindly use the new binaries. |
| 48 |
| 49 === Details === |
| 50 |
| 51 There are two formats of heap profiler information: legacy and modern. The |
| 52 main differences relevant to this script are: |
| 53 |
| 54 * In the modern format the stack frame tree, type name mapping, and string |
| 55 mapping nodes are dumped incrementally. These nodes are dumped in each |
| 56 memory dump event and carry updates that occurred since the last event. |
| 57 |
| 58 For example, let's say that when the first memory dump event is generated |
| 59 we only know about a function foo() (called from main()) allocating objects |
| 60 of type "int": |
| 61 |
| 62 { |
| 63 "args": { |
| 64 "dumps": { |
| 65 "heaps_v2": { |
| 66 "maps": { |
| 67 "nodes": [ |
| 68 { "id": 1, "name_sid": 1 }, |
| 69 { "id": 2, "parent": 1, "name_sid": 3 }, |
| 70 ], |
| 71 "types": [ |
| 72 { "id": 1, "name_sid": 2 }, |
| 73 ], |
| 74 "strings": [ |
| 75 { "id": 1, "string": "main()" }, |
| 76 { "id": 2, "string": "int" }, |
| 77 { "id": 3, "string": "foo()" }, |
| 78 ] |
| 79 }, |
| 80 "allocators": { ...live allocations per allocator... }, |
| 81 ... |
| 82 }, |
| 83 ... |
| 84 } |
| 85 }, |
| 86 ... |
| 87 } |
| 88 |
| 89 Here: |
| 90 * 'nodes' node encodes stack frame tree |
| 91 * 'types' node encodes type name mappings |
| 92 * 'strings' node encodes string mapping (explained below) |
| 93 |
| 94 Then, by the time the second memory dump event is generated, we learn about |
| 95 bar() (called from main()), which also allocated "int" objects. Only the |
| 96 new information is dumped, i.e. bar() stack frame: |
| 97 |
| 98 { |
| 99 "args": { |
| 100 "dumps": { |
| 101 "heaps_v2": { |
| 102 "maps": { |
| 103 "nodes": [ |
| 104 { "id": 2, "parent": 1, "name_sid": 4 }, |
| 105 ], |
| 106 "types": [], |
| 107 "strings": [ |
| 108 { "id": 4, "string": "bar()" }, |
| 109 ] |
| 110 }, |
| 111 "allocators": { ...live allocations per allocator... }, |
| 112 ... |
| 113 }, |
| 114 ... |
| 115 } |
| 116 }, |
| 117 ... |
| 118 } |
| 119 |
| 120 Note that 'types' node is empty, since there were no updates. All three |
| 121 nodes ('nodes', 'types', and 'strings') can be empty if there were no updates |
| 122 to them. |
| 123 |
| 124 For simplicity, when the script updates incremental nodes, it puts updated |
| 125 content in the first node, and clears all others. I.e. the following stack |
| 126 frame nodes: |
| 127 |
| 128 'nodes': [ |
| 129 { "id": 1, "name_sid": 1 }, |
| 130 { "id": 2, "parent": 1, "name_sid": 2 }, |
| 131 ] |
| 132 'nodes': [ |
| 133 { "id": 3, "parent": 2, "name_sid": 3 }, |
| 134 ] |
| 135 'nodes': [ |
| 136 { "id": 4, "parent": 3, "name_sid": 4 }, |
| 137 { "id": 5, "parent": 1, "name_sid": 5 }, |
| 138 ] |
| 139 |
| 140 After symbolization they are written as: |
| 141 |
| 142 'nodes': [ |
| 143 { "id": 1, "name_sid": 1 }, |
| 144 { "id": 2, "parent": 1, "name_sid": 2 }, |
| 145 { "id": 3, "parent": 2, "name_sid": 3 }, |
| 146 { "id": 4, "parent": 3, "name_sid": 4 }, |
| 147 { "id": 5, "parent": 1, "name_sid": 5 }, |
| 148 ] |
| 149 'nodes': [] |
| 150 'nodes': [] |
| 151 |
| 152 |
| 153 * In contrast, in the legacy format stack frame tree and type mappings are |
| 154 dumped separately from memory dump events, once per process. |
| 155 |
| 156 Here is how trace file with two memory dump events looks like in the |
| 157 legacy format: |
| 158 |
| 159 { |
| 160 "args": { |
| 161 "dumps": { |
| 162 "heaps": { ...live allocations per allocator... }, |
| 163 ... |
| 164 } |
| 165 }, |
| 166 ... |
| 167 } |
| 168 |
| 169 { |
| 170 "args": { |
| 171 "dumps": { |
| 172 "heaps": { ...live allocations per allocator... }, |
| 173 ... |
| 174 } |
| 175 }, |
| 176 ... |
| 177 } |
| 178 |
| 179 { |
| 180 "args": { |
| 181 "typeNames": { |
| 182 1: "int", |
| 183 } |
| 184 }, |
| 185 "cat": "__metadata", |
| 186 "name": "typeNames", |
| 187 ... |
| 188 } |
| 189 |
| 190 { |
| 191 "args": { |
| 192 "stackFrames": { |
| 193 1: { "name": "main" }, |
| 194 2: { "name": "foo", "parent": 1 }, |
| 195 3: { "name": "bar", "parent": 1 }, |
| 196 } |
| 197 }, |
| 198 "cat": "__metadata", |
| 199 "name": "stackFrames", |
| 200 ... |
| 201 } |
| 202 |
| 203 |
| 204 * Another change in the modern format is 'strings' node, which was added |
| 205 to deduplicate stack frame names (mainly for trace file size reduction). |
| 206 For consistency 'types' node also uses string mappings. |
| 207 |
| 208 |
| 209 See crbug.com/708930 for more information about the modern format. |
| 210 """ |
| 211 |
| 6 import argparse | 212 import argparse |
| 7 import bisect | 213 import bisect |
| 8 import collections | 214 import collections |
| 9 import gzip | 215 import gzip |
| 216 import itertools |
| 10 import json | 217 import json |
| 11 import os | 218 import os |
| 12 import re | 219 import re |
| 13 import subprocess | 220 import subprocess |
| 14 import sys | 221 import sys |
| 15 | 222 |
| 16 _SYMBOLS_PATH = os.path.abspath(os.path.join( | 223 _SYMBOLS_PATH = os.path.abspath(os.path.join( |
| 17 os.path.dirname(os.path.realpath(__file__)), | 224 os.path.dirname(os.path.realpath(__file__)), |
| 18 '..', | 225 '..', |
| 19 'third_party', | 226 'third_party', |
| 20 'symbols')) | 227 'symbols')) |
| 21 sys.path.append(_SYMBOLS_PATH) | 228 sys.path.append(_SYMBOLS_PATH) |
| 22 # pylint: disable=import-error | 229 # pylint: disable=import-error |
| 23 import symbols.elf_symbolizer as elf_symbolizer | 230 import symbols.elf_symbolizer as elf_symbolizer |
| 24 | 231 |
| 25 import symbolize_trace_atos_regex | 232 import symbolize_trace_atos_regex |
| 26 import symbolize_trace_macho_reader | 233 import symbolize_trace_macho_reader |
| 27 | 234 |
| 28 | 235 |
class NodeWrapper(object):
  """Base class for wrappers around event data nodes.

  A "node" is a reference into the JSON of a trace event. A wrapper parses
  one or more such nodes, exposes a convenient API on top of them, and can
  push changes made through that API back into the underlying JSON (see
  ApplyModifications() below).

  For example, a legacy metadata event carrying a stack frame tree looks
  like this:

    {
      "args": {
        "stackFrames": { ... }
      },
      "cat": "__metadata",
      "name": "stackFrames",
      "ph": "M",
      ...
    }

  When such an event is encountered, a reference to its "stackFrames"
  dictionary is obtained and handed to the wrapper class that knows how to
  parse and update that dictionary.

  Parsing follows one of two patterns, depending on whether the node is
  serialized incrementally:

  * Non-incremental nodes are parsed directly in __init__()
    (see MemoryMap for an example).

  * Incremental nodes leave __init__() empty; instead ParseNext() is
    invoked each time another node (from a later event) is seen.

  Wrappers that can also modify the nodes they parsed expose two extra
  APIs:

  * 'modified' - flag indicating whether the wrapper has unsaved changes.

  * 'ApplyModifications()' - writes changes back into the nodes; a
    successful invocation resets the 'modified' flag.
  """
  pass
| 33 | 280 |
| 34 | 281 |
| 35 # Matches Android library paths, supports both K (/data/app-lib/<>/lib.so) | 282 class MemoryMap(NodeWrapper): |
| 36 # as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available | 283 """Wraps 'process_mmaps' node. |
| 37 # via 'name' group. | |
| 38 ANDROID_PATH_MATCHER = re.compile( | |
| 39 r'^/data/(?:' | |
| 40 r'app/[^/]+/lib/[^/]+/|' | |
| 41 r'app-lib/[^/]+/|' | |
| 42 r'data/[^/]+/incremental-install-files/lib/' | |
| 43 r')(?P<name>.*\.so)') | |
| 44 | 284 |
| 45 # Subpath of output path where unstripped libraries are stored. | 285 'process_mmaps' node contains information about file mappings. |
| 46 ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped' | |
| 47 | 286 |
| 48 | 287 "process_mmaps": { |
| 49 def FindInSystemPath(binary_name): | 288 "vm_regions": [ |
| 50 paths = os.environ['PATH'].split(os.pathsep) | 289 { |
| 51 for path in paths: | 290 "mf": "<file_path>", |
| 52 binary_path = os.path.join(path, binary_name) | 291 "sa": "<start_address>", |
| 53 if os.path.isfile(binary_path): | 292 "sz": "<size>", |
| 54 return binary_path | 293 ... |
| 55 return None | 294 }, |
| 56 | 295 ... |
| 57 | 296 ] |
| 58 class Symbolizer(object): | 297 } |
| 59 # Encapsulates platform-specific symbolization logic. | 298 """ |
| 60 def __init__(self): | |
| 61 self.is_mac = sys.platform == 'darwin' | |
| 62 self.is_win = sys.platform == 'win32' | |
| 63 if self.is_mac: | |
| 64 self.binary = 'atos' | |
| 65 self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher() | |
| 66 elif self.is_win: | |
| 67 self.binary = 'addr2line-pdb.exe' | |
| 68 else: | |
| 69 self.binary = 'addr2line' | |
| 70 self.symbolizer_path = FindInSystemPath(self.binary) | |
| 71 | |
| 72 def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name): | |
| 73 def _SymbolizerCallback(sym_info, frames): | |
| 74 # Unwind inline chain to the top. | |
| 75 while sym_info.inlined_by: | |
| 76 sym_info = sym_info.inlined_by | |
| 77 | |
| 78 symbolized_name = sym_info.name if sym_info.name else unsymbolized_name | |
| 79 for frame in frames: | |
| 80 frame.name = symbolized_name | |
| 81 | |
| 82 symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path, | |
| 83 self.symbolizer_path, | |
| 84 _SymbolizerCallback, | |
| 85 inlines=True) | |
| 86 | |
| 87 for address, frames in symfile.frames_by_address.iteritems(): | |
| 88 # SymbolizeAsync() asserts that the type of address is int. We operate | |
| 89 # on longs (since they are raw pointers possibly from 64-bit processes). | |
| 90 # It's OK to cast here because we're passing relative PC, which should | |
| 91 # always fit into int. | |
| 92 symbolizer.SymbolizeAsync(int(address), frames) | |
| 93 | |
| 94 symbolizer.Join() | |
| 95 | |
| 96 | |
| 97 def _SymbolizeMac(self, symfile): | |
| 98 chars_max = int(subprocess.check_output("getconf ARG_MAX", shell=True)) | |
| 99 | |
| 100 # 16 for the address, 2 for "0x", 1 for the space | |
| 101 chars_per_address = 19 | |
| 102 | |
| 103 load_address = (symbolize_trace_macho_reader. | |
| 104 ReadMachOTextLoadAddress(symfile.symbolizable_path)) | |
| 105 assert load_address is not None | |
| 106 | |
| 107 cmd_base = [self.symbolizer_path, '-arch', 'x86_64', '-l', | |
| 108 '0x%x' % load_address, '-o', | |
| 109 symfile.symbolizable_path] | |
| 110 chars_for_other_arguments = len(' '.join(cmd_base)) + 1 | |
| 111 | |
| 112 # The maximum number of inputs that can be processed at once is limited by | |
| 113 # ARG_MAX. This currently evalutes to ~13000 on macOS. | |
| 114 max_inputs = (chars_max - chars_for_other_arguments) / chars_per_address | |
| 115 | |
| 116 all_keys = symfile.frames_by_address.keys() | |
| 117 processed_keys_count = 0 | |
| 118 while len(all_keys): | |
| 119 input_count = min(len(all_keys), max_inputs) | |
| 120 keys_to_process = all_keys[0:input_count] | |
| 121 | |
| 122 cmd = list(cmd_base) | |
| 123 cmd.extend([hex(int(x) + load_address) | |
| 124 for x in keys_to_process]) | |
| 125 output_array = subprocess.check_output(cmd).split('\n') | |
| 126 for i in range(len(keys_to_process)): | |
| 127 for frame in (symfile.frames_by_address.values() | |
| 128 [i + processed_keys_count]): | |
| 129 frame.name = self._matcher.Match(output_array[i]) | |
| 130 processed_keys_count += len(keys_to_process) | |
| 131 all_keys = all_keys[input_count:] | |
| 132 | |
| 133 | |
| 134 def _SymbolizeWin(self, symfile): | |
| 135 """Invoke symbolizer binary on windows and write all input in one go. | |
| 136 | |
| 137 Unlike linux, on windows, symbolization talks through a shared system | |
| 138 service that handles communication with the NT symbol servers. This | |
| 139 creates an explicit serialization (and therefor lock contention) of | |
| 140 any process using the symbol API for files do not have a local PDB. | |
| 141 | |
| 142 Thus, even though the windows symbolizer binary can be make command line | |
| 143 compatible with the POSIX addr2line interface, paralellizing the | |
| 144 symbolization does not yield the same performance effects. Running | |
| 145 just one symbolizer seems good enough for now. Can optimize later | |
| 146 if this becomes a bottleneck. | |
| 147 """ | |
| 148 cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe', | |
| 149 symfile.symbolizable_path] | |
| 150 | |
| 151 proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, | |
| 152 stderr=sys.stderr) | |
| 153 addrs = ["%x" % relative_pc for relative_pc in | |
| 154 symfile.frames_by_address.keys()] | |
| 155 (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs)) | |
| 156 stdout_data = stdout_data.split('\n') | |
| 157 | |
| 158 # This is known to be in the same order as stderr_data. | |
| 159 for i, addr in enumerate(addrs): | |
| 160 for frame in symfile.frames_by_address[int(addr, 16)]: | |
| 161 # Output of addr2line with --functions is always 2 outputs per | |
| 162 # symbol, function name followed by source line number. Only grab | |
| 163 # the function name as line info is not always available. | |
| 164 frame.name = stdout_data[i * 2] | |
| 165 | |
| 166 | |
| 167 def Symbolize(self, symfile, unsymbolized_name): | |
| 168 if self.is_mac: | |
| 169 self._SymbolizeMac(symfile) | |
| 170 if self.is_win: | |
| 171 self._SymbolizeWin(symfile) | |
| 172 else: | |
| 173 self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name) | |
| 174 | |
| 175 | |
| 176 def IsSymbolizableFile(self, file_path): | |
| 177 if self.is_win: | |
| 178 extension = os.path.splitext(file_path)[1].lower() | |
| 179 return extension in ['.dll', '.exe'] | |
| 180 else: | |
| 181 result = subprocess.check_output(['file', '-0', file_path]) | |
| 182 type_string = result[result.find('\0') + 1:] | |
| 183 return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*', | |
| 184 type_string, re.DOTALL)) | |
| 185 | |
| 186 | |
| 187 class ProcessMemoryMaps(object): | |
| 188 """Represents 'process_mmaps' trace file entry.""" | |
| 189 | 299 |
| 190 class Region(object): | 300 class Region(object): |
| 191 def __init__(self, start_address, size, file_path): | 301 def __init__(self, start_address, size, file_path): |
| 192 self._start_address = start_address | 302 self._start_address = start_address |
| 193 self._size = size | 303 self._size = size |
| 194 self._file_path = file_path | 304 self._file_path = file_path |
| 195 | 305 |
| 196 @property | 306 @property |
| 197 def start_address(self): | 307 def start_address(self): |
| 198 return self._start_address | 308 return self._start_address |
| (...skipping 15 matching lines...) Expand all Loading... |
| 214 return long(self._start_address).__cmp__(long(other._start_address)) | 324 return long(self._start_address).__cmp__(long(other._start_address)) |
| 215 elif isinstance(other, (long, int)): | 325 elif isinstance(other, (long, int)): |
| 216 return long(self._start_address).__cmp__(long(other)) | 326 return long(self._start_address).__cmp__(long(other)) |
| 217 else: | 327 else: |
| 218 raise Exception('Cannot compare with %s' % type(other)) | 328 raise Exception('Cannot compare with %s' % type(other)) |
| 219 | 329 |
| 220 def __repr__(self): | 330 def __repr__(self): |
| 221 return 'Region(0x{:X} - 0x{:X}, {})'.format( | 331 return 'Region(0x{:X} - 0x{:X}, {})'.format( |
| 222 self.start_address, self.end_address, self.file_path) | 332 self.start_address, self.end_address, self.file_path) |
| 223 | 333 |
| 224 def __init__(self, process_mmaps): | 334 def __init__(self, process_mmaps_node): |
| 225 """Parses 'process_mmaps' dictionary.""" | |
| 226 | |
| 227 regions = [] | 335 regions = [] |
| 228 for region_value in process_mmaps['vm_regions']: | 336 for region_node in process_mmaps_node['vm_regions']: |
| 229 regions.append(self.Region( | 337 regions.append(self.Region( |
| 230 long(region_value['sa'], 16), | 338 long(region_node['sa'], 16), |
| 231 long(region_value['sz'], 16), | 339 long(region_node['sz'], 16), |
| 232 region_value['mf'])) | 340 region_node['mf'])) |
| 233 regions.sort() | 341 regions.sort() |
| 234 | 342 |
| 235 # Copy regions without duplicates and check for overlaps. | 343 # Copy regions without duplicates and check for overlaps. |
| 236 self._regions = [] | 344 self._regions = [] |
| 237 previous_region = None | 345 previous_region = None |
| 238 for region in regions: | 346 for region in regions: |
| 239 if previous_region is not None: | 347 if previous_region is not None: |
| 240 if region == previous_region: | 348 if region == previous_region: |
| 241 continue | 349 continue |
| 242 assert region.start_address >= previous_region.end_address, \ | 350 assert region.start_address >= previous_region.end_address, \ |
| 243 'Regions {} and {} overlap.'.format(previous_region, region) | 351 'Regions {} and {} overlap.'.format(previous_region, region) |
| 244 previous_region = region | 352 previous_region = region |
| 245 self._regions.append(region) | 353 self._regions.append(region) |
| 246 | 354 |
| 247 @property | 355 @property |
| 248 def regions(self): | 356 def regions(self): |
| 249 return self._regions | 357 return self._regions |
| 250 | 358 |
| 251 def FindRegion(self, address): | 359 def FindRegion(self, address): |
| 252 """Finds region containing |address|. Returns None if none found.""" | 360 """Finds region containing |address|. Returns None if none found.""" |
| 253 | 361 |
| 254 region_index = bisect.bisect_right(self._regions, address) - 1 | 362 region_index = bisect.bisect_right(self._regions, address) - 1 |
| 255 if region_index >= 0: | 363 if region_index >= 0: |
| 256 region = self._regions[region_index] | 364 region = self._regions[region_index] |
| 257 if address >= region.start_address and address < region.end_address: | 365 if address >= region.start_address and address < region.end_address: |
| 258 return region | 366 return region |
| 259 return None | 367 return None |
| 260 | 368 |
| 261 | 369 |
class UnsupportedHeapDumpVersionError(Exception):
  """Raised when a heap dump's version is not one this script understands."""

  def __init__(self, version):
    super(UnsupportedHeapDumpVersionError, self).__init__(
        'Unsupported heap dump version: {}'.format(version))
| 377 |
class StringMap(NodeWrapper):
  """Wraps all 'strings' nodes for a process.

  'strings' node contains incremental mappings between integer ids and strings.

  "strings": [
    {
      "id": <string_id>,
      "string": <string>
    },
    ...
  ]
  """

  def __init__(self):
    self._modified = False
    self._strings_nodes = []  # All 'strings' nodes seen so far.
    self._string_by_id = {}
    self._id_by_string = {}
    self._max_string_id = 0

  @property
  def modified(self):
    """Returns True if the wrapper was modified (see NodeWrapper)."""
    return self._modified

  @property
  def string_by_id(self):
    """Returns {id -> string} dict (must not be changed directly)."""
    return self._string_by_id

  def ParseNext(self, heap_dump_version, strings_node):
    """Parses and interns next node (see NodeWrapper)."""

    if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
      raise UnsupportedHeapDumpVersionError(heap_dump_version)

    self._strings_nodes.append(strings_node)
    for string_node in strings_node:
      self._Insert(string_node['id'], string_node['string'])

  def Clear(self):
    """Clears all string mappings."""
    if self._string_by_id:
      self._modified = True
      # ID #0 means 'no entry' and must always be present. Carry it over.
      null_string = self._string_by_id[0]
      self._string_by_id = {}
      self._id_by_string = {}
      self._Insert(0, null_string)
      self._max_string_id = 0

  def AddString(self, string):
    """Adds a string (if it doesn't exist) and returns its integer id."""
    string_id = self._id_by_string.get(string)
    if string_id is None:
      string_id = self._max_string_id + 1
      self._Insert(string_id, string)
      self._modified = True
    return string_id

  def ApplyModifications(self):
    """Propagates modifications back to nodes (see NodeWrapper)."""
    if not self.modified:
      return

    assert self._strings_nodes, 'no nodes'

    # Serialize into the first node, and clear all others.

    for strings_node in self._strings_nodes:
      del strings_node[:]
    strings_node = self._strings_nodes[0]
    # .items() (not the Python 2-only .iteritems()) so this runs under
    # both Python 2 and Python 3.
    for string_id, string in self._string_by_id.items():
      strings_node.append({'id': string_id, 'string': string})

    self._modified = False

  def _Insert(self, string_id, string):
    # Maintains both directions of the mapping plus the running max id
    # (used by AddString() to mint fresh ids).
    self._id_by_string[string] = string_id
    self._string_by_id[string_id] = string
    self._max_string_id = max(self._max_string_id, string_id)
| 459 |
| 460 |
class TypeNameMap(NodeWrapper):
  """Wraps all 'types' nodes for a process.

  'types' nodes encode mappings between integer type ids and integer
  string ids (from 'strings' nodes).

  "types": [
    {
      "id": <type_id>,
      "name_sid": <name_string_id>
    }
    ...
  ]

  For simplicity string ids are translated into strings during parsing,
  and then translated back to ids in ApplyModifications().
  """
  def __init__(self):
    self._modified = False
    self._type_name_nodes = []  # All 'types' nodes seen so far.
    self._name_by_id = {}
    self._id_by_name = {}
    self._max_type_id = 0

  @property
  def modified(self):
    """Returns True if the wrapper was modified (see NodeWrapper)."""
    return self._modified

  @property
  def name_by_id(self):
    """Returns {id -> name} dict (must not be changed directly)."""
    return self._name_by_id

  def ParseNext(self, heap_dump_version, type_name_node, string_map):
    """Parses and interns next node (see NodeWrapper).

    |string_map| - A StringMap object to use to translate string ids
                   to strings.
    """
    if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
      raise UnsupportedHeapDumpVersionError(heap_dump_version)

    self._type_name_nodes.append(type_name_node)
    for type_node in type_name_node:
      self._Insert(type_node['id'],
                   string_map.string_by_id[type_node['name_sid']])

  def AddType(self, type_name):
    """Adds a type name (if it doesn't exist) and returns its id."""
    type_id = self._id_by_name.get(type_name)
    if type_id is None:
      type_id = self._max_type_id + 1
      self._Insert(type_id, type_name)
      self._modified = True
    return type_id

  def ApplyModifications(self, string_map, force=False):
    """Propagates modifications back to nodes.

    |string_map| - A StringMap object to use to translate strings to ids.
    |force| - Whether to propagate changes regardless of 'modified' flag.
    """
    if not self.modified and not force:
      return

    assert self._type_name_nodes, 'no nodes'

    # Serialize into the first node, and clear all others.

    for types_node in self._type_name_nodes:
      del types_node[:]
    types_node = self._type_name_nodes[0]
    # .items() (not the Python 2-only .iteritems()) keeps this working on
    # both Python 2 and Python 3.
    for type_id, type_name in self._name_by_id.items():
      types_node.append({
          'id': type_id,
          'name_sid': string_map.AddString(type_name)})

    self._modified = False

  def _Insert(self, type_id, type_name):
    # Maintains both directions of the mapping plus the running max id
    # (used by AddType() to mint fresh ids).
    self._id_by_name[type_name] = type_id
    self._name_by_id[type_id] = type_name
    self._max_type_id = max(self._max_type_id, type_id)
| 545 |
| 546 |
| 547 class StackFrameMap(NodeWrapper): |
| 548 """ Wraps stack frame tree nodes for a process. |
| 549 |
| 550 For the legacy format this wrapper expects a single 'stackFrames' node |
| 551 (which comes from metadata event): |
| 552 |
| 553 "stackFrames": { |
| 554 "<frame_id>": { |
| 555 "name": "<frame_name>" |
| 556 "parent": "<parent_frame_id>" |
| 557 }, |
| 558 ... |
| 559 } |
| 560 |
| 561 For the modern format this wrapper expects several 'nodes' nodes: |
| 562 |
| 563 "nodes": [ |
| 564 { |
| 565 "id": <frame_id>, |
| 566 "parent": <parent_frame_id>, |
| 567 "name_sid": <name_string_id> |
| 568 }, |
| 569 ... |
| 570 ] |
| 571 |
| 572 In both formats frame name is a string. Native heap profiler generates |
| 573 specially formatted frame names (e.g. "pc:10eb78dba") for function |
| 574 addresses (PCs). Inner Frame class below parses name and extracts PC, |
| 575 if it's there. |
| 576 """ |
| 577 class Frame(object): |
| 578 def __init__(self, frame_id, name, parent_frame_id): |
| 267 self._modified = False | 579 self._modified = False |
| 268 self._pc = pc | 580 self._id = frame_id |
| 269 self._frame = frame | 581 self._name = name |
| 582 self._pc = self._ParsePC(name) |
| 583 self._parent_id = parent_frame_id |
| 584 self._ext = None |
| 270 | 585 |
| 271 @property | 586 @property |
| 272 def modified(self): | 587 def modified(self): |
| 588 """Returns True if the frame was modified. |
| 589 |
| 590 For example changing frame's name sets this flag (since the change |
| 591 needs to be propagated back to nodes). |
| 592 """ |
| 273 return self._modified | 593 return self._modified |
| 274 | 594 |
| 275 @property | 595 @property |
| 596 def id(self): |
| 597 """Frame id (integer).""" |
| 598 return self._id |
| 599 |
| 600 @property |
| 276 def pc(self): | 601 def pc(self): |
| 602 """Parsed (integer) PC of the frame, or None.""" |
| 277 return self._pc | 603 return self._pc |
| 278 | 604 |
| 279 @property | 605 @property |
| 280 def name(self): | 606 def name(self): |
| 281 return self._frame['name'] | 607 """Name of the frame (see above).""" |
| 608 return self._name |
| 282 | 609 |
| 283 @name.setter | 610 @name.setter |
| 284 def name(self, value): | 611 def name(self, value): |
| 612 """Changes the name. Doesn't affect value of |pc|.""" |
| 285 self._modified = True | 613 self._modified = True |
| 286 self._frame['name'] = value | 614 self._name = value |
| 287 | 615 |
| 288 def __init__(self, stack_frames): | 616 @property |
| 289 """Constructs object using 'stackFrames' dictionary.""" | 617 def parent_id(self): |
| 290 self._pc_frames = [] | 618 """Parent frame id (integer).""" |
| 291 for frame in stack_frames.itervalues(): | 619 return self._parent_id |
| 292 pc_frame = self._ParsePCFrame(frame) | 620 |
| 293 if pc_frame: | 621 _PC_TAG = 'pc:' |
| 294 self._pc_frames.append(pc_frame) | 622 |
| 295 | 623 def _ParsePC(self, name): |
| 296 @property | 624 if not name.startswith(self._PC_TAG): |
| 297 def pc_frames(self): | 625 return None |
| 298 return self._pc_frames | 626 return long(name[len(self._PC_TAG):], 16) |
| 627 |
| 628 def _ClearModified(self): |
| 629 self._modified = False |
| 630 |
| 631 def __init__(self): |
| 632 self._modified = False |
| 633 self._heap_dump_version = None |
| 634 self._stack_frames_nodes = [] |
| 635 self._frame_by_id = {} |
| 299 | 636 |
| 300 @property | 637 @property |
| 301 def modified(self): | 638 def modified(self): |
| 302 return any(f.modified for f in self._pc_frames) | 639 """Returns True if the wrapper or any of its frames were modified.""" |
| 303 | 640 return (self._modified or |
| 304 _PC_TAG = 'pc:' | 641 any(f.modified for f in self._frame_by_id.itervalues())) |
| 305 | 642 |
| 306 @classmethod | 643 @property |
| 307 def _ParsePCFrame(self, frame): | 644 def frame_by_id(self): |
| 308 name = frame['name'] | 645 """Returns {id -> frame} dict (must not be modified directly).""" |
| 309 if not name.startswith(self._PC_TAG): | 646 return self._frame_by_id |
| 310 return None | 647 |
  def ParseNext(self, heap_dump_version, stack_frames_node, string_map):
    """Parses the next stack frames node (see NodeWrapper).

    For the modern format |string_map| is used to translate string ids
    to strings.
    """

    frame_by_id = {}
    if heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
      # Legacy format: a single {frame_id -> {'name', 'parent'}} dictionary
      # attached to the 'stackFrames' metadata event.
      if self._stack_frames_nodes:
        raise Exception('Legacy stack frames node is expected only once.')
      for frame_id, frame_node in stack_frames_node.iteritems():
        frame = self.Frame(frame_id,
                           frame_node['name'],
                           frame_node.get('parent'))
        frame_by_id[frame.id] = frame
    else:
      if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
        raise UnsupportedHeapDumpVersionError(heap_dump_version)
      # Modern format: a list of nodes whose names are string ids resolved
      # through |string_map|.
      for frame_node in stack_frames_node:
        frame = self.Frame(frame_node['id'],
                           string_map.string_by_id[frame_node['name_sid']],
                           frame_node.get('parent'))
        frame_by_id[frame.id] = frame

    self._heap_dump_version = heap_dump_version
    self._stack_frames_nodes.append(stack_frames_node)

    # NOTE(review): this replaces (rather than merges) the previous map even
    # though _stack_frames_nodes accumulates; presumably each modern node
    # supersedes earlier ones — TODO confirm against ApplyModifications(),
    # which serializes only the surviving map into the first node.
    self._frame_by_id = frame_by_id
| 677 |
  def ApplyModifications(self, string_map, force=False):
    """Applies modifications back to nodes (see NodeWrapper).

    When |force| is True, serialization happens even if nothing was
    modified (used after the string map has been rebuilt from scratch).
    """

    if not self.modified and not force:
      return

    assert self._stack_frames_nodes, 'no nodes'
    if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
      assert string_map is None, \
          'string_map should not be used with the legacy format'

    # Serialize frames into the first node, clear all others.

    # Legacy nodes are dicts; modern nodes are lists — hence the different
    # ways of emptying them.
    for frames_node in self._stack_frames_nodes:
      if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
        frames_node.clear()
      else:
        del frames_node[:]

    frames_node = self._stack_frames_nodes[0]
    for frame in self._frame_by_id.itervalues():
      if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
        frame_node = {'name': frame.name}
        frames_node[frame.id] = frame_node
      else:
        frame_node = {
          'id': frame.id,
          'name_sid': string_map.AddString(frame.name)
        }
        frames_node.append(frame_node)
      if frame.parent_id is not None:
        frame_node['parent'] = frame.parent_id
      # Frames have been flushed to JSON; reset their dirty flag.
      frame._ClearModified()

    self._modified = False
| 713 |
| 714 |
| 715 class Trace(NodeWrapper): |
| 716 """Wrapper for the root trace node (i.e. the trace JSON itself). |
| 717 |
| 718 This wrapper parses select nodes from memory-infra events and groups |
| 719 parsed data per-process (see inner Process class below). |
| 328 """ | 720 """ |
| 329 | 721 |
| 330 process_map = {} | 722 # Indicates legacy heap dump format. |
| 331 | 723 HEAP_DUMP_VERSION_LEGACY = 'Legacy' |
| 332 # Android traces produced via 'chrome://inspect/?tracing#devices' are | 724 |
| 333 # just list of events. | 725 # Indicates variation of a modern heap dump format. |
| 334 events = trace if isinstance(trace, list) else trace['traceEvents'] | 726 HEAP_DUMP_VERSION_1 = 1 |
| 335 for event in events: | 727 |
| 336 name = event.get('name') | 728 class Process(object): |
| 337 if not name: | 729 """Collection of per-process data and wrappers.""" |
| 338 continue | 730 |
| 339 | 731 def __init__(self, pid): |
| 340 pid = event['pid'] | 732 self._pid = pid |
| 341 process = process_map.get(pid) | 733 self._name = None |
| 342 if process is None: | 734 self._memory_map = None |
| 343 process = Process(pid) | 735 self._stack_frame_map = StackFrameMap() |
| 344 process_map[pid] = process | 736 self._type_name_map = TypeNameMap() |
| 345 | 737 self._string_map = StringMap() |
| 346 phase = event['ph'] | 738 self._heap_dump_version = None |
| 347 if phase == TRACE_EVENT_PHASE_METADATA: | 739 |
| 348 if name == 'process_name': | 740 @property |
| 349 process.name = event['args']['name'] | 741 def modified(self): |
| 350 elif name == 'stackFrames': | 742 return self._stack_frame_map.modified or self._type_name_map.modified |
| 351 process.stack_frames = StackFrames(event['args']['stackFrames']) | 743 |
| 352 elif phase == TRACE_EVENT_PHASE_MEMORY_DUMP: | 744 @property |
| 353 process_mmaps = event['args']['dumps'].get('process_mmaps') | 745 def pid(self): |
| 354 if process_mmaps: | 746 return self._pid |
| 355 # TODO(dskiba): this parses all process_mmaps, but retains only the | 747 |
| 356 # last one. We need to parse only once (lazy parsing?). | 748 @property |
| 357 process.mmaps = ProcessMemoryMaps(process_mmaps) | 749 def name(self): |
| 358 | 750 return self._name |
| 359 return [p for p in process_map.itervalues() if p.mmaps and p.stack_frames] | 751 |
| 752 @property |
| 753 def unique_name(self): |
| 754 """Returns string that includes both process name and its pid.""" |
| 755 name = self._name if self._name else 'UnnamedProcess' |
| 756 return '{}({})'.format(name, self._pid) |
| 757 |
| 758 @property |
| 759 def memory_map(self): |
| 760 return self._memory_map |
| 761 |
| 762 @property |
| 763 def stack_frame_map(self): |
| 764 return self._stack_frame_map |
| 765 |
| 766 @property |
| 767 def type_name_map(self): |
| 768 return self._type_name_map |
| 769 |
| 770 def ApplyModifications(self): |
| 771 """Calls ApplyModifications() on contained wrappers.""" |
| 772 if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
| 773 self._stack_frame_map.ApplyModifications(None) |
| 774 else: |
| 775 if self._stack_frame_map.modified or self._type_name_map.modified: |
| 776 self._string_map.Clear() |
| 777 self._stack_frame_map.ApplyModifications(self._string_map, force=True) |
| 778 self._type_name_map.ApplyModifications(self._string_map, force=True) |
| 779 self._string_map.ApplyModifications() |
| 780 |
| 781 def __init__(self, trace_node): |
| 782 self._trace_node = trace_node |
| 783 self._processes = [] |
| 784 self._heap_dump_version = None |
| 785 |
| 786 # Misc per-process information needed only during parsing. |
| 787 class ProcessExt(object): |
| 788 def __init__(self, pid): |
| 789 self.process = Trace.Process(pid) |
| 790 self.mapped_entry_names = set() |
| 791 self.process_mmaps_node = None |
| 792 self.seen_strings_node = False |
| 793 |
| 794 process_ext_by_pid = {} |
| 795 |
| 796 # Android traces produced via 'chrome://inspect/?tracing#devices' are |
| 797 # just list of events. |
| 798 events = trace_node if isinstance(trace_node, list) \ |
| 799 else trace_node['traceEvents'] |
| 800 for event in events: |
| 801 name = event.get('name') |
| 802 if not name: |
| 803 continue |
| 804 |
| 805 pid = event['pid'] |
| 806 process_ext = process_ext_by_pid.get(pid) |
| 807 if process_ext is None: |
| 808 process_ext = ProcessExt(pid) |
| 809 process_ext_by_pid[pid] = process_ext |
| 810 process = process_ext.process |
| 811 |
| 812 phase = event['ph'] |
| 813 if phase == self._EVENT_PHASE_METADATA: |
| 814 if name == 'process_name': |
| 815 process._name = event['args']['name'] |
| 816 elif name == 'stackFrames': |
| 817 process._stack_frame_map.ParseNext( |
| 818 self._UseHeapDumpVersion(self.HEAP_DUMP_VERSION_LEGACY), |
| 819 event['args']['stackFrames'], |
| 820 process._string_map) |
| 821 elif phase == self._EVENT_PHASE_MEMORY_DUMP: |
| 822 dumps = event['args']['dumps'] |
| 823 process_mmaps = dumps.get('process_mmaps') |
| 824 if process_mmaps: |
| 825 # We want the most recent memory map, so parsing happens later |
| 826 # once we finished reading all events. |
| 827 process_ext.process_mmaps_node = process_mmaps |
| 828 heaps = dumps.get('heaps_v2') |
| 829 if heaps: |
| 830 version = self._UseHeapDumpVersion(heaps['version']) |
| 831 maps = heaps.get('maps') |
| 832 if maps: |
| 833 process_ext.mapped_entry_names.update(maps.iterkeys()) |
| 834 types = maps.get('types') |
| 835 stack_frames = maps.get('nodes') |
| 836 strings = maps.get('strings') |
| 837 if (strings is None and (types or stack_frames) |
| 838 and not process_ext.seen_strings_node): |
| 839 # ApplyModifications() for TypeNameMap and StackFrameMap puts |
| 840 # everything into the first node and depends on StringMap. So |
| 841 # we need to make sure that 'strings' node is there if any of |
| 842 # other two nodes present. |
| 843 strings = [] |
| 844 maps['strings'] = strings |
| 845 if strings is not None: |
| 846 process_ext.seen_strings_node = True |
| 847 process._string_map.ParseNext(version, strings) |
| 848 if types: |
| 849 process._type_name_map.ParseNext( |
| 850 version, types, process._string_map) |
| 851 if stack_frames: |
| 852 process._stack_frame_map.ParseNext( |
| 853 version, stack_frames, process._string_map) |
| 854 |
| 855 self._processes = [] |
| 856 for pe in process_ext_by_pid.itervalues(): |
| 857 pe.process._heap_dump_version = self._heap_dump_version |
| 858 if pe.process_mmaps_node: |
| 859 # Now parse the most recent memory map. |
| 860 pe.process._memory_map = MemoryMap(pe.process_mmaps_node) |
| 861 self._processes.append(pe.process) |
| 862 |
| 863 @property |
| 864 def node(self): |
| 865 """Root node (that was passed to the __init__).""" |
| 866 return self._trace_node |
| 867 |
| 868 @property |
| 869 def modified(self): |
| 870 """Returns True if trace file needs to be updated. |
| 871 |
| 872 Before writing trace JSON back to a file ApplyModifications() needs |
| 873 to be called. |
| 874 """ |
| 875 return any(p.modified for p in self._processes) |
| 876 |
| 877 @property |
| 878 def processes(self): |
| 879 return self._processes |
| 880 |
| 881 @property |
| 882 def heap_dump_version(self): |
| 883 return self._heap_dump_version |
| 884 |
| 885 def ApplyModifications(self): |
| 886 """Propagates modifications back to the trace JSON.""" |
| 887 for process in self._processes: |
| 888 process.ApplyModifications() |
| 889 assert not self.modified, 'still modified' |
| 890 |
| 891 # Relevant trace event phases from Chromium's |
| 892 # src/base/trace_event/common/trace_event_common.h. |
| 893 _EVENT_PHASE_METADATA = 'M' |
| 894 _EVENT_PHASE_MEMORY_DUMP = 'v' |
| 895 |
| 896 def _UseHeapDumpVersion(self, version): |
| 897 if self._heap_dump_version is None: |
| 898 self._heap_dump_version = version |
| 899 return version |
| 900 elif self._heap_dump_version != version: |
| 901 raise Exception( |
| 902 ("Inconsistent trace file: first saw '{}' heap dump version, " |
| 903 "then '{}'.").format(self._heap_dump_version, version)) |
| 904 else: |
| 905 return version |
| 360 | 906 |
| 361 | 907 |
class SymbolizableFile(object):
  """Holds file path, addresses to symbolize and stack frames to update.

  This class is a link between ELFSymbolizer and a trace file: it specifies
  what to symbolize (addresses) and what to update with the symbolization
  result (frames).
  """
  def __init__(self, file_path):
    # Path as recorded in the trace's memory map.
    self.path = file_path
    # Path actually given to the symbolizer; may be remapped later, e.g. to
    # an unstripped Android library.
    self.symbolizable_path = file_path
    # {relative_pc -> [frames]}: every frame whose PC falls at that offset.
    self.frames_by_address = collections.defaultdict(list)
| 373 | 919 |
| 374 | 920 |
def ResolveSymbolizableFiles(processes):
  """Resolves and groups PCs into list of SymbolizableFiles.

  As part of the grouping process, this function resolves PC from each stack
  frame to the corresponding mmap region. Stack frames that failed to resolve
  are symbolized with '<unresolved>'.
  """
  files_by_path = {}
  for process in processes:
    memory_map = process.memory_map
    if not memory_map:
      # Nothing to resolve against for this process.
      continue
    for frame in process.stack_frame_map.frame_by_id.itervalues():
      pc = frame.pc
      if pc is None:
        continue
      region = memory_map.FindRegion(pc)
      if region is None:
        frame.name = '<unresolved>'
        continue

      path = region.file_path
      if path in files_by_path:
        symfile = files_by_path[path]
      else:
        symfile = SymbolizableFile(path)
        files_by_path[path] = symfile

      symfile.frames_by_address[pc - region.start_address].append(frame)
  return files_by_path.values()
| 398 | 948 |
| 399 | 949 |
def FindInSystemPath(binary_name):
  """Searches $PATH directories for |binary_name|.

  Returns the full path of the first match, or None if not found.
  """
  for directory in os.environ['PATH'].split(os.pathsep):
    candidate = os.path.join(directory, binary_name)
    if os.path.isfile(candidate):
      return candidate
  return None
| 957 |
| 958 |
class Symbolizer(object):
  """Encapsulates platform-specific symbolization logic."""

  def __init__(self):
    self.is_mac = sys.platform == 'darwin'
    self.is_win = sys.platform == 'win32'
    if self.is_mac:
      self.binary = 'atos'
      # Parses atos output lines back into symbol names.
      self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher()
    elif self.is_win:
      self.binary = 'addr2line-pdb.exe'
    else:
      self.binary = 'addr2line'
    # None if the binary is not on $PATH; callers must check before use.
    self.symbolizer_path = FindInSystemPath(self.binary)

  def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name):
    """Symbolizes via addr2line (through ELFSymbolizer), asynchronously.

    Frames that addr2line cannot name are set to |unsymbolized_name|.
    """
    def _SymbolizerCallback(sym_info, frames):
      # Unwind inline chain to the top.
      while sym_info.inlined_by:
        sym_info = sym_info.inlined_by

      symbolized_name = sym_info.name if sym_info.name else unsymbolized_name
      for frame in frames:
        frame.name = symbolized_name
        frame.ext.source_path = sym_info.source_path

    symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path,
                                              self.symbolizer_path,
                                              _SymbolizerCallback,
                                              inlines=True)

    for address, frames in symfile.frames_by_address.iteritems():
      # SymbolizeAsync() asserts that the type of address is int. We operate
      # on longs (since they are raw pointers possibly from 64-bit processes).
      # It's OK to cast here because we're passing relative PC, which should
      # always fit into int.
      symbolizer.SymbolizeAsync(int(address), frames)

    symbolizer.Join()


  def _SymbolizeMac(self, symfile):
    """Symbolizes via atos, batching addresses to stay under ARG_MAX."""
    chars_max = int(subprocess.check_output("getconf ARG_MAX", shell=True))

    # 16 for the address, 2 for "0x", 1 for the space
    chars_per_address = 19

    load_address = (symbolize_trace_macho_reader.
                    ReadMachOTextLoadAddress(symfile.symbolizable_path))
    assert load_address is not None

    cmd_base = [self.symbolizer_path, '-arch', 'x86_64', '-l',
                '0x%x' % load_address, '-o',
                symfile.symbolizable_path]
    chars_for_other_arguments = len(' '.join(cmd_base)) + 1

    # The maximum number of inputs that can be processed at once is limited by
    # ARG_MAX. This currently evalutes to ~13000 on macOS.
    # NOTE(review): integer division here relies on Python 2 '/' semantics;
    # under Python 3 this would produce a float.
    max_inputs = (chars_max - chars_for_other_arguments) / chars_per_address

    # NOTE(review): the loop below relies on keys() and values() enumerating
    # the dict in the same order, which holds as long as the dict is not
    # modified in between.
    all_keys = symfile.frames_by_address.keys()
    processed_keys_count = 0
    while len(all_keys):
      input_count = min(len(all_keys), max_inputs)
      keys_to_process = all_keys[0:input_count]

      cmd = list(cmd_base)
      cmd.extend([hex(int(x) + load_address)
                  for x in keys_to_process])

      output_array = subprocess.check_output(cmd).split('\n')
      for i in range(len(keys_to_process)):
        for frame in (symfile.frames_by_address.values()
            [i + processed_keys_count]):
          frame.name = self._matcher.Match(output_array[i])

      processed_keys_count += len(keys_to_process)
      all_keys = all_keys[input_count:]

  def _SymbolizeWin(self, symfile):
    """Invoke symbolizer binary on windows and write all input in one go.

    Unlike linux, on windows, symbolization talks through a shared system
    service that handles communication with the NT symbol servers. This
    creates an explicit serialization (and therefor lock contention) of
    any process using the symbol API for files do not have a local PDB.

    Thus, even though the windows symbolizer binary can be make command line
    compatible with the POSIX addr2line interface, paralellizing the
    symbolization does not yield the same performance effects. Running
    just one symbolizer seems good enough for now. Can optimize later
    if this becomes a bottleneck.
    """
    cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe',
           symfile.symbolizable_path]

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                            stderr=sys.stderr)
    addrs = ["%x" % relative_pc for relative_pc in
             symfile.frames_by_address.keys()]
    (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs))
    stdout_data = stdout_data.split('\n')

    # This is known to be in the same order as stderr_data.
    for i, addr in enumerate(addrs):
      for frame in symfile.frames_by_address[int(addr, 16)]:
        # Output of addr2line with --functions is always 2 outputs per
        # symbol, function name followed by source line number. Only grab
        # the function name as line info is not always available.
        frame.name = stdout_data[i * 2]

  def Symbolize(self, symfile, unsymbolized_name):
    """Dispatches to the platform-specific symbolization routine."""
    if self.is_mac:
      self._SymbolizeMac(symfile)
    elif self.is_win:
      self._SymbolizeWin(symfile)
    else:
      self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name)

  def IsSymbolizableFile(self, file_path):
    """Returns True if the file is of a type the symbolizer can handle."""
    if self.is_win:
      extension = os.path.splitext(file_path)[1].lower()
      return extension in ['.dll', '.exe']
    else:
      # 'file -0' separates the file name from its description with NUL,
      # so the description can be extracted unambiguously.
      result = subprocess.check_output(['file', '-0', file_path])
      type_string = result[result.find('\0') + 1:]
      return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*',
                           type_string, re.DOTALL))
| 1084 |
| 1085 |
def SymbolizeFiles(symfiles, symbolizer):
  """Symbolizes each file in the given list of SymbolizableFiles
  and updates stack frames with symbolization results."""

  if not symfiles:
    print 'Nothing to symbolize.'
    return

  print 'Symbolizing...'

  def _SubPrintf(message, *args):
    # Indented progress output helper.
    print ('  ' + message).format(*args)

  for symfile in symfiles:
    # Name assigned to frames whose file cannot be symbolized.
    unsymbolized_name = '<{}>'.format(
        symfile.path if symfile.path else 'unnamed')

    # Check up-front that the file can be symbolized at all; if not, mark
    # every frame with the placeholder name and skip the file.
    problem = None
    if not os.path.isabs(symfile.symbolizable_path):
      problem = 'not a file'
    elif not os.path.isfile(symfile.symbolizable_path):
      problem = "file doesn't exist"
    elif not symbolizer.IsSymbolizableFile(symfile.symbolizable_path):
      problem = 'file is not symbolizable'
    if problem:
      _SubPrintf("Won't symbolize {} PCs for '{}': {}.",
                 len(symfile.frames_by_address),
                 symfile.symbolizable_path,
                 problem)
      for frames in symfile.frames_by_address.itervalues():
        for frame in frames:
          frame.name = unsymbolized_name
      continue

    _SubPrintf('Symbolizing {} PCs from {}...',
               len(symfile.frames_by_address),
               symfile.path)

    symbolizer.Symbolize(symfile, unsymbolized_name)
| 435 symbolized = True | |
| 436 | 1125 |
| 437 return symbolized | 1126 |
# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so)
# as well as L+ (/data/app/<>/lib/<>/lib.so) and incremental-install
# locations (/data/data/<>/incremental-install-files/lib/lib.so).
# Library name is available via 'name' group.
ANDROID_PATH_MATCHER = re.compile(
    r'^/data/(?:'
    r'app/[^/]+/lib/[^/]+/|'
    r'app-lib/[^/]+/|'
    r'data/[^/]+/incremental-install-files/lib/'
    r')(?P<name>.*\.so)')

# Subpath of output path where unstripped libraries are stored.
ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'
| 438 | 1139 |
| 439 | 1140 |
def HaveFilesFromAndroid(symfiles):
  """Returns True if any symfile path looks like an Android library path."""
  for symfile in symfiles:
    if ANDROID_PATH_MATCHER.match(symfile.path):
      return True
  return False
| 442 | 1143 |
| 443 | 1144 |
def RemapAndroidFiles(symfiles, output_path):
  """Points Android symfiles at unstripped libraries under |output_path|."""
  for symfile in symfiles:
    match = ANDROID_PATH_MATCHER.match(symfile.path)
    if match is None:
      # Clobber file path to trigger "not a file" problem in SymbolizeFiles().
      # Without this, files won't be symbolized with "file not found" problem,
      # which is not accurate.
      symfile.symbolizable_path = 'android://{}'.format(symfile.path)
    else:
      symfile.symbolizable_path = os.path.join(
          output_path, ANDROID_UNSTRIPPED_SUBPATH, match.group('name'))
| 456 | 1157 |
| 457 | 1158 |
def Symbolize(options, trace, symbolizer):
  """Resolves trace PCs, remaps Android paths if needed, and symbolizes."""
  symfiles = ResolveSymbolizableFiles(trace.processes)

  # Android trace files carry no explicit platform marker, so detect them
  # by their Android-specific library paths.
  if HaveFilesFromAndroid(symfiles):
    output_directory = options.output_directory
    if not output_directory:
      sys.exit('The trace file appears to be from Android. Please '
               'specify output directory to properly symbolize it.')
    RemapAndroidFiles(symfiles, os.path.abspath(output_directory))

  SymbolizeFiles(symfiles, symbolizer)
| 1171 |
| 1172 |
def OpenTraceFile(file_path, mode):
  """Opens a trace file, transparently handling gzip-compressed traces.

  |mode| is 'r' or 'w'. Gzip traces are opened in binary mode, plain
  traces in text mode.
  """
  if file_path.endswith('.gz'):
    return gzip.open(file_path, mode + 'b')
  return open(file_path, mode + 't')
| 1178 |
| 1179 |
# Suffix appended to the original trace file name when backing it up
# before rewriting (see main()).
BACKUP_FILE_TAG = '.BACKUP'
| 460 | 1182 |
| 461 def main(): | 1183 def main(): |
| 462 parser = argparse.ArgumentParser() | 1184 parser = argparse.ArgumentParser() |
| 463 parser.add_argument('file', | 1185 parser.add_argument( |
| 464 help='Trace file to symbolize (.json or .json.gz)') | 1186 'file', |
| 465 parser.add_argument('--no-backup', | 1187 help='Trace file to symbolize (.json or .json.gz)') |
| 466 dest='backup', default='true', action='store_false', | |
| 467 help="Don't create {} files".format(BACKUP_FILE_TAG)) | |
| 468 parser.add_argument('--output-directory', | |
| 469 help='The path to the build output directory, such ' + | |
| 470 'as out/Debug. Only needed for Android.') | |
| 471 options = parser.parse_args() | |
| 472 | 1188 |
| 473 trace_file_path = options.file | 1189 parser.add_argument( |
| 474 def _OpenTraceFile(mode): | 1190 '--no-backup', dest='backup', default='true', action='store_false', |
| 475 if trace_file_path.endswith('.gz'): | 1191 help="Don't create {} files".format(BACKUP_FILE_TAG)) |
| 476 return gzip.open(trace_file_path, mode + 'b') | 1192 |
| 477 else: | 1193 parser.add_argument( |
| 478 return open(trace_file_path, mode + 't') | 1194 '--output-directory', |
| 1195 help='The path to the build output directory, such as out/Debug.') |
| 479 | 1196 |
| 480 symbolizer = Symbolizer() | 1197 symbolizer = Symbolizer() |
| 481 if symbolizer.symbolizer_path is None: | 1198 if symbolizer.symbolizer_path is None: |
| 482 sys.exit("Can't symbolize - no %s in PATH." % symbolizer.binary) | 1199 sys.exit("Can't symbolize - no %s in PATH." % symbolizer.binary) |
| 483 | 1200 |
| 1201 options = parser.parse_args() |
| 1202 |
| 1203 trace_file_path = options.file |
| 1204 |
| 484 print 'Reading trace file...' | 1205 print 'Reading trace file...' |
| 485 with _OpenTraceFile('r') as trace_file: | 1206 with OpenTraceFile(trace_file_path, 'r') as trace_file: |
| 486 trace = json.load(trace_file) | 1207 trace = Trace(json.load(trace_file)) |
| 487 | 1208 |
| 488 processes = CollectProcesses(trace) | 1209 Symbolize(options, trace, symbolizer) |
| 489 symfiles = ResolveSymbolizableFiles(processes) | |
| 490 | 1210 |
| 491 # Android trace files don't have any indication they are from Android. | 1211 if trace.modified: |
| 492 # So we're checking for Android-specific paths. | 1212 trace.ApplyModifications() |
| 493 if HaveFilesFromAndroid(symfiles): | |
| 494 if not options.output_directory: | |
| 495 parser.error('The trace file appears to be from Android. Please ' | |
| 496 "specify output directory (e.g. 'out/Debug') to properly " | |
| 497 'symbolize it.') | |
| 498 RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory)) | |
| 499 | 1213 |
| 500 if SymbolizeFiles(symfiles, symbolizer): | |
| 501 if options.backup: | 1214 if options.backup: |
| 502 backup_file_path = trace_file_path + BACKUP_FILE_TAG | 1215 backup_file_path = trace_file_path + BACKUP_FILE_TAG |
| 503 print 'Backing up trace file to {}...'.format(backup_file_path) | 1216 print 'Backing up trace file to {}'.format(backup_file_path) |
| 504 os.rename(trace_file_path, backup_file_path) | 1217 os.rename(trace_file_path, backup_file_path) |
| 505 | 1218 |
| 506 print 'Updating trace file...' | 1219 print 'Updating the trace file...' |
| 507 with _OpenTraceFile('w') as trace_file: | 1220 with OpenTraceFile(trace_file_path, 'w') as trace_file: |
| 508 json.dump(trace, trace_file) | 1221 json.dump(trace.node, trace_file) |
| 509 else: | 1222 else: |
| 510 print 'No PCs symbolized - not updating trace file.' | 1223 print 'No modifications were made - not updating the trace file.' |
| 511 | 1224 |
| 512 | 1225 |
# Script entry point.
if __name__ == '__main__':
  main()
| OLD | NEW |