| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2014 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Generate a spatial analysis against an arbitrary library. | |
| 7 | |
| 8 To use, build the 'binary_size_tool' target. Then run this tool, passing | |
| 9 in the location of the library to be analyzed along with any other options | |
| 10 you desire. | |
| 11 """ | |
| 12 | |
| 13 import collections | |
| 14 import json | |
| 15 import logging | |
| 16 import multiprocessing | |
| 17 import optparse | |
| 18 import os | |
| 19 import re | |
| 20 import shutil | |
| 21 import struct | |
| 22 import subprocess | |
| 23 import sys | |
| 24 import tempfile | |
| 25 import time | |
| 26 | |
| 27 import binary_size_utils | |
| 28 | |
| 29 # This path change is not beautiful. Temporary (I hope) measure until | |
| 30 # the chromium project has figured out a proper way to organize the | |
| 31 # library of python tools. http://crbug.com/375725 | |
| 32 elf_symbolizer_path = os.path.abspath(os.path.join( | |
| 33 os.path.dirname(__file__), | |
| 34 '..', | |
| 35 '..', | |
| 36 'build', | |
| 37 'android', | |
| 38 'pylib')) | |
| 39 sys.path.append(elf_symbolizer_path) | |
| 40 import symbols.elf_symbolizer as elf_symbolizer # pylint: disable=F0401 | |
| 41 | |
| 42 | |
# Node dictionary keys. These are output in json read by the webapp so
# keep them short to save file size.
# Note: If these change, the webapp must also change.
NODE_TYPE_KEY = 'k'          # Node kind: 'p' (path), 'b' (bucket), 's' (symbol).
NODE_NAME_KEY = 'n'          # Display name of the node.
NODE_CHILDREN_KEY = 'children'     # Children: dict while building, list in json.
NODE_SYMBOL_TYPE_KEY = 't'   # nm symbol type character.
NODE_SYMBOL_SIZE_KEY = 'value'     # Symbol size in bytes.
NODE_MAX_DEPTH_KEY = 'maxDepth'    # Deepest tree level (set on the root only).
NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement'  # Marks file (leaf-path) nodes.

# The display name of the bucket where we put symbols without path.
NAME_NO_PATH_BUCKET = '(No Path)'

# Try to keep data buckets smaller than this to avoid killing the
# graphing lib.
BIG_BUCKET_LIMIT = 3000
| 60 | |
| 61 | |
def _MkChild(node, name):
  """Returns the child of |node| named |name|, creating it if missing."""
  children = node[NODE_CHILDREN_KEY]
  if name not in children:
    children[name] = {NODE_NAME_KEY: name,
                      NODE_CHILDREN_KEY: {}}
  return children[name]
| 69 | |
| 70 | |
| 71 | |
def SplitNoPathBucket(node):
  """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
  handle. Split it into sub-buckets in that case.

  |node| is the tree root; it is modified in place.
  """
  root_children = node[NODE_CHILDREN_KEY]
  if NAME_NO_PATH_BUCKET not in root_children:
    return
  no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
  old_children = no_path_bucket[NODE_CHILDREN_KEY]
  # Total symbol count across all symbol-type buckets.
  count = sum(len(bucket[NODE_CHILDREN_KEY])
              for bucket in old_children.values())
  if count <= BIG_BUCKET_LIMIT:
    return
  new_children = {}
  no_path_bucket[NODE_CHILDREN_KEY] = new_children
  current_bucket = None
  index = 0
  # .items()/.values() instead of Python-2-only .iteritems()/.itervalues()
  # so the tool keeps working on Python 3 as well.
  for symbol_type, symbol_bucket in old_children.items():
    for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].items():
      if index % BIG_BUCKET_LIMIT == 0:
        # Floor division: group numbers must stay integers under Python 3's
        # true-division '/' as well.
        group_no = (index // BIG_BUCKET_LIMIT) + 1
        current_bucket = _MkChild(no_path_bucket,
                                  '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
                                                      group_no))
        assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
        node[NODE_TYPE_KEY] = 'p'  # p for path
      index += 1
      symbol_size = value[NODE_SYMBOL_SIZE_KEY]
      AddSymbolIntoFileNode(current_bucket, symbol_type,
                            symbol_name, symbol_size)
| 100 | |
| 101 | |
def MakeChildrenDictsIntoLists(node):
  """Recursively converts every NODE_CHILDREN_KEY dict into a list, as the
  webapp json expects lists.

  Returns the length of the largest children list found anywhere in the
  subtree rooted at |node| (used to warn about oversized buckets).
  """
  largest_list_len = 0
  if NODE_CHILDREN_KEY in node:
    largest_list_len = len(node[NODE_CHILDREN_KEY])
    child_list = []
    # .values() instead of Python-2-only .itervalues() for portability.
    for child in node[NODE_CHILDREN_KEY].values():
      child_largest_list_len = MakeChildrenDictsIntoLists(child)
      if child_largest_list_len > largest_list_len:
        largest_list_len = child_largest_list_len
      child_list.append(child)
    node[NODE_CHILDREN_KEY] = child_list

  return largest_list_len
| 115 | |
| 116 | |
def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
  """Puts symbol into the file path node |node|.
  Returns the number of added levels in tree. I.e. returns 2."""

  # 'node' is the file node and first step is to find its symbol-type bucket.
  node[NODE_LAST_PATH_ELEMENT_KEY] = True
  node = _MkChild(node, symbol_type)
  assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'b'
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 'b'  # b for bucket

  # 'node' is now the symbol-type bucket. Make the child entry.
  node = _MkChild(node, symbol_name)
  if NODE_CHILDREN_KEY in node:
    if node[NODE_CHILDREN_KEY]:
      # Lazy %-style args: logging formats only if the record is emitted.
      logging.warning('A container node used as symbol for %s.', symbol_name)
    # This is going to be used as a leaf so no use for child list.
    del node[NODE_CHILDREN_KEY]
  node[NODE_SYMBOL_SIZE_KEY] = symbol_size
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 's'  # s for symbol

  return 2  # Depth of the added subtree.
| 140 | |
| 141 | |
def MakeCompactTree(symbols, symbol_path_origin_dir):
  """Builds the nested path/bucket/symbol dict tree for the webapp.

  Args:
    symbols: iterable of (name, type, size, path, address) tuples.
    symbol_path_origin_dir: directory that relative symbol paths are
        resolved against.
  Returns:
    The root node dict, with NODE_MAX_DEPTH_KEY set.
  """
  result = {NODE_NAME_KEY: '/',
            NODE_CHILDREN_KEY: {},
            NODE_TYPE_KEY: 'p',
            NODE_MAX_DEPTH_KEY: 0}
  seen_symbol_with_path = False
  cwd = os.path.abspath(os.getcwd())
  for symbol_name, symbol_type, symbol_size, file_path, _address in symbols:

    if 'vtable for ' in symbol_name:
      symbol_type = '@'  # hack to categorize these separately
    # Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz']
    if file_path and file_path != '??':
      file_path = os.path.abspath(os.path.join(symbol_path_origin_dir,
                                               file_path))
      # Let the output structure be relative to $CWD if inside $CWD,
      # otherwise relative to the disk root. This is to avoid
      # unnecessary click-through levels in the output.
      if file_path.startswith(cwd + os.sep):
        file_path = file_path[len(cwd):]
      if file_path.startswith('/'):
        file_path = file_path[1:]
      seen_symbol_with_path = True
    else:
      file_path = NAME_NO_PATH_BUCKET

    # Find pre-existing node in tree, or update if it already exists.
    # (Idiomatic for-loop instead of while/pop(0), which was O(n^2).)
    node = result
    depth = 0
    for path_part in file_path.split('/'):
      if not path_part:
        continue
      depth += 1
      node = _MkChild(node, path_part)
      assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
      node[NODE_TYPE_KEY] = 'p'  # p for path

    depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
    result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)

  if not seen_symbol_with_path:
    logging.warning('Symbols lack paths. Data will not be structured.')

  # The (no path) bucket can be extremely large if we failed to get
  # path information. Split it into subgroups if needed.
  SplitNoPathBucket(result)

  largest_list_len = MakeChildrenDictsIntoLists(result)

  if largest_list_len > BIG_BUCKET_LIMIT:
    # Lazy %-style args: logging formats only if the record is emitted.
    logging.warning('There are sections with %d nodes. '
                    'Results might be unusable.', largest_list_len)
  return result
| 198 | |
| 199 | |
def DumpCompactTree(symbols, symbol_path_origin_dir, outfile):
  """Serializes the symbol tree to |outfile| as a JS variable assignment."""
  root = MakeCompactTree(symbols, symbol_path_origin_dir)
  with open(outfile, 'w') as stream:
    stream.write('var tree_data=')
    # Use separators without whitespace to get a smaller file.
    json.dump(root, stream, separators=(',', ':'))
  print('Writing %d bytes json' % os.path.getsize(outfile))
| 207 | |
| 208 | |
def MakeSourceMap(symbols):
  """Aggregates |symbols| per normalized source path.

  Returns a dict keyed by normalized path (or '[no path]') whose values
  are {'path', 'symbol_count', 'size'} records."""
  sources = {}
  for _sym, _symbol_type, size, path, _address in symbols:
    key = os.path.normpath(path) if path else '[no path]'
    record = sources.setdefault(
        key, {'path': path, 'symbol_count': 0, 'size': 0})
    record['size'] += size
    record['symbol_count'] += 1
  return sources
| 223 | |
| 224 | |
# Regex for parsing "nm" output. A sample line looks like this:
# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
#
# The fields are: address, size, type, name, source location
# Regular expression explained ( see also: https://xkcd.com/208 ):
# ([0-9a-f]{8,}+)  The address
# [\s]+            Whitespace separator
# ([0-9a-f]{8,}+)  The size. From here on out it's all optional.
# [\s]+            Whitespace separator
# (\S?)            The symbol type, which is any non-whitespace char
# [\s*]            Whitespace separator
# ([^\t]*)         Symbol name, any non-tab character (spaces ok!)
# [\t]?            Tab separator
# (.*)             The location (filename[:linennum|?][ (discriminator n)]
#
# NOTE(review): '[\s*]' is a character class matching ONE whitespace char
# OR a literal '*' -- it looks like '[\s]*' was intended. It happens to
# work for normal single-space nm output, so the pattern is left as-is;
# confirm against real nm output before changing it.
sNmPattern = re.compile(
    r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')
| 241 | |
class Progress():
  """Mutable counters shared by the symbolization callbacks in
  RunElfSymbolizer, used for the periodic progress print-out."""
  def __init__(self):
    self.count = 0              # Symbols processed so far.
    self.skip_count = 0         # nm output lines skipped (no lookup needed).
    self.collisions = 0         # Addresses reported more than once.
    self.time_last_output = time.time()  # Timestamp of last progress print.
    self.count_last_output = 0  # Value of |count| at the last progress print.
    self.disambiguations = 0    # Symbols the symbolizer disambiguated.
    self.was_ambiguous = 0      # Symbols flagged as ambiguous by the symbolizer.
| 251 | |
| 252 | |
def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path):
  """Runs nm over |library|, symbolizes every address via addr2line and
  writes '<nm line>\t<path>:<line>' records to |outfile|.

  Args:
    outfile: path of the annotated nm output file to write.
    library: the binary being analyzed.
    addr2line_binary: addr2line executable to use.
    nm_binary: nm executable to use.
    jobs: max concurrent addr2line subprocesses.
    disambiguate: whether the symbolizer should disambiguate source paths.
    src_path: source root used for disambiguation (dropped when disabled).
  """
  nm_output = RunNm(library, nm_binary)
  nm_output_lines = nm_output.splitlines()
  nm_output_lines_len = len(nm_output_lines)
  address_symbol = {}
  progress = Progress()

  def map_address_symbol(symbol, addr):
    # Callback invoked by the symbolizer for each resolved address.
    progress.count += 1
    if addr in address_symbol:
      # 'Collision between %s and %s.' % (str(symbol.name),
      #                                   str(address_symbol[addr].name))
      progress.collisions += 1
    else:
      if symbol.disambiguated:
        progress.disambiguations += 1
      if symbol.was_ambiguous:
        progress.was_ambiguous += 1

      address_symbol[addr] = symbol

    progress_output()

  def progress_output():
    # Emit a status line to stdout, at most once per second.
    progress_chunk = 100
    if progress.count % progress_chunk == 0:
      time_now = time.time()
      time_spent = time_now - progress.time_last_output
      if time_spent > 1.0:
        # Only output at most once per second.
        progress.time_last_output = time_now
        chunk_size = progress.count - progress.count_last_output
        progress.count_last_output = progress.count
        if time_spent > 0:
          speed = chunk_size / time_spent
        else:
          speed = 0
        progress_percent = (100.0 * (progress.count + progress.skip_count) /
                            nm_output_lines_len)
        disambiguation_percent = 0
        if progress.disambiguations != 0:
          disambiguation_percent = (100.0 * progress.disambiguations /
                                    progress.was_ambiguous)

        sys.stdout.write('\r%.1f%%: Looked up %d symbols (%d collisions, '
                         '%d disambiguations where %.1f%% succeeded)'
                         ' - %.1f lookups/s.' %
                         (progress_percent, progress.count, progress.collisions,
                          progress.disambiguations, disambiguation_percent,
                          speed))

  # In case disambiguation was disabled, we remove the source path (which upon
  # being set signals the symbolizer to enable disambiguation)
  if not disambiguate:
    src_path = None
  symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
                                            map_address_symbol,
                                            max_concurrent_jobs=jobs,
                                            source_root_path=src_path)
  user_interrupted = False
  try:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          size = int(match.group(2), 16)
          if addr in address_symbol:  # Already looked up, shortcut
                                      # ELFSymbolizer.
            map_address_symbol(address_symbol[addr], addr)
            continue
          elif size == 0:
            # Save time by not looking up empty symbols (do they even exist?)
            print('Empty symbol: ' + line)
          else:
            symbolizer.SymbolizeAsync(addr, addr)
            continue

      progress.skip_count += 1
  except KeyboardInterrupt:
    user_interrupted = True
    print('Interrupting - killing subprocesses. Please wait.')

  try:
    symbolizer.Join()
  except KeyboardInterrupt:
    # Don't want to abort here since we will be finished in a few seconds.
    user_interrupted = True
    print('Patience you must have my young padawan.')

  # Consistency fix: the file otherwise uses the print() call form; the
  # bare Python-2 "print ''" statement here was the lone exception.
  print('')

  if user_interrupted:
    print('Skipping the rest of the file mapping. '
          'Output will not be fully classified.')

  symbol_path_origin_dir = os.path.dirname(os.path.abspath(library))

  with open(outfile, 'w') as out:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          symbol = address_symbol.get(addr)
          if symbol is not None:
            path = '??'
            if symbol.source_path is not None:
              path = os.path.abspath(os.path.join(symbol_path_origin_dir,
                                                  symbol.source_path))
            line_number = 0
            if symbol.source_line is not None:
              line_number = symbol.source_line
            out.write('%s\t%s:%d\n' % (line, path, line_number))
            continue

      out.write('%s\n' % line)

  print('%d symbols in the results.' % len(address_symbol))
| 373 | |
| 374 | |
def RunNm(binary, nm_binary):
  """Runs nm over |binary| and returns its stdout.

  Args:
    binary: path of the file to analyze.
    nm_binary: nm executable to invoke.
  Raises:
    Exception: if nm exits non-zero; the message carries nm's stderr, or
        its stdout when stderr is empty.
  """
  cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
         binary]
  nm_process = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
  (process_output, err_output) = nm_process.communicate()

  if nm_process.returncode != 0:
    # 'raise Exception, x' is Python-2-only syntax; the call form is
    # equivalent and valid on both Python 2 and 3.
    if err_output:
      raise Exception(err_output)
    else:
      raise Exception(process_output)

  return process_output
| 390 | |
| 391 | |
def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
                 addr2line_binary, nm_binary, disambiguate, src_path):
  """Returns the parsed symbol list, producing nm data first if needed.

  When |nm_infile| is None the data is generated by symbolizing |library|
  into |outfile| (a temp file when |outfile| is None); otherwise
  |nm_infile| is parsed as-is.
  """
  if nm_infile is None:
    if outfile is None:
      outfile = tempfile.NamedTemporaryFile(delete=False).name

    if verbose:
      print('Running parallel addr2line, dumping symbols to ' + outfile)
    RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path)

    nm_infile = outfile

  elif verbose:
    print('Using nm input from ' + nm_infile)
  # open() instead of the deprecated, Python-2-only file() builtin.
  with open(nm_infile, 'r') as infile:
    return list(binary_size_utils.ParseNm(infile))
| 409 | |
| 410 | |
PAK_RESOURCE_ID_TO_STRING = {"inited": False}


def LoadPakIdsFromResourceFile(filename):
  """Given a file name, it loads everything that looks like a resource id
  into PAK_RESOURCE_ID_TO_STRING."""
  with open(filename) as resource_header:
    for line in resource_header:
      # Only '#define NAME <number>' lines are interesting.
      if not line.startswith("#define "):
        continue
      line_data = line.split()
      if len(line_data) != 3:
        continue
      try:
        PAK_RESOURCE_ID_TO_STRING[int(line_data[2])] = line_data[1]
      except ValueError:
        # The third token was not a plain integer; skip the line.
        pass
| 427 | |
def GetReadablePakResourceName(pak_file, resource_id):
  """Pak resources have a numeric identifier. It is not helpful when
  trying to locate where footprint is generated. This does its best to
  map the number to a usable string."""
  if not PAK_RESOURCE_ID_TO_STRING['inited']:
    # Try to find resource header files generated by grit when
    # building the pak file. We'll look for files named *resources.h
    # and lines of the type:
    #     #define MY_RESOURCE_JS 1234
    PAK_RESOURCE_ID_TO_STRING['inited'] = True
    gen_dir = os.path.join(os.path.dirname(pak_file), 'gen')
    if os.path.isdir(gen_dir):
      for dirname, _dirs, files in os.walk(gen_dir):
        headers = [f for f in files if f.endswith('resources.h')]
        for header in headers:
          LoadPakIdsFromResourceFile(os.path.join(dirname, header))
  return PAK_RESOURCE_ID_TO_STRING.get(resource_id,
                                       'Pak Resource %d' % resource_id)
| 446 | |
def AddPakData(symbols, pak_file):
  """Adds pseudo-symbols from a pak file.

  Appends (name, type, size, path) tuples to |symbols|, one per pak
  resource entry.
  """
  pak_file = os.path.abspath(pak_file)
  with open(pak_file, 'rb') as pak:
    data = pak.read()

  PAK_FILE_VERSION = 4
  HEADER_LENGTH = 2 * 4 + 1  # Two uint32s. (file version, number of entries)
                             # and one uint8 (encoding of text resources)
  INDEX_ENTRY_SIZE = 2 + 4  # Each entry is a uint16 and a uint32.
  version, num_entries, _encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
  assert version == PAK_FILE_VERSION, ('Unsupported pak file '
                                       'version (%d) in %s. Only '
                                       'support version %d' %
                                       (version, pak_file, PAK_FILE_VERSION))
  # Walk the index with unpack_from and explicit offsets instead of
  # repeatedly re-slicing |data|, which copied the (potentially large)
  # remainder of the file for every entry. As in the original code, one
  # index record past each entry is read to obtain its end offset --
  # presumably the format provides a terminating record; confirm against
  # the pak format spec before changing entry handling.
  for i in range(num_entries):
    entry_offset = HEADER_LENGTH + i * INDEX_ENTRY_SIZE
    resource_id, offset = struct.unpack_from('<HI', data, entry_offset)
    _next_id, next_offset = struct.unpack_from(
        '<HI', data, entry_offset + INDEX_ENTRY_SIZE)
    resource_size = next_offset - offset

    symbol_name = GetReadablePakResourceName(pak_file, resource_id)
    symbol_path = pak_file
    symbol_type = 'd'  # Data. Approximation.
    symbol_size = resource_size
    symbols.append((symbol_name, symbol_type, symbol_size, symbol_path))
| 476 | |
| 477 def _find_in_system_path(binary): | |
| 478 """Locate the full path to binary in the system path or return None | |
| 479 if not found.""" | |
| 480 system_path = os.environ["PATH"].split(os.pathsep) | |
| 481 for path in system_path: | |
| 482 binary_path = os.path.join(path, binary) | |
| 483 if os.path.isfile(binary_path): | |
| 484 return binary_path | |
| 485 return None | |
| 486 | |
def CheckDebugFormatSupport(library, addr2line_binary):
  """Kills the program if debug data is in an unsupported format.

  There are two common versions of the DWARF debug formats and
  since we are right now transitioning from DWARF2 to newer formats,
  it's possible to have a mix of tools that are not compatible. Detect
  that and abort rather than produce meaningless output."""
  tool_output = subprocess.check_output([addr2line_binary, '--version'])
  # Regex fix: the dots in the version number are escaped now; the old
  # pattern's bare '.' matched any character between the components.
  version_re = re.compile(r'^GNU [^ ]+ .* (\d+)\.(\d+).*?$', re.M)
  parsed_output = version_re.match(tool_output)
  major = int(parsed_output.group(1))
  minor = int(parsed_output.group(2))
  # binutils newer than 2.22 can read DWARF4.
  supports_dwarf4 = major > 2 or major == 2 and minor > 22

  if supports_dwarf4:
    return

  print('Checking version of debug information in %s.' % library)
  debug_info = subprocess.check_output(['readelf', '--debug-dump=info',
                                        '--dwarf-depth=1', library])
  dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
  parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
  version = int(parsed_dwarf_format_output.group(1))
  if version > 2:
    print('The supplied tools only support DWARF2 debug data but the binary\n' +
          'uses DWARF%d. Update the tools or compile the binary\n' % version +
          'with -gdwarf-2.')
    sys.exit(1)
| 515 | |
| 516 | |
def main():
  """Command-line entry point: parse options, run the analysis, emit the
  HTML report into --destdir."""
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

      %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--pak', metavar='PATH',
                    help='if specified, includes the contents of the '
                    'specified *.pak file in the output.')
  parser.add_option('--nm-binary',
                    help='use the specified nm binary to analyze library. '
                    'This is to be used when the nm in the path is not for '
                    'the right architecture or of the right version.')
  parser.add_option('--addr2line-binary',
                    help='use the specified addr2line binary to analyze '
                    'library. This is to be used when the addr2line in '
                    'the path is not for the right architecture or '
                    'of the right version.')
  parser.add_option('--jobs', type='int',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='(deprecated) No-op. nm.out is stored in --destdir.')
  parser.add_option('--no-nm-out', action='store_true',
                    help='do not keep the nm output file. This file is useful '
                    'if you want to see the fully processed nm output after '
                    'the symbols have been mapped to source locations, or if '
                    'you plan to run explain_binary_size_delta.py. By default '
                    'the file \'nm.out\' is placed alongside the generated '
                    'report. The nm.out file is only created when using '
                    '--library.')
  parser.add_option('--disable-disambiguation', action='store_true',
                    help='disables the disambiguation process altogether,'
                    ' NOTE: this may, depending on your toolchain, produce'
                    ' output with some symbols at the top layer if addr2line'
                    ' could not get the entire source path.')
  parser.add_option('--source-path', default='./',
                    help='the path to the source code of the output binary, '
                    'default set to current directory. Used in the'
                    ' disambiguation process.')
  opts, _args = parser.parse_args()

  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_out:
    # sys.stderr.write instead of the Python-2-only 'print >>' statement.
    sys.stderr.write('WARNING: --nm-out is deprecated and has no effect.\n')
  if opts.nm_in:
    if opts.jobs:
      sys.stderr.write('WARNING: --jobs has no effect '
                       'when used with --nm-in\n')
  if not opts.destdir:
    parser.error('--destdir is a required argument')
  if not opts.jobs:
    # Use the number of processors but cap between 2 and 4 since raw
    # CPU power isn't the limiting factor. It's I/O limited, memory
    # bus limited and available-memory-limited. Too many processes and
    # the computer will run out of memory and it will be slow.
    # Bug fix: cpu_count() was wrapped in str(), so max/min compared a
    # string against ints and the result was never the intended cap.
    opts.jobs = max(2, min(4, multiprocessing.cpu_count()))

  if opts.addr2line_binary:
    assert os.path.isfile(opts.addr2line_binary)
    addr2line_binary = opts.addr2line_binary
  else:
    addr2line_binary = _find_in_system_path('addr2line')
    assert addr2line_binary, 'Unable to find addr2line in the path. '\
        'Use --addr2line-binary to specify location.'

  if opts.nm_binary:
    assert os.path.isfile(opts.nm_binary)
    nm_binary = opts.nm_binary
  else:
    nm_binary = _find_in_system_path('nm')
    assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
        'to specify location.'

  if opts.pak:
    # Bug fix: the message was "'Could not find ' % opts.pak" -- a format
    # string without a placeholder, which raised TypeError when triggered.
    assert os.path.isfile(opts.pak), 'Could not find %s' % opts.pak

  print('addr2line: %s' % addr2line_binary)
  print('nm: %s' % nm_binary)

  if opts.library:
    CheckDebugFormatSupport(opts.library, addr2line_binary)

  # Prepare output directory and report guts
  if not os.path.exists(opts.destdir):
    os.makedirs(opts.destdir, 0o755)  # 0o755: valid on Python 2.6+ and 3.
  nm_out = os.path.join(opts.destdir, 'nm.out')
  if opts.no_nm_out:
    nm_out = None

  # Copy report boilerplate into output directory. This also proves that the
  # output directory is safe for writing, so there should be no problems
  # writing the nm.out file later.
  data_js_file_name = os.path.join(opts.destdir, 'data.js')
  d3_out = os.path.join(opts.destdir, 'd3')
  if not os.path.exists(d3_out):
    os.makedirs(d3_out, 0o755)
  d3_src = os.path.join(os.path.dirname(__file__),
                        '..',
                        '..',
                        'third_party', 'd3', 'src')
  template_src = os.path.join(os.path.dirname(__file__),
                              'template')
  shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out)
  shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out)
  shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir)
  shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir)

  # Run nm and/or addr2line to gather the data
  symbols = GetNmSymbols(opts.nm_in, nm_out, opts.library,
                         opts.jobs, opts.verbose is True,
                         addr2line_binary, nm_binary,
                         opts.disable_disambiguation is None,
                         opts.source_path)

  # Post-processing
  if opts.pak:
    AddPakData(symbols, opts.pak)
  if opts.library:
    symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library))
  else:
    # Just a guess. Hopefully all paths in the input file are absolute.
    symbol_path_origin_dir = os.path.abspath(os.getcwd())
  # Dump JSON for the HTML report.
  DumpCompactTree(symbols, symbol_path_origin_dir, data_js_file_name)
  print('Report saved to ' + opts.destdir + '/index.html')


if __name__ == '__main__':
  sys.exit(main())
| OLD | NEW |