| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2014 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Generate a spatial analysis against an arbitrary library. | |
| 7 | |
| 8 Adapted for Skia's use case from | |
| 9 chromium/src/tools/binary_size/run_binary_size_analysis.py. Main changes: | |
| 10 | |
| 11 -- Cleans up some deprecated code. | |
| 12 -- Always use relative code path so the tree root is Skia repo's root. | |
| 13 -- Instead of outputting the standalone HTML/CSS/JS filesets, writes the | |
| 14 TreeMap JSON data into a Google Storage bucket. | |
| 15 -- Adds githash and total_size to the JSON data. | |
| 16 -- Outputs another summary data in JSON Bench format for skiaperf ingestion. | |
| 17 | |
| 18 The output JSON data for visualization is in the following format: | |
| 19 | |
| 20 { | |
| 21 "githash": 123abc, | |
| 22 "commit_ts": 1234567890, | |
| 23 "total_size": 1234567, | |
| 24 "key": {"source_type": "binary_size"}, | |
| 25 "tree_data": { | |
| 26 "maxDepth": 9, | |
| 27 "k": "p", "children":[ | |
| 28 {"k":"p","children":[ | |
| 29 {"k":"p","children":[ | |
| 30 {"k":"p","lastPathElement":true,"children":[ | |
| 31 {"k":"b","t":"t","children":[ | |
| 32 {"k":"s", "t":"t", "value":4029, | |
| 33 "n":"etc_encode_subblock_helper(unsigned char const*, ...)" | |
| 34 }, | |
| 35 ...... | |
| 36 } | |
| 37 } | |
| 38 | |
| 39 Another JSON file is generated for size summaries to be used in skiaperf. The | |
| 40 JSON format details can be found at: | |
| 41 https://github.com/google/skia/blob/master/bench/ResultsWriter.h#L54 | |
| 42 and: | |
| 43 https://skia.googlesource.com/buildbot/+/master/perf/go/ingester/nanobench.go | |
| 44 | |
| 45 In the binary size case, outputs look like: | |
| 46 | |
| 47 { | |
| 48 "gitHash": "123abc", | |
| 49 "key": { | |
| 50 "source_type": "binarysize" | |
| 51 } | |
| 52 "results: { | |
| 53 "src_lazy_global_weak_symbol": { | |
| 54 "memory": { | |
| 55 "bytes": 41, | |
| 56 "options": { | |
| 57 "path": "src_lazy", | |
| 58 "symbol": "global_weak_symbol" | |
| 59 } | |
| 60 } | |
| 61 }, | |
| 62 "src_lazy_global_read_only_data": { | |
| 63 "memory": { | |
| 64 "bytes": 13476, | |
| 65 "options": { | |
| 66 "path": "src_lazy", | |
| 67 "symbol": "global_read_only_data" | |
| 68 } | |
| 69 } | |
| 70 }, | |
| 71 ... | |
| 72 } | |
| 73 } | |
| 74 | |
| 75 """ | |
| 76 | |
| 77 import collections | |
| 78 import datetime | |
| 79 import json | |
| 80 import logging | |
| 81 import multiprocessing | |
| 82 import optparse | |
| 83 import os | |
| 84 import re | |
| 85 import shutil | |
| 86 import struct | |
| 87 import subprocess | |
| 88 import sys | |
| 89 import tempfile | |
| 90 import time | |
| 91 import urllib2 | |
| 92 | |
| 93 import binary_size_utils | |
| 94 import elf_symbolizer | |
| 95 | |
| 96 from recipe_engine.types import freeze | |
| 97 | |
# Node dictionary keys. These are output in json read by the webapp so
# keep them short to save file size.
# Note: If these change, the webapp must also change.
NODE_TYPE_KEY = 'k'
NODE_NAME_KEY = 'n'
NODE_CHILDREN_KEY = 'children'
NODE_SYMBOL_TYPE_KEY = 't'
NODE_SYMBOL_SIZE_KEY = 'value'
NODE_MAX_DEPTH_KEY = 'maxDepth'
NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement'

# The display name of the bucket where we put symbols without path.
NAME_NO_PATH_BUCKET = '(No Path)'

# Try to keep data buckets smaller than this to avoid killing the
# graphing lib.
BIG_BUCKET_LIMIT = 3000

# Skia addition: relative dir for libskia.so from code base.
LIBSKIA_RELATIVE_PATH = os.path.join('out', 'Release', 'lib')

# Skia addition: dictionary mapping nm symbol type code to a readable
# symbol-kind name. See
# https://code.google.com/p/chromium/codesearch#chromium/src/tools/binary_size/template/D3SymbolTreeMap.js&l=74
SYMBOL_MAP = freeze({
    'A': 'global_absolute',
    'B': 'global_uninitialized_data',
    'b': 'local_uninitialized_data',
    'C': 'global_uninitialized_common',
    'D': 'global_initialized_data',
    'd': 'local_initialized_data',
    # Bug fix: was 'global_small initialized_data' (space instead of
    # underscore), inconsistent with every other entry in this map.
    'G': 'global_small_initialized_data',
    'g': 'local_small_initialized_data',
    'i': 'indirect_function',
    'N': 'debugging',
    'p': 'stack_unwind',
    'R': 'global_read_only_data',
    'r': 'local_read_only_data',
    'S': 'global_small_uninitialized_data',
    's': 'local_small_uninitialized_data',
    'T': 'global_code',
    't': 'local_code',
    'U': 'undefined',
    'u': 'unique',
    'V': 'global_weak_object',
    'v': 'local_weak_object',
    'W': 'global_weak_symbol',
    'w': 'local_weak_symbol',
    '@': 'vtable_entry',
    '-': 'stabs_debugging',
    '?': 'unrecognized',
})
| 150 | |
| 151 | |
| 152 def _MkChild(node, name): | |
| 153 child = node[NODE_CHILDREN_KEY].get(name) | |
| 154 if child is None: | |
| 155 child = {NODE_NAME_KEY: name, | |
| 156 NODE_CHILDREN_KEY: {}} | |
| 157 node[NODE_CHILDREN_KEY][name] = child | |
| 158 return child | |
| 159 | |
| 160 | |
def SplitNoPathBucket(node):
  """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
  handle. Split it into sub-buckets in that case.

  Mutates |node| (the tree root) in place. Uses dict.items() instead of
  the Python-2-only iteritems(), and floor division for the subgroup
  number, so the function works on both Python 2 and 3.
  """
  root_children = node[NODE_CHILDREN_KEY]
  if NAME_NO_PATH_BUCKET in root_children:
    no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
    old_children = no_path_bucket[NODE_CHILDREN_KEY]
    count = 0
    for symbol_type, symbol_bucket in old_children.items():
      count += len(symbol_bucket[NODE_CHILDREN_KEY])
    if count > BIG_BUCKET_LIMIT:
      # Rebuild the bucket's children as 'subgroup' path nodes holding at
      # most BIG_BUCKET_LIMIT symbols each.
      new_children = {}
      no_path_bucket[NODE_CHILDREN_KEY] = new_children
      current_bucket = None
      index = 0
      for symbol_type, symbol_bucket in old_children.items():
        for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].items():
          if index % BIG_BUCKET_LIMIT == 0:
            group_no = (index // BIG_BUCKET_LIMIT) + 1  # '//' on py2 and py3.
            current_bucket = _MkChild(no_path_bucket,
                                      '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
                                                          group_no))
            # NOTE(review): this asserts/marks the tree root, not the new
            # subgroup bucket — preserved as-is from the original.
            assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
            node[NODE_TYPE_KEY] = 'p'  # p for path
          index += 1
          symbol_size = value[NODE_SYMBOL_SIZE_KEY]
          AddSymbolIntoFileNode(current_bucket, symbol_type,
                                symbol_name, symbol_size)
| 189 | |
| 190 | |
def MakeChildrenDictsIntoLists(node):
  """Recursively converts every children dict in the tree into a list.

  The webapp JSON format wants child lists, but the tree is built with
  dicts for O(1) lookup. Uses dict.values() instead of the Python-2-only
  itervalues() so the code runs on both Python 2 and 3.

  Returns:
    The largest number of children found on any single node in the
    subtree rooted at |node| (used to warn about oversized sections).
  """
  largest_list_len = 0
  if NODE_CHILDREN_KEY in node:
    largest_list_len = len(node[NODE_CHILDREN_KEY])
    child_list = []
    for child in node[NODE_CHILDREN_KEY].values():
      child_largest_list_len = MakeChildrenDictsIntoLists(child)
      if child_largest_list_len > largest_list_len:
        largest_list_len = child_largest_list_len
      child_list.append(child)
    node[NODE_CHILDREN_KEY] = child_list

  return largest_list_len
| 204 | |
| 205 | |
def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
  """Puts symbol into the file path node |node|.
  Returns the number of added levels in tree. I.e. returns 2."""
  # |node| is a file-path node: mark it as a path leaf, then descend into
  # (creating if needed) the bucket for this symbol type.
  node[NODE_LAST_PATH_ELEMENT_KEY] = True
  bucket = _MkChild(node, symbol_type)
  assert NODE_TYPE_KEY not in bucket or bucket[NODE_TYPE_KEY] == 'b'
  bucket[NODE_SYMBOL_TYPE_KEY] = symbol_type
  bucket[NODE_TYPE_KEY] = 'b'  # b for bucket

  # Now make the symbol entry itself under the bucket.
  leaf = _MkChild(bucket, symbol_name)
  if NODE_CHILDREN_KEY in leaf:
    if leaf[NODE_CHILDREN_KEY]:
      logging.warning('A container node used as symbol for %s.' % symbol_name)
    # This is going to be used as a leaf so no use for child list.
    del leaf[NODE_CHILDREN_KEY]
  leaf[NODE_SYMBOL_SIZE_KEY] = symbol_size
  leaf[NODE_SYMBOL_TYPE_KEY] = symbol_type
  leaf[NODE_TYPE_KEY] = 's'  # s for symbol

  return 2  # Depth of the added subtree.
| 229 | |
| 230 | |
def MakeCompactTree(symbols, symbol_path_origin_dir):
  """Builds the webapp tree-map structure from parsed nm symbols.

  Args:
    symbols: iterable of (name, type, size, path) tuples.
    symbol_path_origin_dir: unused here; kept for interface parity.

  Returns:
    The root node dict, with NODE_MAX_DEPTH_KEY set to the deepest level.
  """
  result = {NODE_NAME_KEY: '/',
            NODE_CHILDREN_KEY: {},
            NODE_TYPE_KEY: 'p',
            NODE_MAX_DEPTH_KEY: 0}
  seen_symbol_with_path = False
  for symbol_name, symbol_type, symbol_size, file_path in symbols:
    if 'vtable for ' in symbol_name:
      symbol_type = '@'  # hack to categorize these separately
    if file_path and file_path != "??":
      seen_symbol_with_path = True
    else:
      file_path = NAME_NO_PATH_BUCKET

    # Walk down the tree, creating path nodes as needed.
    node = result
    depth = 0
    for path_part in file_path.split('/'):
      if not path_part:
        continue
      depth += 1
      node = _MkChild(node, path_part)
      assert NODE_TYPE_KEY not in node or node[NODE_TYPE_KEY] == 'p'
      node[NODE_TYPE_KEY] = 'p'  # p for path

    depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
    result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)

  if not seen_symbol_with_path:
    logging.warning('Symbols lack paths. Data will not be structured.')

  # The (no path) bucket can be extremely large if we failed to get
  # path information. Split it into subgroups if needed.
  SplitNoPathBucket(result)

  largest_list_len = MakeChildrenDictsIntoLists(result)

  if largest_list_len > BIG_BUCKET_LIMIT:
    logging.warning('There are sections with %d nodes. '
                    'Results might be unusable.' % largest_list_len)
  return result
| 276 | |
| 277 | |
# Skia added: summarizes tree size by symbol type for the given root node.
# Returns a dict keyed by symbol type, and value the type's overall size.
# e.g., {"t": 12345, "W": 543}.
def GetTreeSizes(node):
  """Recursively sums leaf 'value' fields grouped by symbol type 't'."""
  children = node.get('children')
  if not children:
    # Leaf node: a single symbol of one type.
    return {node['t']: node['value']}
  totals = {}
  for child in children:
    for symbol_type, size in GetTreeSizes(child).items():
      totals[symbol_type] = totals.get(symbol_type, 0) + size
  return totals
| 291 | |
| 292 | |
# Skia added: creates dict to be converted to JSON in bench format.
# See top of file for the structure description.
def GetBenchDict(githash, tree_root):
  """Returns the skiaperf bench-format dict for |tree_root|.

  Args:
    githash: git hash string, recorded under 'gitHash'.
    tree_root: root node produced by MakeCompactTree().
  """
  dic = {'gitHash': githash,
         'key': {'source_type': 'binarysize'},
         'results': {},}
  for child in tree_root['children']:
    # Consistency fix: compare against the shared NAME_NO_PATH_BUCKET
    # constant instead of a duplicated '(No Path)' literal so this check
    # cannot drift from the bucket name used when building the tree.
    if NAME_NO_PATH_BUCKET == child['n']:  # Already at symbol summary level.
      for symbol_type, size in GetTreeSizes(child).items():
        dic['results']['no_path_' + SYMBOL_MAP[symbol_type]] = {
            'memory': {
              'bytes': size,
              'options': {'path': 'no_path',
                          'symbol': SYMBOL_MAP[symbol_type],},}}
    else:  # We need to go deeper.
      for grandchild in child['children']:
        path = child['n'] + '_' + grandchild['n']
        for symbol_type, size in GetTreeSizes(grandchild).items():
          dic['results'][path + '_' + SYMBOL_MAP[symbol_type]] = {
              'memory': {
                'bytes': size,
                'options': {'path': path,
                            'symbol': SYMBOL_MAP[symbol_type],}}}

  return dic
| 318 | |
| 319 | |
# Skia added: constructs 'gsutil cp' subprocess command list.
def GetGsCopyCommandList(gsutil, src, dst):
  """Returns argv for copying |src| to |dst| as publicly readable JSON."""
  header_args = ['-h', 'Content-Type:application/json']
  copy_args = ['cp', '-a', 'public-read', src, dst]
  return [gsutil] + header_args + copy_args
| 324 | |
| 325 | |
def DumpCompactTree(symbols, symbol_path_origin_dir, ha, ts, issue, gsutil):
  """Builds the tree JSON and uploads visualization + bench data to GS.

  Args:
    symbols: iterable of (name, type, size, path) tuples.
    symbol_path_origin_dir: passed through to MakeCompactTree().
    ha: git hash of the build.
    ts: commit timestamp.
    issue: trybot issue number string; empty/falsy for waterfall runs.
    gsutil: path to the gsutil binary.
  """
  tree_root = MakeCompactTree(symbols, symbol_path_origin_dir)
  json_data = {'tree_data': tree_root,
               'githash': ha,
               'commit_ts': ts,
               'key': {'source_type': 'binary_size'},
               'total_size': sum(GetTreeSizes(tree_root).values()),}
  tmpfile = tempfile.NamedTemporaryFile(delete=False).name
  with open(tmpfile, 'w') as out:
    # Use separators without whitespace to get a smaller file.
    json.dump(json_data, out, separators=(',', ':'))

  GS_PREFIX = 'gs://chromium-skia-gm/'
  # Writes to Google Storage for visualization.
  subprocess.check_call(GetGsCopyCommandList(
      gsutil, tmpfile, GS_PREFIX + 'size/' + ha + '.json'))
  # Updates the latest data (waterfall runs only; trybots must not clobber).
  if not issue:
    subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile,
                                               GS_PREFIX + 'size/latest.json'))
  # Writes an extra copy using year/month/day/hour path for easy ingestion.
  with open(tmpfile, 'w') as out:
    json.dump(GetBenchDict(ha, tree_root), out, separators=(',', ':'))
  now = datetime.datetime.utcnow()
  ingest_path = '/'.join(('nano-json-v1', str(now.year).zfill(4),
                          str(now.month).zfill(2), str(now.day).zfill(2),
                          str(now.hour).zfill(2)))
  if issue:
    # Bug fix: str.join takes a single iterable argument; the previous
    # '/'.join('trybot', ingest_path, issue) raised TypeError on trybot runs.
    ingest_path = '/'.join(('trybot', ingest_path, issue))
  subprocess.check_call(GetGsCopyCommandList(
      gsutil, tmpfile, GS_PREFIX + ingest_path + '/binarysize_' + ha + '.json'))
| 357 | |
| 358 | |
def MakeSourceMap(symbols):
  """Aggregates symbol count and total size per normalized source path.

  Symbols without a path are pooled under the '[no path]' key. Returns a
  dict: normalized-path -> {'path', 'symbol_count', 'size'}.
  """
  sources = {}
  for _sym, _symbol_type, size, path in symbols:
    key = os.path.normpath(path) if path else '[no path]'
    record = sources.setdefault(
        key, {'path': path, 'symbol_count': 0, 'size': 0})
    record['size'] += size
    record['symbol_count'] += 1
  return sources
| 373 | |
| 374 | |
# Regex for parsing "nm" output. A sample line looks like this:
# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
#
# The fields are: address, size, type, name, source location
# Regular expression explained ( see also: https://xkcd.com/208 ):
# ([0-9a-f]{8,})   The address
# [\s]+            Whitespace separator
# ([0-9a-f]{8,})   The size. From here on out it's all optional.
# [\s]*            Whitespace separator
# (\S?)            The symbol type, which is any non-whitespace char
# [\s*]            Separator. NOTE: this is a character class matching one
#                  whitespace char OR a literal '*', not a repetition.
# ([^\t]*)         Symbol name, any non-tab character (spaces ok!)
# [\t]?            Tab separator
# (.*)             The location (filename[:linennum|?][ (discriminator n)]
sNmPattern = re.compile(
    r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')
| 391 | |
class Progress():
  """Mutable counters tracking symbol-lookup progress for status output."""

  def __init__(self):
    # Symbols processed and nm lines skipped so far.
    self.count = 0
    self.skip_count = 0
    # Number of address collisions observed.
    self.collisions = 0
    # Throttling state for the at-most-once-per-second console update.
    self.time_last_output = time.time()
    self.count_last_output = 0
    # Disambiguation statistics.
    self.disambiguations = 0
    self.was_ambiguous = 0
| 401 | |
| 402 | |
def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path):
  """Dumps |library|'s symbols to |outfile| with resolved source locations.

  Runs nm over |library|, then asynchronously symbolizes (via
  elf_symbolizer.ELFSymbolizer / addr2line) every nm line that lacks a
  source location. Each output line is the original nm line, tab-extended
  with 'path:line' when a lookup succeeded.

  Args:
    outfile: path of the text file to write.
    library: path to the binary being analyzed.
    addr2line_binary: addr2line executable to use.
    nm_binary: nm executable to use.
    jobs: max concurrent addr2line subprocesses.
    disambiguate: if falsy, src_path is cleared, which disables the
        symbolizer's path disambiguation.
    src_path: source tree root used for disambiguation.
  """
  nm_output = RunNm(library, nm_binary)
  nm_output_lines = nm_output.splitlines()
  nm_output_lines_len = len(nm_output_lines)
  # Map of address -> symbol, filled in by the symbolizer callback below.
  address_symbol = {}
  progress = Progress()

  def map_address_symbol(symbol, addr):
    # Callback invoked (possibly from the symbolizer) for each resolved
    # symbol; records it and updates progress counters.
    progress.count += 1
    if addr in address_symbol:
      # 'Collision between %s and %s.' % (str(symbol.name),
      #                                   str(address_symbol[addr].name))
      progress.collisions += 1
    else:
      if symbol.disambiguated:
        progress.disambiguations += 1
      if symbol.was_ambiguous:
        progress.was_ambiguous += 1

      address_symbol[addr] = symbol

    progress_output()

  def progress_output():
    # Prints a one-line status update, throttled to once per second and to
    # every 100th processed symbol.
    progress_chunk = 100
    if progress.count % progress_chunk == 0:
      time_now = time.time()
      time_spent = time_now - progress.time_last_output
      if time_spent > 1.0:
        # Only output at most once per second.
        progress.time_last_output = time_now
        chunk_size = progress.count - progress.count_last_output
        progress.count_last_output = progress.count
        if time_spent > 0:
          speed = chunk_size / time_spent
        else:
          speed = 0
        progress_percent = (100.0 * (progress.count + progress.skip_count) /
                            nm_output_lines_len)
        disambiguation_percent = 0
        if progress.disambiguations != 0:
          disambiguation_percent = (100.0 * progress.disambiguations /
                                    progress.was_ambiguous)

        sys.stdout.write('\r%.1f%%: Looked up %d symbols (%d collisions, '
                         '%d disambiguations where %.1f%% succeeded)'
                         ' - %.1f lookups/s.' %
                         (progress_percent, progress.count, progress.collisions,
                          progress.disambiguations, disambiguation_percent,
                          speed))

  # In case disambiguation was disabled, we remove the source path (which upon
  # being set signals the symbolizer to enable disambiguation)
  if not disambiguate:
    src_path = None
  symbol_path_origin_dir = os.path.dirname(library)
  # Skia specific: strip the out/Release/lib prefix so paths are relative to
  # the repo root.
  symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '')
  symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
                                            map_address_symbol,
                                            max_concurrent_jobs=jobs,
                                            source_root_path=src_path,
                                            prefix_to_remove=symbol_path_prefix)
  user_interrupted = False
  try:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          size = int(match.group(2), 16)
          if addr in address_symbol:  # Already looked up, shortcut
                                      # ELFSymbolizer.
            map_address_symbol(address_symbol[addr], addr)
            continue
          elif size == 0:
            # Save time by not looking up empty symbols (do they even exist?)
            print('Empty symbol: ' + line)
          else:
            symbolizer.SymbolizeAsync(addr, addr)
            continue

      progress.skip_count += 1
  except KeyboardInterrupt:
    user_interrupted = True
    print('Interrupting - killing subprocesses. Please wait.')

  try:
    # Wait for all in-flight addr2line lookups to finish.
    symbolizer.Join()
  except KeyboardInterrupt:
    # Don't want to abort here since we will be finished in a few seconds.
    user_interrupted = True
    print('Patience you must have my young padawan.')

  # Terminate the '\r' progress line (Python 2 print statement).
  print ''

  if user_interrupted:
    print('Skipping the rest of the file mapping. '
          'Output will not be fully classified.')

  symbol_path_origin_dir = os.path.dirname(library)
  # Skia specific: path prefix to strip.
  symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '')

  # Second pass: re-emit every nm line, appending 'path:line' for the
  # addresses we managed to symbolize.
  with open(outfile, 'w') as out:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          symbol = address_symbol.get(addr)
          if symbol is not None:
            path = '??'
            if symbol.source_path is not None:
              path = symbol.source_path.replace(symbol_path_prefix, '')
            line_number = 0
            if symbol.source_line is not None:
              line_number = symbol.source_line
            out.write('%s\t%s:%d\n' % (line, path, line_number))
            continue

      out.write('%s\n' % line)

  print('%d symbols in the results.' % len(address_symbol))
| 528 | |
| 529 | |
def RunNm(binary, nm_binary):
  """Runs nm over |binary| (demangled, size-sorted) and returns its stdout.

  Raises:
    Exception: if nm exits non-zero; the message is nm's stderr, or its
        stdout when stderr is empty.
  """
  cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
         binary]
  nm_process = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
  (process_output, err_output) = nm_process.communicate()

  if nm_process.returncode != 0:
    # Bug fix: 'raise Exception, msg' is Python-2-only syntax (a syntax
    # error on Python 3); raise Exception(msg) works on both.
    if err_output:
      raise Exception(err_output)
    else:
      raise Exception(process_output)

  return process_output
| 545 | |
| 546 | |
def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
                 addr2line_binary, nm_binary, disambiguate, src_path):
  """Returns parsed symbol tuples, symbolizing |library| first if needed.

  If |nm_infile| is None, runs the symbolizer over |library| and writes its
  output to |outfile| (a temp file when |outfile| is None), then parses
  that. Otherwise parses the pre-existing |nm_infile|. The tuples come from
  binary_size_utils.ParseNm.
  """
  if nm_infile is None:
    if outfile is None:
      outfile = tempfile.NamedTemporaryFile(delete=False).name

    if verbose:
      # print(...) works as a statement on Python 2 and a call on Python 3.
      print('Running parallel addr2line, dumping symbols to ' + outfile)
    RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path)

    nm_infile = outfile

  elif verbose:
    print('Using nm input from ' + nm_infile)
  # Bug fix: file() is Python-2-only; open() works on both versions.
  with open(nm_infile, 'r') as infile:
    return list(binary_size_utils.ParseNm(infile))
| 564 | |
| 565 | |
# Lazily-populated cache mapping pak resource id -> readable name; the
# 'inited' flag records whether GetReadablePakResourceName() has already
# scanned the grit-generated resource headers.
PAK_RESOURCE_ID_TO_STRING = { "inited": False }
| 567 | |
def LoadPakIdsFromResourceFile(filename):
  """Given a file name, it loads everything that looks like a resource id
  into PAK_RESOURCE_ID_TO_STRING."""
  with open(filename) as resource_header:
    for line in resource_header:
      # Only grit-style '#define NAME 1234' lines are of interest.
      if not line.startswith("#define "):
        continue
      fields = line.split()
      if len(fields) != 3:
        continue
      try:
        PAK_RESOURCE_ID_TO_STRING[int(fields[2])] = fields[1]
      except ValueError:
        # Value wasn't a plain integer; skip it.
        pass
| 582 | |
def GetReadablePakResourceName(pak_file, resource_id):
  """Pak resources have a numeric identifier. It is not helpful when
  trying to locate where footprint is generated. This does its best to
  map the number to a usable string."""
  if not PAK_RESOURCE_ID_TO_STRING['inited']:
    # One-time scan for grit-generated resource headers next to the pak
    # file: files named *resources.h containing '#define MY_RESOURCE 1234'.
    PAK_RESOURCE_ID_TO_STRING['inited'] = True
    gen_dir = os.path.join(os.path.dirname(pak_file), 'gen')
    if os.path.isdir(gen_dir):
      for dirname, _dirs, files in os.walk(gen_dir):
        for header in files:
          if header.endswith('resources.h'):
            LoadPakIdsFromResourceFile(os.path.join(dirname, header))
  fallback = 'Pak Resource %d' % resource_id
  return PAK_RESOURCE_ID_TO_STRING.get(resource_id, fallback)
| 601 | |
def AddPakData(symbols, pak_file):
  """Adds pseudo-symbols from a pak file."""
  pak_file = os.path.abspath(pak_file)
  with open(pak_file, 'rb') as pak:
    data = pak.read()

  PAK_FILE_VERSION = 4
  # Header: two uint32s (file version, number of entries) and one uint8
  # (encoding of text resources).
  HEADER_LENGTH = 2 * 4 + 1
  INDEX_ENTRY_SIZE = 2 + 4  # Each entry is a uint16 and a uint32.
  version, num_entries, _encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
  assert version == PAK_FILE_VERSION, ('Unsupported pak file '
                                       'version (%d) in %s. Only '
                                       'support version %d' %
                                       (version, pak_file, PAK_FILE_VERSION))
  if num_entries > 0:
    index = data[HEADER_LENGTH:]
    for i in range(num_entries):
      # A resource's size is the gap to the following entry's offset; the
      # index carries a trailing sentinel entry, so peeking i+1 is safe.
      resource_id, offset = struct.unpack_from('<HI', index,
                                               i * INDEX_ENTRY_SIZE)
      _next_id, next_offset = struct.unpack_from('<HI', index,
                                                 (i + 1) * INDEX_ENTRY_SIZE)
      symbols.append((GetReadablePakResourceName(pak_file, resource_id),
                      'd',  # Data. Approximation.
                      next_offset - offset,
                      pak_file))
| 631 | |
| 632 def _find_in_system_path(binary): | |
| 633 """Locate the full path to binary in the system path or return None | |
| 634 if not found.""" | |
| 635 system_path = os.environ["PATH"].split(os.pathsep) | |
| 636 for path in system_path: | |
| 637 binary_path = os.path.join(path, binary) | |
| 638 if os.path.isfile(binary_path): | |
| 639 return binary_path | |
| 640 return None | |
| 641 | |
def CheckDebugFormatSupport(library, addr2line_binary):
  """Kills the program if debug data is in an unsupported format.

  There are two common versions of the DWARF debug formats and
  since we are right now transitioning from DWARF2 to newer formats,
  it's possible to have a mix of tools that are not compatible. Detect
  that and abort rather than produce meaningless output."""
  # universal_newlines=True makes check_output return str on Python 3 too
  # (it is a harmless newline-translation no-op on Python 2).
  tool_output = subprocess.check_output([addr2line_binary, '--version'],
                                        universal_newlines=True)
  # Bug fix: escape the dot between major and minor version; a bare '.'
  # matched any character.
  version_re = re.compile(r'^GNU [^ ]+ .* (\d+)\.(\d+).*?$', re.M)
  parsed_output = version_re.match(tool_output)
  major = int(parsed_output.group(1))
  minor = int(parsed_output.group(2))
  # binutils >= 2.23 understands DWARF4.
  supports_dwarf4 = major > 2 or major == 2 and minor > 22

  if supports_dwarf4:
    return

  print('Checking version of debug information in %s.' % library)
  debug_info = subprocess.check_output(['readelf', '--debug-dump=info',
                                        '--dwarf-depth=1', library],
                                       universal_newlines=True)
  dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
  parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
  version = int(parsed_dwarf_format_output.group(1))
  if version > 2:
    print('The supplied tools only support DWARF2 debug data but the binary\n' +
          'uses DWARF%d. Update the tools or compile the binary\n' % version +
          'with -gdwarf-2.')
    sys.exit(1)
| 670 | |
| 671 | |
def main():
  """Parses options, symbolizes the library and uploads size reports.

  Returns None (so sys.exit(main()) exits with status 0) unless
  parser.error() or an assertion aborts earlier.
  """
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

      %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten. Not used in '
                    'Skia.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--pak', metavar='PATH',
                    help='if specified, includes the contents of the '
                    'specified *.pak file in the output.')
  parser.add_option('--nm-binary',
                    help='use the specified nm binary to analyze library. '
                    'This is to be used when the nm in the path is not for '
                    'the right architecture or of the right version.')
  parser.add_option('--addr2line-binary',
                    help='use the specified addr2line binary to analyze '
                    'library. This is to be used when the addr2line in '
                    'the path is not for the right architecture or '
                    'of the right version.')
  parser.add_option('--jobs', type='int',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates.'
                    'This argument is only valid when using --library.')
  parser.add_option('--legacy', action='store_true',
                    help='emit legacy binary size report instead of modern')
  parser.add_option('--disable-disambiguation', action='store_true',
                    help='disables the disambiguation process altogether,'
                    ' NOTE: this may, depending on your toolchain, produce'
                    ' output with some symbols at the top layer if addr2line'
                    ' could not get the entire source path.')
  parser.add_option('--source-path', default='./',
                    help='the path to the source code of the output binary, '
                    'default set to current directory. Used in the'
                    ' disambiguation process.')
  parser.add_option('--githash', default='latest',
                    help='Git hash for the binary version. Added by Skia.')
  parser.add_option('--commit_ts', type='int', default=-1,
                    help='Timestamp for the commit. Added by Skia.')
  parser.add_option('--issue_number', default='',
                    help='The trybot issue number in string. Added by Skia.')
  parser.add_option('--gsutil_path', default='gsutil',
                    help='Path to gsutil binary. Added by Skia.')
  opts, _args = parser.parse_args()

  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    if opts.jobs:
      # 'print >> sys.stderr' is Python-2-only syntax; write to the stream
      # directly so this runs on Python 2 and 3.
      sys.stderr.write('WARNING: --jobs has no effect '
                       'when used with --nm-in\n')
  if not opts.jobs:
    # Use the number of processors but cap between 2 and 4 since raw
    # CPU power isn't the limiting factor. It's I/O limited, memory
    # bus limited and available-memory-limited. Too many processes and
    # the computer will run out of memory and it will be slow.
    # Bug fix: cpu_count() used to be wrapped in str(), which defeated the
    # int clamp entirely (always 4 on Python 2, TypeError on Python 3).
    opts.jobs = max(2, min(4, multiprocessing.cpu_count()))

  if opts.addr2line_binary:
    assert os.path.isfile(opts.addr2line_binary)
    addr2line_binary = opts.addr2line_binary
  else:
    addr2line_binary = _find_in_system_path('addr2line')
    assert addr2line_binary, 'Unable to find addr2line in the path. '\
        'Use --addr2line-binary to specify location.'

  if opts.nm_binary:
    assert os.path.isfile(opts.nm_binary)
    nm_binary = opts.nm_binary
  else:
    nm_binary = _find_in_system_path('nm')
    assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
        'to specify location.'

  if opts.pak:
    # Bug fix: the assertion message lacked its %s placeholder, so the
    # message formatting itself raised TypeError when the assert fired.
    assert os.path.isfile(opts.pak), 'Could not find %s' % opts.pak

  print('addr2line: %s' % addr2line_binary)
  print('nm: %s' % nm_binary)

  if opts.library:
    CheckDebugFormatSupport(opts.library, addr2line_binary)

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library,
                         opts.jobs, opts.verbose is True,
                         addr2line_binary, nm_binary,
                         opts.disable_disambiguation is None,
                         opts.source_path)

  if opts.pak:
    AddPakData(symbols, opts.pak)

  if opts.legacy:  # legacy report
    print('Do Not set legacy flag.')

  else:  # modern report
    if opts.library:
      symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library))
    else:
      # Just a guess. Hopefully all paths in the input file are absolute.
      symbol_path_origin_dir = os.path.abspath(os.getcwd())
    DumpCompactTree(symbols, symbol_path_origin_dir, opts.githash,
                    opts.commit_ts, opts.issue_number, opts.gsutil_path)
    print('Report data uploaded to GS.')
| 814 | |
| 815 | |
if __name__ == '__main__':
  # main() returns None on success, which sys.exit() maps to exit status 0.
  sys.exit(main())
| OLD | NEW |