Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/python | |
| 2 # Copyright 2014 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Generate a spatial analysis against an arbitrary library. | |
| 7 | |
| 8 To use, build the 'binary_size_tool' target. Then run this tool, passing | |
| 9 in the location of the library to be analyzed along with any other options | |
| 10 you desire. | |
| 11 """ | |
| 12 | |
| 13 import collections | |
| 14 import fileinput | |
| 15 import json | |
| 16 import optparse | |
| 17 import os | |
| 18 import pprint | |
| 19 import re | |
| 20 import shutil | |
| 21 import subprocess | |
| 22 import sys | |
| 23 import tempfile | |
| 24 | |
| 25 | |
def FormatBytes(bytes):
  """Pretty-print a number of bytes.

  Args:
    bytes: the size to format, as a number.

  Returns:
    A string. Sizes of at least 1e6 are expressed in units of 1e6 with an
    'm' suffix and sizes of at least 1e3 in units of 1e3 with a 'k' suffix,
    both with one decimal place. Anything smaller is returned as str(bytes).
  """
  # The thresholds are inclusive so that exactly 1e6 is rendered as '1.0m'
  # instead of '1000.0k', and exactly 1e3 as '1.0k' instead of '1000'.
  if bytes >= 1e6:
    return '%.1fm' % (bytes / 1.0e6)
  if bytes >= 1e3:
    return '%.1fk' % (bytes / 1.0e3)
  return str(bytes)
| 35 | |
| 36 | |
def SymbolTypeToHuman(type):
  """Convert a symbol type as printed by nm into a human-readable name.

  Args:
    type: the one-character symbol type. Upper and lower case are treated
        alike.

  Returns:
    The descriptive name for the type, or 'other' when the type is not one
    of the letters this tool has a name for.
  """
  names = {
      'b': 'bss',
      'd': 'data',
      'r': 'read-only data',
      't': 'code',
      'w': 'weak symbol',
      'v': 'weak symbol',
  }
  # Fall back instead of raising KeyError so that an unexpected symbol type
  # cannot abort report generation.
  return names.get(type.lower(), 'other')
| 45 | |
| 46 | |
def ParseNm(input):
  """Parse nm output.

  Args:
    input: an iterable over lines of nm output.

  Yields:
    (symbol name, symbol type, symbol size, source file path) for each line
    that carries both an address and a size. The type is lower-cased and 'v'
    is reported as 'w'; BSS symbols are skipped. Path is None when the line
    has no tab-separated source location.

  Lines with an address but no size, and address-less lines of type 'U' or
  'w', are silently ignored. Any other line is reported on stderr.
  """
  # Address and size fields are at least 8 hex digits wide, which accepts
  # both 8-digit and 16-digit nm output.
  hex_field = r'[0-9a-f]{8,}'

  # Match lines with size, symbol, optional location, optional discriminator
  sym_re = re.compile(r'^' + hex_field + r' '       # address
                      r'(' + hex_field + r') '      # size
                      r'(.) '                       # symbol type, one character
                      r'([^\t]+)'                   # symbol name, ends at a tab
                      r'(?:\t(.*):[\d\?]+)?.*$')    # location
  # Match lines with addr but no size.
  addr_re = re.compile(r'^' + hex_field + r' (.) ([^\t]+)(?:\t.*)?$')
  # Match lines that don't have an address at all -- typically external
  # symbols.
  noaddr_re = re.compile(r'^ {8,} (.) (.*)$')

  for line in input:
    line = line.rstrip()

    match = sym_re.match(line)
    if match:
      size, type, sym, path = match.groups()
      type = type.lower()
      if type == 'v':
        type = 'w'  # just call them all weak
      if type == 'b':
        continue  # skip all BSS for now
      yield sym, type, int(size, 16), path
      continue

    if addr_re.match(line):
      # No size == we don't care.
      continue

    match = noaddr_re.match(line)
    if match and match.group(1) in ('U', 'w'):
      # external or weak symbol
      continue

    sys.stderr.write('unparsed: %r\n' % (line,))
| 94 | |
| 95 | |
def TreeifySymbols(symbols):
  """Convert symbols into a path-based tree, calculating size information
  along the way.

  Args:
    symbols: an iterable of (symbol name, symbol type, size, path) tuples.
        A path that is None or empty means the source location is unknown.

  Returns:
    A dictionary that contains two kinds of nodes:
    1. Leaf nodes, representing source code locations (e.g., c++ files)
       These nodes have the following dictionary entries:
         sizes: a dictionary whose keys are categories (such as code, data,
                vtable, etceteras) and whose values are the size, in bytes,
                of those categories;
         size:  the total size, in bytes, of all the entries in the sizes
                dict
    2. Non-leaf nodes, representing directories
       These nodes have the following dictionary entries:
         children: a dictionary whose keys are names (path entries; either
                   directory or file names) and whose values are other nodes;
         size:     the total size, in bytes, of all the leaf nodes that are
                   contained within the children dict (recursively expanded)

    The result object is itself a dictionary that represents the common
    ancestor of all child nodes, e.g. a path to which all other nodes beneath
    it are relative. The 'size' attribute of this dict yields the sum of the
    size of all leaf nodes within the data structure.

    Symbols without a path are collected in a top-level leaf node named
    'symbols without paths'.

  Raises:
    Whatever exception occurs while placing a symbol in the tree (for
    example a KeyError when one path is used both as a file and as a
    directory); the offending symbol is reported on stderr first.
  """
  # Lower-cased nm symbol type -> per-file size bucket.
  buckets = {
      'r': '[rodata]',
      'd': '[data]',
      'b': '[bss]',
      't': '[code]',  # 'text' in binary parlance means 'code'.
      'w': '[weak]',
  }

  dirs = {'children': {}, 'size': 0}
  for sym, type, size, path in symbols:
    dirs['size'] += size
    if path:
      # Strip every leading slash: normpath can keep a leading '//', which
      # would otherwise produce an empty path component below.
      path = os.path.normpath(path).lstrip('/')

    parts = path.split('/') if path else None

    if parts:
      file_key = parts.pop()
      tree = dirs
      try:
        # Traverse the tree to the parent of the file node, creating as needed
        for part in parts:
          assert part != ''
          tree = tree['children'].setdefault(part,
                                             {'children': {}, 'size': 0})
          tree['size'] += size

        # Get (creating if necessary) the node for the file
        # This node doesn't have a 'children' attribute
        if file_key not in tree['children']:
          tree['children'][file_key] = {'sizes': collections.defaultdict(int),
                                        'size': 0}
        tree = tree['children'][file_key]
        tree['size'] += size

        # Accumulate size into a bucket within the file
        if 'vtable for ' in sym:
          bucket = '[vtable]'
        else:
          bucket = buckets.get(type.lower(), '[other]')
        tree['sizes'][bucket] += size
      except Exception:
        # Report which symbol could not be placed, then let the original
        # exception propagate.
        sys.stderr.write('%s %s %s\n' % (sym, parts, file_key))
        raise
    else:
      key = 'symbols without paths'
      if key not in dirs['children']:
        dirs['children'][key] = {'sizes': collections.defaultdict(int),
                                 'size': 0}
      tree = dirs['children'][key]
      subkey = 'misc'
      if (sym.endswith('::__FUNCTION__') or
          sym.endswith('::__PRETTY_FUNCTION__')):
        subkey = '__FUNCTION__'
      elif sym.startswith('CSWTCH.'):
        subkey = 'CSWTCH'
      elif '::' in sym:
        # Group by everything up to and including the first '::'.
        subkey = sym[0:sym.find('::') + 2]
      tree['sizes'][subkey] += size
      tree['size'] += size
  return dirs
| 188 | |
| 189 | |
def JsonifyTree(tree, name):
  """Convert TreeifySymbols output to a JSON treemap.

  The format is very similar, with the notable exceptions being
  lists of children instead of maps and some different attribute names.

  Args:
    tree: a node as produced by TreeifySymbols; either a directory node
        (has 'children') or a file node (has 'sizes').
    name: the display name of this node.

  Returns:
    A dict with 'name', 'data' and 'children' entries, where 'children' is
    a list ordered by size, largest first.
  """
  # Size bucket -> value stored under '$symbol' for that bucket.
  symbol_classes = {
      '[vtable]': 'vtable',
      '[rodata]': 'read-only_data',
      '[data]': 'data',
      '[bss]': 'bss',
      '[code]': 'code',
      '[weak]': 'weak_symbol',
  }

  if 'children' not in tree:
    # Leaf node; dump per-file stats as entries in the treemap
    entries = []
    for kind, size in tree['sizes'].items():
      data = {'$area': size}
      symbol_class = symbol_classes.get(kind)
      if symbol_class is not None:
        data['$symbol'] = symbol_class
      entries.append({'name': '%s (%s)' % (kind, FormatBytes(size)),
                      'data': data})
  else:
    # Non-leaf node. Recurse.
    entries = [JsonifyTree(child, child_name)
               for child_name, child in tree['children'].items()]

  # Sort children by size, largest to smallest.
  entries.sort(key=lambda entry: entry['data']['$area'], reverse=True)

  # For leaf nodes, the 'size' attribute is the size of the leaf;
  # Non-leaf nodes don't really have a size, but their 'size' attribute is
  # the sum of the sizes of all their children.
  total = tree['size']
  return {'name': '%s (%s)' % (name, FormatBytes(total)),
          'data': {'$area': total},
          'children': entries}
| 224 | |
| 225 | |
def DumpTreemap(symbols, outfile):
  """Write the symbol tree to outfile as a JavaScript 'kTree' assignment.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: path of the file to (over)write.
  """
  root = TreeifySymbols(symbols)
  with open(outfile, 'w') as out:
    out.write('var kTree = ' + json.dumps(JsonifyTree(root, '/')))
| 234 | |
| 235 | |
def DumpLargestSymbols(symbols, outfile, n):
  """Write the largest symbols to outfile as a JavaScript array.

  The file assigns 'largestSymbols' a list of objects with 'size',
  'symbol', 'type' and 'location' entries, largest symbol first. BSS and
  weak symbols are left out.

  Args:
    symbols: a list of (sym, type, size, path) tuples.
    outfile: path of the file to (over)write.
    n: stop once this many entries have been written.
  """
  by_size = sorted(symbols, key=lambda record: record[2], reverse=True)
  written = 0
  with open(outfile, 'w') as out:
    try:
      out.write('var largestSymbols = [\n')
      for sym, type, size, path in by_size:
        if type == 'b' or type == 'w':
          continue  # skip bss and weak symbols
        entry = {'size': FormatBytes(size),
                 'symbol': sym,
                 'type': SymbolTypeToHuman(type),
                 'location': '' if path is None else path}
        out.write(json.dumps(entry))
        out.write(',\n')
        written += 1
        if written >= n:
          break
    finally:
      # Always terminate the array, even if an entry failed to serialize.
      out.write('];\n')
| 261 | |
| 262 | |
def MakeSourceMap(symbols):
  """Aggregate symbol sizes and counts per source file.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.

  Returns:
    A dict keyed by normalized path ('[no path]' for symbols with no path).
    Each value is a dict with 'path' (the path of the first symbol seen for
    that key, as given), 'symbol_count' and 'size'.
  """
  by_source = {}
  for _sym, _type, size, path in symbols:
    source_key = os.path.normpath(path) if path else '[no path]'
    entry = by_source.setdefault(
        source_key, {'path': path, 'symbol_count': 0, 'size': 0})
    entry['symbol_count'] += 1
    entry['size'] += size
  return by_source
| 277 | |
| 278 | |
def DumpLargestSources(symbols, outfile, n):
  """Write the largest source files to outfile as a JavaScript array.

  The file assigns 'largestSources' a list of objects with 'size',
  'symbol_count' and 'location' entries, largest source first.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: path of the file to (over)write.
    n: stop once this many entries have been written.
  """
  ranked = sorted(MakeSourceMap(symbols).values(),
                  key=lambda record: record['size'], reverse=True)
  with open(outfile, 'w') as out:
    try:
      out.write('var largestSources = [\n')
      for count, record in enumerate(ranked, 1):
        entry = {'size': FormatBytes(record['size']),
                 'symbol_count': str(record['symbol_count']),
                 'location': record['path']}
        out.write(json.dumps(entry))
        out.write(',\n')
        if count >= n:
          break
    finally:
      # Always terminate the array, even if an entry failed to serialize.
      out.write('];\n')
| 299 | |
| 300 | |
def DumpLargestVTables(symbols, outfile, n):
  """Write the largest vtables to outfile as a JavaScript array.

  The file assigns 'largestVTables' a list of objects with 'size', 'symbol'
  and 'location' entries, largest vtable first. A symbol counts as a vtable
  when its name contains 'vtable for '.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: path of the file to (over)write.
    n: stop once this many entries have been written.
  """
  vtables = [{'symbol': symbol, 'path': path, 'size': size}
             for symbol, _type, size, path in symbols
             if 'vtable for ' in symbol]
  vtables.sort(key=lambda record: record['size'], reverse=True)
  with open(outfile, 'w') as out:
    try:
      out.write('var largestVTables = [\n')
      for count, record in enumerate(vtables, 1):
        entry = {'size': FormatBytes(record['size']),
                 'symbol': record['symbol'],
                 'location': record['path']}
        out.write(json.dumps(entry))
        out.write(',\n')
        if count >= n:
          break
    finally:
      # Always terminate the array, even if an entry failed to serialize.
      out.write('];\n')
| 324 | |
| 325 | |
def RunParallelAddress2Line(outfile, library, arch, jobs, verbose):
  """Run a parallel addr2line processing engine to dump and resolve symbols.

  Invokes the Java class org.chromium.tools.binary_size.ParallelAddress2Line
  from binary_size_java.jar, which is looked up under
  $CHROMIUM_OUT_DIR/$BUILDTYPE/lib.java (defaults: 'out' and 'Release').

  Args:
    outfile: path passed to the tool as --outfile.
    library: path passed to the tool as --library.
    arch: one of 'android-arm', 'android-mips' or 'android-x86' to use the
        matching nm/addr2line from third_party/android_tools; for any other
        value no --nm/--addr2line is passed and the tool uses whatever is in
        PATH.
    jobs: value passed to the tool as --threads; converted with str().
    verbose: if true, pass --verbose to the tool and print the command line.

  Raises:
    RuntimeError: if the tool exits with a non-zero return code.
  """
  # arch -> (toolchain directory, prefix of the tool binaries)
  android_toolchains = {
      'android-arm': ('arm-linux-androideabi-4.7', 'arm-linux-androideabi-'),
      'android-mips': ('mipsel-linux-android-4.7', 'mipsel-linux-android-'),
      'android-x86': ('x86-4.7', 'i686-linux-android-'),
  }

  out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out')
  build_type = os.getenv('BUILDTYPE', 'Release')
  classpath = os.path.join(out_dir, build_type, 'lib.java',
                           'binary_size_java.jar')
  cmd = ['java',
         '-classpath', classpath,
         'org.chromium.tools.binary_size.ParallelAddress2Line',
         '--disambiguate',
         '--outfile', outfile,
         '--library', library,
         # subprocess needs string arguments; accept an int job count too.
         '--threads', str(jobs)]
  if verbose:
    cmd.append('--verbose')

  toolchain = android_toolchains.get(arch)
  if toolchain is not None:
    toolchain_dir, tool_prefix = toolchain
    prefix = os.path.join('third_party', 'android_tools', 'ndk', 'toolchains',
                          toolchain_dir, 'prebuilt', 'linux-x86_64', 'bin',
                          tool_prefix)
    cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line'])
  # else, use whatever is in PATH (don't pass --nm or --addr2line)

  if verbose:
    print(cmd)

  return_code = subprocess.call(cmd)
  if return_code:
    raise RuntimeError('Failed to run ParallelAddress2Line: returned ' +
                       str(return_code))
| 363 | |
| 364 | |
def GetNmSymbols(infile, outfile, library, arch, jobs, verbose):
  """Return the parsed symbols, generating the nm dump first if needed.

  Args:
    infile: path of an existing nm dump to parse, or None to generate one
        from the library with RunParallelAddress2Line.
    outfile: where to keep the generated dump; when None a temporary file is
        used and removed again before returning. Ignored if infile is given.
    library: path of the library to analyze (used only when generating).
    arch: architecture passed to RunParallelAddress2Line.
    jobs: job count passed to RunParallelAddress2Line.
    verbose: if true, print status information.

  Returns:
    A list of the tuples yielded by ParseNm.
  """
  temp_path = None
  try:
    if infile is None:
      if outfile is None:
        # Only the name is needed here; the dump is written by the tool.
        handle, temp_path = tempfile.mkstemp()
        os.close(handle)
        infile = temp_path
      else:
        infile = outfile

      if verbose:
        print('Running parallel addr2line, dumping symbols to ' + infile)
      RunParallelAddress2Line(outfile=infile, library=library, arch=arch,
                              jobs=jobs, verbose=verbose)
    elif verbose:
      print('Using nm input from ' + infile)

    with open(infile, 'r') as nm_file:
      # Materialize the list before the file (and temp file) goes away.
      return list(ParseNm(nm_file))
  finally:
    if temp_path is not None and os.path.exists(temp_path):
      os.remove(temp_path)
| 380 | |
| 381 | |
def main():
  """Parse the command line, analyze the symbols and write the report.

  Exactly one of --library or --nm-in must be given, and --destdir is
  required. The report (JavaScript data files, a copy of webtreemap and
  index.html) is written to --destdir. The webtreemap and template sources
  are read from paths relative to the current working directory.
  """
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

      %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--arch',
                    help='the architecture that the library is targeted to. '
                    'Determines which nm/addr2line binaries are used. When '
                    "'host-native' is chosen, the program will use whichever "
                    'nm/addr2line binaries are on the PATH. This is '
                    'appropriate when you are analyzing a binary by and for '
                    'your computer. '
                    'This argument is only valid when using --library. '
                    "Default is 'host-native'.",
                    choices=['host-native', 'android-arm',
                             'android-mips', 'android-x86'],)
  parser.add_option('--jobs',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates. '
                    'This argument is only valid when using --library.')
  opts, _args = parser.parse_args()

  if bool(opts.library) == bool(opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    # These options only influence symbol generation, which --nm-in skips.
    for flag, value in (('--jobs', opts.jobs),
                        ('--arch', opts.arch),
                        ('--nm-out', opts.nm_out)):
      if value:
        sys.stderr.write('WARNING: %s has no effect '
                         'when used with --nm-in\n' % flag)
  if not opts.destdir:
    parser.error('--destdir is required argument')
  if not opts.jobs:
    opts.jobs = '1'
  if not opts.arch:
    opts.arch = 'host-native'

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, opts.arch,
                         opts.jobs, opts.verbose is True)
  if not os.path.exists(opts.destdir):
    os.makedirs(opts.destdir, 0o755)

  DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
  DumpLargestSymbols(symbols,
                     os.path.join(opts.destdir, 'largest-symbols.js'), 100)
  DumpLargestSources(symbols,
                     os.path.join(opts.destdir, 'largest-sources.js'), 100)
  DumpLargestVTables(symbols,
                     os.path.join(opts.destdir, 'largest-vtables.js'), 100)

  # TODO(andrewhayden): Switch to D3 for greater flexibility
  treemap_out = os.path.join(opts.destdir, 'webtreemap')
  if not os.path.exists(treemap_out):
    os.makedirs(treemap_out, 0o755)
  treemap_src = os.path.join('third_party', 'webtreemap', 'src',
                             'webtreemap-gh-pages')
  for filename in ('COPYING', 'webtreemap.js', 'webtreemap.css'):
    shutil.copy(os.path.join(treemap_src, filename), treemap_out)
  shutil.copy(os.path.join('tools', 'binary_size', 'template', 'index.html'),
              opts.destdir)
  if opts.verbose:
    print('Report saved to ' + opts.destdir + '/index.html')


if __name__ == '__main__':
  sys.exit(main())
| OLD | NEW |