Index: tools/binary_size/run_binary_size_analysis.py |
diff --git a/tools/binary_size/run_binary_size_analysis.py b/tools/binary_size/run_binary_size_analysis.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..1870fc208f278dd5648e044fc9b09705115763b8 |
--- /dev/null |
+++ b/tools/binary_size/run_binary_size_analysis.py |
@@ -0,0 +1,514 @@ |
+#!/usr/bin/python |
+# Copyright 2014 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+"""Generate a spatial analysis against an arbitrary library. |
+ |
+To use, build the 'binary_size_tool' target. Then run this tool, passing |
+in the location of the library to be analyzed along with any other options |
+you desire. |
+""" |
+ |
+import fileinput |
+import json |
+import optparse |
+import os |
+import pprint |
+import re |
+import shutil |
+import subprocess |
+import sys |
+import tempfile |
+ |
bulach
2014/01/10 11:23:24
nit: need another \n
Andrew Hayden (chromium.org)
2014/01/16 14:26:49
Done.
|
def FormatBytes(bytes):
  """Pretty-print a number of bytes.

  Decimal (SI) units are used: counts of at least one million are shown in
  megabytes with an 'm' suffix and counts of at least one thousand in
  kilobytes with a 'k' suffix, each with one decimal place. Anything smaller
  is returned through str() unchanged.

  Args:
    bytes: the byte count to format. (The name shadows the builtin; it is kept
        so that keyword callers keep working.)

  Returns:
    A short human-readable string such as '512', '1.5k' or '2.0m'.
  """
  # The bounds are inclusive so that exact boundaries pick the larger unit:
  # 1000000 is reported as '1.0m' rather than '1000.0k'.
  if bytes >= 1e6:
    return '%.1fm' % (bytes / 1.0e6)
  if bytes >= 1e3:
    return '%.1fk' % (bytes / 1.0e3)
  return str(bytes)
+ |
+ |
def SymbolTypeToHuman(type):
  """Convert a symbol type as printed by nm into a human-readable name.

  Args:
    type: the single-character (lower-case) nm symbol type.

  Returns:
    The descriptive name for the known types ('b', 'd', 'r', 't', 'w', 'v'),
    or 'other' for any type that is not in the table.
  """
  names = {
      'b': 'bss',
      'd': 'data',
      'r': 'read-only data',
      't': 'code',
      'w': 'weak symbol',
      'v': 'weak symbol',
  }
  # nm can report types outside this table (e.g. 'a' or 'n'); report those as
  # 'other' instead of aborting the whole run with a KeyError.
  return names.get(type, 'other')
+ |
+ |
def ParseNm(input):
  """Parse nm output.

  Lines are classified with three patterns, tried in this order:
    1. address, size, type, name and an optional tab-separated
       'path:line' location -- these are the only lines that yield a result;
    2. address, type and name but no size -- silently skipped;
    3. no address at all -- skipped when the type is 'U' or 'w'.
  Any line that is not consumed by one of these rules is echoed to stderr
  with an 'unparsed:' prefix.

  Args:
    input: an iterable over lines of nm output.

  Yields:
    (symbol name, symbol type, symbol size, source file path) tuples. The type
    is lower-cased and 'v' is folded into 'w'; BSS symbols ('b') are not
    yielded. Path is None if the line carried no location.
  """
  # Addresses and sizes are 8 hex digits on 32-bit targets; 16 digits are
  # accepted too so that output for 64-bit binaries is not rejected wholesale.
  hex_field = r'[0-9a-f]{8,16}'

  # Match lines with size, symbol, optional location, optional discriminator.
  sym_re = re.compile(r'^' + hex_field + r' '      # address
                      r'(' + hex_field + r') '     # size
                      r'(.) '                      # symbol type, one character
                      r'([^\t]+)'                  # symbol name, up to a tab
                      r'(?:\t(.*):[\d\?]+)?.*$')   # location
  # Match lines with addr but no size.
  addr_re = re.compile(r'^' + hex_field + r' (.) ([^\t]+)(?:\t.*)?$')
  # Match lines that don't have an address at all -- typically external
  # symbols. The address column is blank-padded to the address width.
  noaddr_re = re.compile(r'^ {8,16} (.) (.*)$')

  for line in input:
    line = line.rstrip()

    match = sym_re.match(line)
    if match:
      size_hex, sym_type, sym, path = match.groups()
      sym_type = sym_type.lower()
      if sym_type == 'b':
        continue  # skip all BSS for now
      if sym_type == 'v':
        sym_type = 'w'  # just call them all weak
      yield sym, sym_type, int(size_hex, 16), path
      continue

    if addr_re.match(line):
      # No size == we don't care.
      continue

    match = noaddr_re.match(line)
    if match and match.group(1) in ('U', 'w'):
      # external or weak symbol
      continue

    sys.stderr.write('unparsed: %r\n' % (line,))
+ |
+ |
def TreeifySymbols(symbols):
  """Convert symbols into a path-based tree, calculating sizes along the way.

  The result is a dictionary that contains two kinds of nodes:
  1. Leaf nodes, representing source code locations (e.g., c++ files)
     These nodes have the following dictionary entries:
       sizes: a dictionary whose keys are categories (such as '[code]',
              '[data]', '[vtable]', etc.) and whose values are the size, in
              bytes, of those categories;
       size:  the total size, in bytes, of all the entries in the sizes dict
  2. Non-leaf nodes, representing directories
     These nodes have the following dictionary entries:
       children: a dictionary whose keys are names (path entries; either
                 directory or file names) and whose values are other nodes;
       size:     the total size, in bytes, of all the leaf nodes that are
                 contained within the children dict (recursively expanded)

  Symbols that have no usable path are collected in a single leaf node named
  'symbols without paths' directly under the root; its 'sizes' entries are
  keyed by a prefix derived from the symbol name instead of by category.

  The result object is itself a dictionary that represents the common ancestor
  of all child nodes, e.g. a path to which all other nodes beneath it are
  relative. The 'size' attribute of this dict yields the sum of the size of
  all leaf nodes within the data structure.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples; path may
        be None or empty.

  Returns:
    The root node of the tree, as described above.
  """
  # nm symbol type (case-insensitive) -> bucket name within a file node.
  bucket_by_type = {
      'r': '[rodata]',
      'd': '[data]',
      'b': '[bss]',
      't': '[code]',  # 'text' in binary parlance means 'code'.
      'w': '[weak]',
  }

  dirs = {'children': {}, 'size': 0}
  for sym, type, size, path in symbols:
    dirs['size'] += size
    if path:
      # normpath may keep a leading '//', so strip every leading slash to
      # avoid producing an empty first path component.
      path = os.path.normpath(path).lstrip('/')

    parts = path.split('/') if path else None
    if parts:
      file_key = parts.pop()
      try:
        # Traverse the tree to the parent of the file node, creating
        # directories as needed and charging the size to each of them.
        node = dirs
        for part in parts:
          node = node['children'].setdefault(part,
                                             {'children': {}, 'size': 0})
          node['size'] += size

        # Get (creating if necessary) the node for the file.
        # This node doesn't have a 'children' attribute.
        leaf = node['children'].setdefault(file_key,
                                           {'sizes': {}, 'size': 0})
        leaf['size'] += size

        # Accumulate size into a bucket within the file. Vtables get their
        # own bucket regardless of the symbol type.
        if 'vtable for ' in sym:
          bucket = '[vtable]'
        else:
          bucket = bucket_by_type.get(type.lower(), '[other]')
        leaf['sizes'][bucket] = leaf['sizes'].get(bucket, 0) + size
      except Exception:
        # Say which symbol broke the tree (e.g. a path used both as a file
        # and as a directory), then let the original error propagate.
        sys.stderr.write('%s %s %s\n' % (sym, parts, file_key))
        raise
    else:
      key = 'symbols without paths'
      leaf = dirs['children'].setdefault(key, {'sizes': {}, 'size': 0})
      subkey = 'misc'
      if (sym.endswith('::__FUNCTION__') or
          sym.endswith('::__PRETTY_FUNCTION__')):
        subkey = '__FUNCTION__'
      elif sym.startswith('CSWTCH.'):
        subkey = 'CSWTCH'
      elif '::' in sym:
        # Group by the outermost scope, keeping the trailing '::'.
        subkey = sym[0:sym.find('::') + 2]
      leaf['sizes'][subkey] = leaf['sizes'].get(subkey, 0) + size
      leaf['size'] += size
  return dirs
+ |
+ |
def JsonifyTree(tree, name):
  """Convert the output of TreeifySymbols to a JSON treemap structure.

  The format is very similar to the input, with the notable exceptions being
  lists of children instead of maps, and some different attribute names.

  Args:
    tree: a node as produced by TreeifySymbols: either a directory node with
        a 'children' dict or a leaf node with a 'sizes' dict; both carry a
        'size' entry.
    name: the display name for this node.

  Returns:
    A dict with 'name' (the display name followed by the formatted size),
    'data' ({'$area': size in bytes}) and 'children' (a list, largest first).
    Children generated from a leaf's 'sizes' entries also carry a '$symbol'
    CSS class in 'data' when the category is a known one.
  """
  css_class_by_kind = {
      '[vtable]': 'vtable',
      '[rodata]': 'read-only_data',
      '[data]': 'data',
      '[bss]': 'bss',
      '[code]': 'code',
      '[weak]': 'weak_symbol',
  }

  children = []
  if 'children' in tree:
    # Non-leaf node. Recurse.
    for child_name, child in tree['children'].items():
      children.append(JsonifyTree(child, child_name))
  else:
    # Leaf node; dump per-file stats as entries in the treemap.
    for kind, size in tree['sizes'].items():
      child_json = {'name': kind + ' (' + FormatBytes(size) + ')',
                    'data': {'$area': size}}
      css_class = css_class_by_kind.get(kind)
      if css_class is not None:
        child_json['data']['$symbol'] = css_class
      children.append(child_json)

  # Sort children by size, largest to smallest; ties are broken by name so
  # that the output does not depend on dictionary iteration order.
  children.sort(key=lambda child: (-child['data']['$area'], child['name']))

  # For leaf nodes, the 'size' attribute is the size of the leaf;
  # Non-leaf nodes don't really have a size, but their 'size' attribute is
  # the sum of the sizes of all their children.
  return {'name': name + ' (' + FormatBytes(tree['size']) + ')',
          'data': {'$area': tree['size']},
          'children': children}
+ |
+ |
def DumpTreemap(symbols, outfile):
  """Write the treemap for |symbols| to |outfile| as a JavaScript variable.

  The symbols are arranged into a tree with TreeifySymbols, converted with
  JsonifyTree under the root name '/', and written out as a single
  'var kTree = <json>' assignment.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: path of the file to create or overwrite.
  """
  root = TreeifySymbols(symbols)
  with open(outfile, 'w') as out:
    payload = json.dumps(JsonifyTree(root, '/'))
    out.write('var kTree = ' + payload)
+ |
+ |
def DumpLargestSymbols(symbols, outfile, n):
  """Write the n largest symbols to |outfile| as a JavaScript array.

  The file contains a 'var largestSymbols = [...]' assignment with one JSON
  object per line, each followed by a comma. BSS ('b') and weak ('w') symbols
  are skipped and do not count towards n.

  Args:
    symbols: a list of (symbol name, type, size, path) tuples.
    outfile: path of the file to create or overwrite.
    n: the maximum number of entries to write; nothing is written between the
        brackets when n is zero or negative.
  """
  # a list of (sym, type, size, path); sort by size, largest first.
  ranked = sorted(symbols, key=lambda x: -x[2])
  with open(outfile, 'w') as out:
    try:
      out.write('var largestSymbols = [\n')
      dumped = 0
      for sym, type, size, path in ranked:
        # Check the limit before writing so that n <= 0 emits no entries.
        if dumped >= n:
          break
        if type in ('b', 'w'):
          continue  # skip bss and weak symbols
        entry = {'size': FormatBytes(size),
                 'symbol': sym,
                 'type': SymbolTypeToHuman(type),
                 'location': path if path is not None else ''}
        out.write(json.dumps(entry))
        out.write(',\n')
        dumped += 1
    finally:
      # Always terminate the array, even if an entry failed to serialize.
      out.write('];\n')
+ |
+ |
def MakeSourceMap(symbols):
  """Aggregate symbol sizes and counts per source file.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.

  Returns:
    A dict keyed by the normalized source path, or by '[no path]' for symbols
    whose path is empty or None. Each value is a dict with:
      path: the path exactly as it appeared on the first symbol seen for that
            key (not normalized; may be None);
      symbol_count: the number of symbols attributed to the key;
      size: the summed size of those symbols.
  """
  sources = {}
  for _, _, size, path in symbols:
    key = os.path.normpath(path) if path else '[no path]'
    record = sources.setdefault(
        key, {'path': path, 'symbol_count': 0, 'size': 0})
    record['symbol_count'] += 1
    record['size'] += size
  return sources
+ |
+ |
def DumpLargestSources(symbols, outfile, n):
  """Write the n largest source files to |outfile| as a JavaScript array.

  Symbols are grouped per source file with MakeSourceMap, ordered by total
  size (largest first) and written as a 'var largestSources = [...]'
  assignment with one JSON object per line, each followed by a comma.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: path of the file to create or overwrite.
    n: the maximum number of entries to write; nothing is written between the
        brackets when n is zero or negative.
  """
  source_map = MakeSourceMap(symbols)
  ranked = sorted(source_map.values(), key=lambda x: -x['size'])
  with open(outfile, 'w') as out:
    try:
      out.write('var largestSources = [\n')
      # Slicing (instead of counting after each write) makes n <= 0 emit no
      # entries at all.
      for record in ranked[:max(n, 0)]:
        entry = {'size': FormatBytes(record['size']),
                 'symbol_count': str(record['symbol_count']),
                 'location': record['path']}
        out.write(json.dumps(entry))
        out.write(',\n')
    finally:
      # Always terminate the array, even if an entry failed to serialize.
      out.write('];\n')
+ |
+ |
def DumpLargestVTables(symbols, outfile, n):
  """Write the n largest vtables to |outfile| as a JavaScript array.

  A symbol counts as a vtable when its name contains 'vtable for '. The
  matches are ordered by size (largest first) and written as a
  'var largestVTables = [...]' assignment with one JSON object per line, each
  followed by a comma.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: path of the file to create or overwrite.
    n: the maximum number of entries to write; nothing is written between the
        brackets when n is zero or negative.
  """
  vtables = [{'symbol': symbol, 'path': path, 'size': size}
             for symbol, _, size, path in symbols
             if 'vtable for ' in symbol]
  vtables.sort(key=lambda x: -x['size'])
  with open(outfile, 'w') as out:
    try:
      out.write('var largestVTables = [\n')
      # Slicing (instead of counting after each write) makes n <= 0 emit no
      # entries at all.
      for record in vtables[:max(n, 0)]:
        entry = {'size': FormatBytes(record['size']),
                 'symbol': record['symbol'],
                 'location': record['path']}
        out.write(json.dumps(entry))
        out.write(',\n')
    finally:
      # Always terminate the array, even if an entry failed to serialize.
      out.write('];\n')
+ |
+ |
def RunPA2L(outfile, library, arch, threads, verbose):
  """Run a parallel addr2line processing engine to dump and resolve symbols.

  Launches the Java class org.chromium.tools.binary_size.ParallelAddress2Line
  from binary_size_java.jar, which is looked up under
  $CHROMIUM_OUT_DIR/$BUILDTYPE/lib.java (defaulting to out/Release). For the
  Android architectures the matching NDK nm/addr2line binaries are passed
  explicitly; their paths are relative to the current working directory.

  Args:
    outfile: path that the tool is told to write its output to.
    library: path of the library to analyze.
    arch: one of 'android-arm', 'android-mips' or 'android-x86' to select an
        NDK toolchain; for any other value no --nm/--addr2line is passed.
    threads: number of worker threads, as an int or a string.
    verbose: if true, pass --verbose to the tool and print the command line.

  Raises:
    RuntimeError: if the tool exits with a non-zero return code.
  """
  out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out')
  build_type = os.getenv('BUILDTYPE', 'Release')
  classpath = os.path.join(out_dir, build_type, 'lib.java',
                           'binary_size_java.jar')
  cmd = ['java',
         '-classpath', classpath,
         'org.chromium.tools.binary_size.ParallelAddress2Line',
         '--disambiguate',
         '--outfile', outfile,
         '--library', library,
         # Every argv entry must be a string; the caller's default is an int.
         '--threads', str(threads)]
  if verbose:
    cmd.append('--verbose')

  # arch -> (NDK toolchain directory, binary name prefix).
  toolchains = {
      'android-arm': ('arm-linux-androideabi-4.7', 'arm-linux-androideabi-'),
      'android-mips': ('mipsel-linux-android-4.7', 'mipsel-linux-android-'),
      'android-x86': ('x86-4.7', 'i686-linux-android-'),
  }
  if arch in toolchains:
    toolchain_dir, binary_prefix = toolchains[arch]
    prefix = os.path.join('third_party', 'android_tools', 'ndk', 'toolchains',
                          toolchain_dir, 'prebuilt', 'linux-x86_64', 'bin',
                          binary_prefix)
    cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line'])
  # else, use whatever is in PATH (don't pass --nm or --addr2line)

  if verbose:
    print(cmd)

  return_code = subprocess.call(cmd)
  if return_code:
    raise RuntimeError('Failed to run ParallelAddress2Line: returned ' +
                       str(return_code))
+ |
+ |
def GetNmSymbols(infile, outfile, library, arch, threads, verbose):
  """Return the parsed symbols, generating the nm dump first if necessary.

  When |infile| is None the dump is produced by RunPA2L, either into
  |outfile| or, if that is None too, into a temporary file that is removed
  again before returning. Otherwise |infile| is read as-is and |library|,
  |arch| and |threads| are not used.

  Args:
    infile: path of an existing nm dump, or None to generate one.
    outfile: path at which to keep the generated dump, or None.
    library: path of the library to analyze (passed to RunPA2L).
    arch: target architecture (passed to RunPA2L).
    threads: number of addr2line threads (passed to RunPA2L).
    verbose: if true, print progress information.

  Returns:
    A list of the tuples yielded by ParseNm for the dump.
  """
  temp_path = None
  try:
    if infile is None:
      if outfile is None:
        # mkstemp hands back an open descriptor; close it right away since
        # only the path is needed.
        fd, temp_path = tempfile.mkstemp()
        os.close(fd)
        infile = temp_path
      else:
        infile = outfile

      if verbose:
        print('Running parallel addr2line, dumping symbols to ' + infile)
      RunPA2L(outfile=infile, library=library, arch=arch,
              threads=threads, verbose=verbose)
    elif verbose:
      print('Using nm input from ' + infile)

    with open(infile, 'r') as nm_file:
      return list(ParseNm(nm_file))
  finally:
    # Only the file created here is cleaned up; user-supplied paths are kept.
    if temp_path is not None and os.path.exists(temp_path):
      os.remove(temp_path)
+ |
+ |
def _ParseOptions():
  """Parse and validate the command line, returning the optparse values."""
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

      %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  # No default is declared for --arch or --pa2l-threads: an explicit value
  # has to stay distinguishable from the default so that the --nm-in warnings
  # below only fire when the user actually passed the option.
  parser.add_option('--arch',
                    choices=['host-native', 'android-arm',
                             'android-mips', 'android-x86'],
                    help='the architecture that the library is targeted to. '
                    'Currently supports the following: '
                    'host-native, android-arm, android-mips, android-x86. '
                    'the default is host-native. This determines '
                    'what nm/addr2line binaries are used. When host-native '
                    'is chosen (the default), the program will use whichever '
                    'nm/addr2line binaries are on the PATH. This is '
                    'appropriate when you are analyzing a binary by and for '
                    'your computer. '
                    'This argument is only valid when using --library.')
  parser.add_option('--pa2l-threads', dest='threads',
                    help='number of threads to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'threads greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates. '
                    'This argument is only valid when using --library.')
  opts, _ = parser.parse_args()

  if bool(opts.library) == bool(opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    if opts.threads:
      sys.stderr.write('WARNING: --pa2l-threads has no effect '
                       'when used with --nm-in\n')
    if opts.arch:
      sys.stderr.write('WARNING: --arch has no effect '
                       'when used with --nm-in\n')
  if not opts.destdir:
    parser.error('--destdir is a required argument')
  if not opts.threads:
    # Kept as a string, like a user-supplied value: it ends up in an argv.
    opts.threads = '1'
  if not opts.arch:
    opts.arch = 'host-native'
  return opts


def _InstallWebTreemap(destdir):
  """Download the webtreemap sources and move the needed files to destdir."""
  url = 'https://github.com/martine/webtreemap/archive/gh-pages.zip'
  tmpdir = tempfile.mkdtemp('binary_size')
  zip_path = os.path.join(tmpdir, 'webtreemap.zip')
  try:
    return_code = subprocess.call(['wget', '-O', zip_path, url])
    if return_code:
      raise RuntimeError('Failed to download: returned ' + str(return_code))
    return_code = subprocess.call(['unzip', '-o', zip_path, '-d', tmpdir])
    if return_code:
      raise RuntimeError('Failed to unzip: returned ' + str(return_code))
    for filename in ('COPYING', 'webtreemap.js', 'webtreemap.css'):
      shutil.move(os.path.join(tmpdir, 'webtreemap-gh-pages', filename),
                  destdir)
  finally:
    shutil.rmtree(tmpdir, ignore_errors=True)


def main():
  """Generate the binary size report described by the command line.

  Obtains the symbol list (from --nm-in or by analyzing --library), writes the
  treemap, largest-symbols, largest-sources and largest-vtables JavaScript
  data files into --destdir, fetches the webtreemap files if webtreemap.js is
  not already there, and copies the report's index.html from
  tools/binary_size/template (relative to the current working directory).

  Raises:
    RuntimeError: if generating symbols, downloading or unzipping fails.
  """
  opts = _ParseOptions()

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, opts.arch,
                         opts.threads, opts.verbose is True)
  if not os.path.exists(opts.destdir):
    os.makedirs(opts.destdir, 0o755)

  DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
  DumpLargestSymbols(symbols,
                     os.path.join(opts.destdir, 'largest-symbols.js'), 100)
  DumpLargestSources(symbols,
                     os.path.join(opts.destdir, 'largest-sources.js'), 100)
  DumpLargestVTables(symbols,
                     os.path.join(opts.destdir, 'largest-vtables.js'), 100)

  # TODO(andrewhayden): Switch to d3 and/or mirror webtreemap project
  if not os.path.exists(os.path.join(opts.destdir, 'webtreemap.js')):
    _InstallWebTreemap(opts.destdir)
  shutil.copy(os.path.join('tools', 'binary_size', 'template', 'index.html'),
              opts.destdir)
  if opts.verbose:
    print('Report saved to ' + opts.destdir + '/index.html')


if __name__ == '__main__':
  sys.exit(main())