Chromium Code Reviews| Index: tools/binary_size/run_binary_size_analysis.py |
| diff --git a/tools/binary_size/run_binary_size_analysis.py b/tools/binary_size/run_binary_size_analysis.py |
| new file mode 100755 |
| index 0000000000000000000000000000000000000000..cd44225a1a90479faef8cbed6ca09b2817725bb5 |
| --- /dev/null |
| +++ b/tools/binary_size/run_binary_size_analysis.py |
| @@ -0,0 +1,338 @@ |
| +#!/usr/bin/python |
| +# Copyright 2014 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +"""Generate a spatial analysis against an arbitrary library. |
| + |
| +To use, build the 'binary_size_java' target. Then run this tool, passing |
| +in the location of the library to be analyzed along with any other options |
| +you desire. |
| +""" |
| + |
| +import fileinput |
| +import optparse |
| +import os |
| +import pprint |
| +import re |
| +import shutil |
| +import subprocess |
| +import sys |
| +import tempfile |
| +import json |
| + |
| +def format_bytes(bytes): |
| + """Pretty-print a number of bytes.""" |
| + if bytes > 1e6: |
| + bytes = bytes / 1.0e6 |
| + return '%.1fm' % bytes |
| + if bytes > 1e3: |
| + bytes = bytes / 1.0e3 |
| + return '%.1fk' % bytes |
| + return str(bytes) |
| + |
| + |
| +def parse_nm(input): |
|
bulach
2014/01/07 19:38:30
not sure, is this needed?
Andrew Hayden (chromium.org)
2014/01/08 00:56:45
Yes, the bloat script parses nm to convert the str
bulach
2014/01/08 15:04:00
got it... there's some hidden irony in that whilst
|
| + """Parse nm output. |
| + |
| + Argument: an iterable over lines of nm output. |
| + |
| + Yields: (symbol name, symbol type, symbol size, source file path). |
| + Path may be None if nm couldn't figure out the source file. |
| + """ |
| + |
| + # Match lines with size, symbol, optional location, optional discriminator |
| + sym_re = re.compile(r'^[0-9a-f]{8} ([0-9a-f]{8}) (.) ([^\t]+)(?:\t(.*):[\d\?]+)?.*$') |
| + |
| + # Match lines with addr but no size. |
| + addr_re = re.compile(r'^[0-9a-f]{8} (.) ([^\t]+)(?:\t.*)?$') |
| + # Match lines that don't have an address at all -- typically external symbols. |
| + noaddr_re = re.compile(r'^ {8} (.) (.*)$') |
| + |
| + for line in input: |
| + line = line.rstrip() |
| + match = sym_re.match(line) |
| + if match: |
| + size, type, sym = match.groups()[0:3] |
| + size = int(size, 16) |
| + type = type.lower() |
| + if type == 'v': |
| + type = 'w' # just call them all weak |
| + if type == 'b': |
| + continue # skip all BSS for now |
| + path = match.group(4) |
| + yield sym, type, size, path |
| + continue |
| + match = addr_re.match(line) |
| + if match: |
| + type, sym = match.groups()[0:2] |
| + # No size == we don't care. |
| + continue |
| + match = noaddr_re.match(line) |
| + if match: |
| + type, sym = match.groups() |
| + if type in ('U', 'w'): |
| + # external or weak symbol |
| + continue |
| + |
| + print >>sys.stderr, 'unparsed:', repr(line) |
| + |
| + |
| +def treeify_syms(symbols): |
| + dirs = {} |
| + for sym, type, size, path in symbols: |
| + if path: |
| + path = os.path.normpath(path) |
| + if path.startswith('/usr/include'): |
| + path = path.replace('/usr/include', 'usrinclude') |
| + elif path.startswith('/'): |
| + path = path[1:] |
| + |
| + parts = None |
| + # TODO: make segmenting by namespace work. |
| + if False and '::' in sym: |
| + if sym.startswith('vtable for '): |
| + sym = sym[len('vtable for '):] |
| + parts = sym.split('::') |
| + parts.append('[vtable]') |
| + else: |
| + parts = sym.split('::') |
| + parts[0] = '::' + parts[0] |
| + elif path and '/' in path: |
| + parts = path.split('/') |
| + |
| + if parts: |
| + key = parts.pop() |
| + tree = dirs |
| + try: |
| + for part in parts: |
| + assert part != '', path |
| + if part not in tree: |
| + tree[part] = {} |
| + tree = tree[part] |
| + tree[key] = tree.get(key, 0) + size |
| + except: |
| + print >>sys.stderr, sym, parts, key |
| + raise |
| + else: |
| + key = 'symbols without paths' |
| + if key not in dirs: |
| + dirs[key] = {} |
| + tree = dirs[key] |
| + subkey = 'misc' |
| + if (sym.endswith('::__FUNCTION__') or |
| + sym.endswith('::__PRETTY_FUNCTION__')): |
| + subkey = '__FUNCTION__' |
| + elif sym.startswith('CSWTCH.'): |
| + subkey = 'CSWTCH' |
| + elif '::' in sym: |
| + subkey = sym[0:sym.find('::') + 2] |
| + #else: |
| + # print >>sys.stderr, 'unbucketed (no path?):', sym, type, size, path |
| + tree[subkey] = tree.get(subkey, 0) + size |
| + return dirs |
| + |
| + |
| +def jsonify_tree(tree, name): |
| + children = [] |
| + total = 0 |
| + files = 0 |
| + |
| + for key, val in tree.iteritems(): |
| + if isinstance(val, dict): |
| + subtree = jsonify_tree(val, key) |
| + total += subtree['data']['$area'] |
| + children.append(subtree) |
| + else: |
| + total += val |
| + children.append({ |
| + 'name': key + ' ' + format_bytes(val), |
| + 'data': { '$area': val } |
| + }) |
| + |
| + children.sort(key=lambda child: -child['data']['$area']) |
| + |
| + return { |
| + 'name': name + ' ' + format_bytes(total), |
| + 'data': { |
| + '$area': total, |
| + }, |
| + 'children': children, |
| + } |
| + |
| + |
| +def dump_nm(infile, outfile): |
| + dirs = treeify_syms(parse_nm(infile)) |
| + out = sys.stdout |
| + if outfile is not None: |
| + out = open(outfile, 'w') |
| + out.write('var kTree = ' + json.dumps(jsonify_tree(dirs, '/'), indent=2)) |
| + out.flush() |
| + if outfile is not None: |
| + out.close() |
| + |
| + |
| +def run_pa2l(outfile, library, arch, threads, verbose=False): |
| + """Run a parallel addr2line processing engine to dump and resolve symbols""" |
| + out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out') |
| + buildtype = os.getenv('BUILDTYPE', 'Release') |
| + classpath = out_dir + '/' + buildtype + '/lib.java/binary_size_java.jar' |
| + cmd = ['java', |
| + '-classpath', classpath, |
| + 'org.chromium.tools.binary_size.ParallelAddress2Line', |
| + '--disambiguate', |
| + '--outfile', outfile, |
| + '--library', library, |
| + '--threads', threads] |
| + if verbose is True: |
| + cmd.append('--verbose') |
| + if arch == 'android-arm': |
| + cmd.extend([ |
| + '--nm', 'third_party/android_tools/ndk/toolchains/arm-linux-androideabi-4.7/prebuilt/linux-x86_64/bin/arm-linux-androideabi-nm', |
| + '--addr2line', 'third_party/android_tools/ndk/toolchains/arm-linux-androideabi-4.7/prebuilt/linux-x86_64/bin/arm-linux-androideabi-addr2line', |
| + ]) |
| + elif arch == 'android-mips': |
| + cmd.extend([ |
| + '--nm', 'third_party/android_tools/ndk/toolchains/mipsel-linux-android-4.7/prebuilt/linux-x86_64/bin/mipsel-linux-android-nm', |
| + '--addr2line', 'third_party/android_tools/ndk/toolchains/mipsel-linux-android-4.7/prebuilt/linux-x86_64/bin/mipsel-linux-android-addr2line', |
| + ]) |
| + elif arch == 'android-x86': |
| + cmd.extend([ |
| + '--nm', 'third_party/android_tools/ndk/toolchains/x86-4.7/prebuilt/linux-x86_64/bin/i686-linux-android-nm' |
| + '--addr2line', 'third_party/android_tools/ndk/toolchains/x86-4.7/prebuilt/linux-x86_64/bin/i686-linux-android-addr2line', |
| + ]) |
| + # else, use whatever is in PATH (don't pass --nm or --addr2line) |
| + |
| + if verbose: |
| + print cmd |
| + |
| + return_code = subprocess.call(cmd) |
| + if return_code: |
| + raise RuntimeError('Failed to run ParallelAddress2Line: returned ' + str(return_code)) |
| + |
| +usage="""%prog [options] |
| + |
| +Runs a spatial analysis on a given library, looking up the source locations of |
| +its symbols and calculating how much space each directory, source file, and so |
| +on is taking. The result is a report that can be used to pinpoint sources of |
| +large portions of the binary, etceteras. |
| + |
| +Under normal circumstances, you only need to pass two arguments, thusly: |
| + |
| + %prog --library /path/to/library --destdir /path/to/output |
| + |
| +In this mode, the program will dump the symbols from the specified library and |
| +map those symbols back to source locations, producing a web-based report in the |
| +specified output directory. |
| + |
| +Other options are available via '--help'. |
| +""" |
| +parser = optparse.OptionParser(usage=usage) |
| +parser.add_option('--nm-in', dest='nm_in', metavar='PATH', |
| + help='if specified, use nm input from <path> instead of ' |
| + 'generating it. Note that source locations should be present ' |
| + 'in the file; i.e., no addr2line symbol lookups will be ' |
| + 'performed when this option is specified. Mutually exclusive ' |
| + 'with --library.') |
| +parser.add_option('--destdir', metavar='PATH', |
| + help='write output to the specified directory. An HTML ' |
| + 'report is generated here along with supporting files; any ' |
| + 'existing report will be overwritten.') |
| +parser.add_option('--library', metavar='PATH', |
| + help='if specified, process symbols in the library at the ' |
| + 'specified path. Mutually exclusive with --nm-in.') |
| +parser.add_option('--arch', |
| + help='the architecture that the library is targeted to. ' |
| + 'Currently supports the following: ' |
| + 'host-native, android-arm, android-mips, android-x86.' |
| + 'the default is host-native. This determines ' |
| + 'what nm/addr2line binaries are used. When host-native is ' |
| + 'chosen (the default), the program will use whichever ' |
| + 'nm/addr2line binaries are on the PATH. This is appropriate ' |
| + 'when you are analyzing a binary by and for your computer. ' |
| + 'This argument is only valid when using --library.') |
| +parser.add_option('--pa2l-threads', dest='threads', |
| + help='number of threads to use for the parallel addr2line ' |
| + 'processing pool; defaults to 1. More threads greatly ' |
| + 'improve throughput but eat RAM like popcorn, and take ' |
| + 'several gigabytes each. Start low and ramp this number up ' |
| + 'until your machine begins to struggle with RAM.' |
| + 'This argument is only valid when using --library.') |
| +parser.add_option('-v', dest='verbose', action='store_true', |
| + help='be verbose, printing lots of status information.') |
| +parser.add_option('--nm-out', dest='nm_out', |
| + help='keep the nm output file, and store it at the specified ' |
| + 'path. This is useful if you want to see the fully processed ' |
| + 'nm output after the symbols have been mapped to source ' |
| + 'locations. By default, a tempfile is used and is deleted ' |
| + 'when the program terminates.' |
| + 'This argument is only valid when using --library.') |
| +opts, args = parser.parse_args() |
| + |
| +if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in): |
| + parser.error('exactly one of --library or --nm-in is required') |
| +if (opts.nm_in): |
| + if opts.threads: |
| + print >> sys.stderr, ('WARNING: --pa2l-threads has no effect ' |
| + 'when used with --nm-in') |
| + if opts.arch: |
| + print >> sys.stderr, ('WARNING: --arch has no effect ' |
| + 'when used with --nm-in') |
| +if not opts.destdir: |
| + parser.error('--destdir is required argument') |
| +if not opts.threads: |
| + opts.threads = 1 |
| +if not opts.arch: |
| + opts.arch = 'host-native' |
| + |
| +if opts.arch not in ['host-native', 'android-arm', |
| + 'android-mips', 'android-x86']: |
| + parser.error('arch must be one of ' |
| + '[host-native,android-arm,android-mips,android-x86]') |
| + |
| +nm_in = opts.nm_in |
| +temp_file = None |
| +if nm_in is None: |
| + if opts.nm_out is None: |
| + temp_file = tempfile.NamedTemporaryFile(prefix='binary_size_nm', delete=False) |
| + nm_in = temp_file.name |
| + else: |
| + nm_in = opts.nm_out |
| + |
| + if opts.verbose: |
| + print 'Running parallel addr2line, dumping symbols to ' + nm_in; |
| + run_pa2l(outfile=nm_in, |
| + library=opts.library, |
| + arch=opts.arch, |
| + threads=opts.threads, |
| + verbose=(opts.verbose is True)) |
| +elif opts.verbose: |
| + print 'Using nm input from ' + nm_in |
| + |
| +if not os.path.exists(opts.destdir): |
| + os.makedirs(opts.destdir, 0755) |
| + |
| +jspath = opts.destdir + '/treemap-dump.js' |
| +nmfile = open(nm_in, 'r') |
| +dump_nm(nmfile, jspath) |
| +if not os.path.exists(opts.destdir + '/webtreemap.js'): |
| + url = 'https://github.com/martine/webtreemap/archive/gh-pages.zip' |
| + tmpdir = tempfile.mkdtemp('binary_size') |
| + try: |
| + cmd = ['wget', '-O', tmpdir + '/webtreemap.zip', url] |
| + return_code = subprocess.call(cmd) |
| + if return_code: |
| + raise RuntimeError('Failed to download: returned ' + str(return_code)) |
| + cmd = ['unzip', '-o', tmpdir + '/webtreemap.zip', '-d', tmpdir] |
| + return_code = subprocess.call(cmd) |
| + if return_code: |
| + raise RuntimeError('Failed to unzip: returned ' + str(return_code)) |
| + |
| + shutil.move(tmpdir + '/webtreemap-gh-pages/COPYING', opts.destdir) |
| + shutil.move(tmpdir + '/webtreemap-gh-pages/webtreemap.js', opts.destdir) |
| + shutil.move(tmpdir + '/webtreemap-gh-pages/webtreemap.css', opts.destdir) |
| + finally: |
| + shutil.rmtree(tmpdir, ignore_errors=True) |
| +shutil.copy('tools/binary_size/template/index.html', opts.destdir) |
| +if opts.verbose: |
| + print 'Report saved to ' + opts.destdir + '/index.html' |