Chromium Code Reviews

Unified Diff: tools/binary_size/run_binary_size_analysis.py

Issue 119083006: Add tool to help analyze binary size (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Remove unnecessary threadsafety from Record.java Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Index: tools/binary_size/run_binary_size_analysis.py
diff --git a/tools/binary_size/run_binary_size_analysis.py b/tools/binary_size/run_binary_size_analysis.py
new file mode 100755
index 0000000000000000000000000000000000000000..95464b506133b8fee62bbfdaf5ab9d8799c29c22
--- /dev/null
+++ b/tools/binary_size/run_binary_size_analysis.py
@@ -0,0 +1,419 @@
+#!/usr/bin/python
bulach 2014/01/08 15:04:00 make sure this file has a chmod +x :) I got bitten
Andrew Hayden (chromium.org) 2014/01/08 21:04:10 Thank you for the reminder. I just ran: git update
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Generate a spatial analysis against an arbitrary library.
+
+To use, build the 'binary_size_java' target. Then run this tool, passing
+in the location of the library to be analyzed along with any other options
+you desire.
+"""
+
+import fileinput
+import optparse
+import os
+import pprint
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+import json
bulach 2014/01/08 15:04:00 nit: sort order
Andrew Hayden (chromium.org) 2014/01/08 21:04:10 Done.
+
# [review] bulach 2014/01/08 15:04:00: chromium's python style guide is a bit
#   different..
# [review] Andrew Hayden (chromium.org) 2014/01/08 21:04:10: I cobbled this
#   together from an older Chromium-aut
def format_bytes(bytes):
  """Pretty-print a number of bytes.

  Args:
    bytes: the size to format, as a number.

  Returns:
    A string: the value divided by 1e6 with an 'm' suffix when it is at least
    1e6, divided by 1e3 with a 'k' suffix when it is at least 1e3, and the
    plain str() of the number otherwise.
  """
  # Thresholds are inclusive so that exactly 1e6 renders as '1.0m' rather
  # than '1000.0k', and exactly 1e3 as '1.0k' rather than '1000'.
  if bytes >= 1e6:
    return '%.1fm' % (bytes / 1.0e6)
  if bytes >= 1e3:
    return '%.1fk' % (bytes / 1.0e3)
  return str(bytes)
+
+
def symbol_type_to_human(type):
  """Translate a one-letter nm symbol type into a descriptive name.

  Args:
    type: an nm type letter; one of 'b', 'd', 'r', 't', 'w' or 'v'.

  Returns:
    The human-readable name for that type.

  Raises:
    KeyError: if the letter is not one of the types listed above.
  """
  human_names = dict(
      b='bss',
      d='data',
      r='read-only data',
      t='code',
      w='weak symbol',
      v='weak symbol')
  return human_names[type]
+
+
def parse_nm(input):
  """Parse nm output into symbol tuples.

  Lines that carry both an address and a size produce a tuple. Lines with an
  address but no size, and address-less lines of type 'U' or 'w', are skipped
  silently. Anything else is reported on stderr as 'unparsed'.

  Args:
    input: an iterable over lines of nm output.

  Yields:
    (symbol name, symbol type, symbol size, source file path) tuples. The
    type is lower-cased, 'v' is reported as 'w', and 'b' (bss) symbols are
    dropped. The path is None when the line has no source location.
  """
  # Size, type, symbol, then an optional tab-separated 'path:line' location.
  sized_re = re.compile(r'^[0-9a-f]{8} ([0-9a-f]{8}) (.) ([^\t]+)(?:\t(.*):[\d\?]+)?.*$')
  # Address but no size.
  unsized_re = re.compile(r'^[0-9a-f]{8} (.) ([^\t]+)(?:\t.*)?$')
  # No address at all -- typically external symbols.
  external_re = re.compile(r'^ {8} (.) (.*)$')

  for raw_line in input:
    line = raw_line.rstrip()

    sized = sized_re.match(line)
    if sized is not None:
      hex_size, kind, name, location = sized.group(1, 2, 3, 4)
      kind = kind.lower()
      if kind == 'v':
        kind = 'w'  # just call them all weak
      if kind != 'b':  # skip all BSS for now
        yield name, kind, int(hex_size, 16), location
      continue

    if unsized_re.match(line) is not None:
      # No size == we don't care.
      continue

    external = external_re.match(line)
    if external is not None and external.group(1) in ('U', 'w'):
      # external or weak symbol
      continue

    sys.stderr.write('unparsed: ' + repr(line) + '\n')
+
+
def treeify_syms(symbols):
  """Group symbol sizes into a nested dict that mirrors the source tree.

  Args:
    symbols: an iterable of (symbol name, symbol type, size, path) tuples.
        The path may be None or empty.

  Returns:
    A dict keyed by path component. Directory components map to nested dicts
    and the last component maps to the summed size of the symbols attributed
    to it. Symbols without a usable path are collected under the top-level
    key 'symbols without paths', bucketed by a key derived from the symbol
    name ('__FUNCTION__', 'CSWTCH', the leading 'namespace::' or 'misc').
  """
  dirs = {}
  for sym, unused_type, size, path in symbols:
    if path:
      path = os.path.normpath(path)
      if path.startswith('/'):
        path = path[1:]

    # TODO(andrewhayden): make segmenting by namespace work.
    # [review] bulach 2014/01/08 15:04:00: nit: TODO(andrewhayden)
    # [review] Andrew Hayden (chromium.org) 2014/01/08 21:04:10: Done.
    # [review] bulach 2014/01/08 15:04:00: nit: is this test needed? I think
    #   split will work
    # [review] Andrew Hayden (chromium.org) 2014/01/08 21:04:10: Done.
    if path:
      # split() also handles a path without any '/', yielding [path].
      parts = path.split('/')
      key = parts.pop()
      tree = dirs
      try:
        for part in parts:
          assert part != ''
          tree = tree.setdefault(part, {})
        tree[key] = tree.get(key, 0) + size
      except Exception:
        # Report which symbol broke the tree before propagating the error.
        sys.stderr.write('%s %s %s\n' % (sym, parts, key))
        raise
      continue

    # No usable path (None, empty, or just '/'): bucket by symbol name.
    tree = dirs.setdefault('symbols without paths', {})
    if (sym.endswith('::__FUNCTION__') or
        sym.endswith('::__PRETTY_FUNCTION__')):
      subkey = '__FUNCTION__'
    elif sym.startswith('CSWTCH.'):
      subkey = 'CSWTCH'
    elif '::' in sym:
      # Keep everything up to and including the first '::'.
      subkey = sym[0:sym.find('::') + 2]
    else:
      subkey = 'misc'
    tree[subkey] = tree.get(subkey, 0) + size
  return dirs
+
+
def jsonify_tree(tree, name):
  """Convert a nested size dict into a tree of JSON-serializable nodes.

  Args:
    tree: a dict whose values are either nested dicts or numeric sizes.
    name: the label for the node that represents this dict.

  Returns:
    A dict with 'name' (the label followed by the formatted total size),
    'data' ({'$area': total}) and 'children' (one node per entry of the dict,
    largest '$area' first). Leaf nodes have only 'name' and 'data'.
  """
  nodes = []
  area = 0
  for label, value in tree.items():
    if isinstance(value, dict):
      node = jsonify_tree(value, label)
      area += node['data']['$area']
    else:
      node = {
        'name': label + ' ' + format_bytes(value),
        'data': {'$area': value},
      }
      area += value
    nodes.append(node)

  return {
    'name': name + ' ' + format_bytes(area),
    'data': {'$area': area},
    'children': sorted(nodes, key=lambda node: -node['data']['$area']),
  }
+
+
def dump_nm(symbols, outfile):
  """Write the symbol tree as a JavaScript 'var kTree = ...' declaration.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: path of the file to write, or None to write to stdout.
  """
  # Build the whole payload first so that a failure while constructing the
  # tree does not leave behind an open or truncated output file.
  tree = jsonify_tree(treeify_syms(symbols), '/')
  payload = 'var kTree = ' + json.dumps(tree, indent=2)

  if outfile is None:
    sys.stdout.write(payload)
    sys.stdout.flush()
    return

  out = open(outfile, 'w')
  try:
    out.write(payload)
    out.flush()
  finally:
    out.close()
+
bulach 2014/01/08 15:04:00 nit: need another \n here.
Andrew Hayden (chromium.org) 2014/01/08 21:04:10 Done.
def dump_largest_symbols(symbols, outfile, n):
  """Write the n largest symbols as a JavaScript array declaration.

  The output is 'var largestSymbols = [', one object literal per line with
  the keys 'size', 'symbol', 'type' and 'location', and a closing '];'.
  bss ('b') and weak ('w') symbols are skipped.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: path of the file to write, or None to write to stdout.
    n: the maximum number of symbols to emit; nothing is emitted for n <= 0.
  """
  # a list of (sym, type, size, path); sort by size, largest first.
  ranked = sorted(symbols, key=lambda x: -x[2])
  # [review] bulach 2014/01/08 15:04:00: if it was None, the previous two
  #   lines would've fa
  # [review] Andrew Hayden (chromium.org) 2014/01/08 21:04:10: No, outfile
  #   versus out[stream]. The code always go
  out = sys.stdout if outfile is None else open(outfile, 'w')
  try:
    out.write('var largestSymbols = [\n')
    dumped = 0
    for sym, sym_type, size, path in ranked:
      if dumped >= n:
        break
      if sym_type in ('b', 'w'):
        continue  # skip bss and weak symbols
      # [review] bulach 2014/01/08 15:04:00: I think it'd be more readable
      #   as: entry = { 'size'
      # [review] Andrew Hayden (chromium.org) 2014/01/08 21:04:10: Sorry,
      #   still adapting to python. Makes sense, fixe
      entry = {
        'size': format_bytes(size),
        'symbol': sym,
        'type': symbol_type_to_human(sym_type),
        'location': path or '',
      }
      # json.dumps escapes quotes and backslashes in symbol names, which the
      # hand-built string concatenation did not.
      out.write('  ' + json.dumps(entry, sort_keys=True) + ',\n')
      dumped += 1
    out.write('];\n')
    out.flush()
  finally:
    if outfile is not None:
      out.close()
+
bulach 2014/01/08 15:04:00 nit: another \n here (two between top levels), so
Andrew Hayden (chromium.org) 2014/01/08 21:04:10 Done.
def make_source_map(symbols):
  """Aggregate symbol counts and sizes per source file.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.

  Returns:
    A dict keyed by the normalized source path, or by '[no path]' for symbols
    whose path is None or empty. Each value is a dict with 'path' (the path
    of the first symbol seen for that key, exactly as given), 'symbol_count'
    and 'size' (the summed symbol sizes).
  """
  by_source = {}
  for unused_sym, unused_type, size, path in symbols:
    source = os.path.normpath(path) if path else '[no path]'
    entry = by_source.get(source)
    if entry is None:
      entry = {'path': path, 'symbol_count': 0, 'size': 0}
      by_source[source] = entry
    entry['symbol_count'] += 1
    entry['size'] += size
  return by_source
+
def dump_largest_sources(symbols, outfile, n):
  """Write the n largest source files as a JavaScript array declaration.

  The output is 'var largestSources = [', one object literal per line with
  the keys 'size', 'symbol_count' and 'location', and a closing '];'.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: path of the file to write, or None to write to stdout.
    n: the maximum number of sources to emit; nothing is emitted for n <= 0.
  """
  sources = sorted(make_source_map(symbols).values(),
                   key=lambda x: -x['size'])
  out = sys.stdout if outfile is None else open(outfile, 'w')
  try:
    out.write('var largestSources = [\n')
    # [review] bulach 2014/01/08 15:04:00: ditto, using json would avoid the
    #   "quoting 'hell'
    # [review] Andrew Hayden (chromium.org) 2014/01/08 21:04:10: Done.
    for dumped, record in enumerate(sources):
      if dumped >= n:
        break
      entry = {
        'size': format_bytes(record['size']),
        'symbol_count': str(record['symbol_count']),
        # The record for symbols without a source has a path of None, which
        # used to raise TypeError when concatenated into the output.
        'location': record['path'] or '',
      }
      out.write('  ' + json.dumps(entry, sort_keys=True) + ',\n')
    out.write('];\n')
    out.flush()
  finally:
    if outfile is not None:
      out.close()
+
+
# [review] bulach 2014/01/08 15:04:00: nit: only called on one place, can
#   remove the defa
# [review] Andrew Hayden (chromium.org) 2014/01/08 21:04:10: Done.
def run_pa2l(outfile, library, arch, threads, verbose=False):
  """Run a parallel addr2line processing engine to dump and resolve symbols.

  Builds and runs a 'java' command line for
  org.chromium.tools.binary_size.ParallelAddress2Line. The jar is looked up
  under $CHROMIUM_OUT_DIR/$BUILDTYPE (defaults: 'out' and 'Release').

  Args:
    outfile: path passed to the tool as --outfile.
    library: path passed to the tool as --library.
    arch: 'android-arm', 'android-mips' or 'android-x86' to pass the matching
        NDK nm/addr2line binaries; for any other value neither --nm nor
        --addr2line is passed, so whatever is in PATH is used.
    threads: value for --threads; may be an int or a string.
    verbose: if true, pass --verbose to the tool and print the command.

  Raises:
    RuntimeError: if the tool exits with a non-zero return code.
  """
  # arch -> (NDK toolchain directory, prefix of the tool binaries).
  ndk_toolchains = {
    'android-arm': ('arm-linux-androideabi-4.7', 'arm-linux-androideabi-'),
    'android-mips': ('mipsel-linux-android-4.7', 'mipsel-linux-android-'),
    'android-x86': ('x86-4.7', 'i686-linux-android-'),
  }

  out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out')
  buildtype = os.getenv('BUILDTYPE', 'Release')
  # [review] bulach 2014/01/08 15:04:00: nit: classpath =
  #   os.path.join(out_dir, build_type
  # [review] Andrew Hayden (chromium.org) 2014/01/08 21:04:10: Done.
  classpath = os.path.join(out_dir, buildtype, 'lib.java',
                           'binary_size_java.jar')
  cmd = ['java',
         '-classpath', classpath,
         'org.chromium.tools.binary_size.ParallelAddress2Line',
         '--disambiguate',
         '--outfile', outfile,
         '--library', library,
         # subprocess only accepts strings, and callers may pass an int.
         '--threads', str(threads)]
  if verbose:
    cmd.append('--verbose')

  # [review] bulach 2014/01/08 15:04:00: nit: it has to be <80cols. since
  #   it's already unde
  # [review] Andrew Hayden (chromium.org) 2014/01/08 21:04:10: I've cleaned
  #   this up a bit and now use os.path.joi
  if arch in ndk_toolchains:
    toolchain, prefix = ndk_toolchains[arch]
    bin_dir = os.path.join('third_party', 'android_tools', 'ndk',
                           'toolchains', toolchain, 'prebuilt',
                           'linux-x86_64', 'bin')
    # Building the list from a table avoids the missing comma that used to
    # fuse the x86 nm path with the '--addr2line' flag.
    cmd.extend([
        '--nm', os.path.join(bin_dir, prefix + 'nm'),
        '--addr2line', os.path.join(bin_dir, prefix + 'addr2line'),
    ])
  # else, use whatever is in PATH (don't pass --nm or --addr2line)

  if verbose:
    print(cmd)

  return_code = subprocess.call(cmd)
  if return_code:
    raise RuntimeError('Failed to run ParallelAddress2Line: returned ' +
                       str(return_code))
+
# Script body: parse the command line, obtain nm output (either from --nm-in
# or by running ParallelAddress2Line on --library), and write the report
# files into --destdir.
# [review] bulach 2014/01/08 15:04:00: this whole block has to be under def
#   main(): ...
# [review] Andrew Hayden (chromium.org) 2014/01/08 21:04:10: Done.
usage = """%prog [options]

Runs a spatial analysis on a given library, looking up the source locations of
its symbols and calculating how much space each directory, source file, and so
on is taking. The result is a report that can be used to pinpoint sources of
large portions of the binary, etceteras.

Under normal circumstances, you only need to pass two arguments, thusly:

    %prog --library /path/to/library --destdir /path/to/output

In this mode, the program will dump the symbols from the specified library and
map those symbols back to source locations, producing a web-based report in the
specified output directory.

Other options are available via '--help'.
"""
parser = optparse.OptionParser(usage=usage)
parser.add_option('--nm-in', dest='nm_in', metavar='PATH',
                  help='if specified, use nm input from <path> instead of '
                  'generating it. Note that source locations should be present '
                  'in the file; i.e., no addr2line symbol lookups will be '
                  'performed when this option is specified. Mutually exclusive '
                  'with --library.')
parser.add_option('--destdir', metavar='PATH',
                  help='write output to the specified directory. An HTML '
                  'report is generated here along with supporting files; any '
                  'existing report will be overwritten.')
parser.add_option('--library', metavar='PATH',
                  help='if specified, process symbols in the library at the '
                  'specified path. Mutually exclusive with --nm-in.')
# Adjacent string literals are concatenated verbatim, so each sentence below
# must carry its own trailing space.
parser.add_option('--arch',
                  help='the architecture that the library is targeted to. '
                  'Currently supports the following: '
                  'host-native, android-arm, android-mips, android-x86. '
                  'The default is host-native. This determines '
                  'what nm/addr2line binaries are used. When host-native is '
                  'chosen (the default), the program will use whichever '
                  'nm/addr2line binaries are on the PATH. This is appropriate '
                  'when you are analyzing a binary by and for your computer. '
                  'This argument is only valid when using --library.')
parser.add_option('--pa2l-threads', dest='threads',
                  help='number of threads to use for the parallel addr2line '
                  'processing pool; defaults to 1. More threads greatly '
                  'improve throughput but eat RAM like popcorn, and take '
                  'several gigabytes each. Start low and ramp this number up '
                  'until your machine begins to struggle with RAM. '
                  'This argument is only valid when using --library.')
parser.add_option('-v', dest='verbose', action='store_true',
                  help='be verbose, printing lots of status information.')
parser.add_option('--nm-out', dest='nm_out',
                  help='keep the nm output file, and store it at the specified '
                  'path. This is useful if you want to see the fully processed '
                  'nm output after the symbols have been mapped to source '
                  'locations. By default, a tempfile is used and is deleted '
                  'when the program terminates. '
                  'This argument is only valid when using --library.')
opts, args = parser.parse_args()

if bool(opts.library) == bool(opts.nm_in):
  parser.error('exactly one of --library or --nm-in is required')
if opts.nm_in:
  if opts.threads:
    sys.stderr.write('WARNING: --pa2l-threads has no effect '
                     'when used with --nm-in\n')
  if opts.arch:
    sys.stderr.write('WARNING: --arch has no effect '
                     'when used with --nm-in\n')
if not opts.destdir:
  parser.error('--destdir is required argument')
if not opts.threads:
  opts.threads = 1
if not opts.arch:
  opts.arch = 'host-native'

if opts.arch not in ['host-native', 'android-arm',
                     'android-mips', 'android-x86']:
  parser.error('arch must be one of '
               '[host-native,android-arm,android-mips,android-x86]')

# [review] bulach 2014/01/08 15:04:00: 369-393 would be better as: symbols =
#   GetNMSymbol
# [review] Andrew Hayden (chromium.org) 2014/01/08 21:04:10: Oh yes, I had
#   been meaning to extract this! And mo
nm_in = opts.nm_in
temp_file = None
try:
  if not nm_in:
    if not opts.nm_out:
      temp_file = tempfile.NamedTemporaryFile(prefix='binary_size_nm',
                                              delete=False)
      # Only the name is needed; run_pa2l hands the path to the Java tool.
      temp_file.close()
      nm_in = temp_file.name
    else:
      nm_in = opts.nm_out

    if opts.verbose:
      print('Running parallel addr2line, dumping symbols to ' + nm_in)
    # opts.threads is the int 1 when defaulted; the command line needs text.
    run_pa2l(outfile=nm_in,
             library=opts.library,
             arch=opts.arch,
             threads=str(opts.threads),
             verbose=(opts.verbose is True))
  elif opts.verbose:
    print('Using nm input from ' + nm_in)

  if not os.path.exists(opts.destdir):
    os.makedirs(opts.destdir, 0o755)

  # [review] bulach 2014/01/08 15:04:00: nit: with file(nm_in, 'r') as
  #   nm_file: symbols
  # [review] Andrew Hayden (chromium.org) 2014/01/08 21:04:10: Careful. You
  #   are in danger of making me into a hal
  with open(nm_in, 'r') as nmfile:
    symbols = list(parse_nm(nmfile))
finally:
  # Honor the --nm-out help text: the temp file is removed once it has been
  # read (or if anything above failed).
  if temp_file is not None and os.path.exists(temp_file.name):
    os.remove(temp_file.name)

# [review] bulach 2014/01/08 15:04:00: os.path.join in these three places..
# [review] Andrew Hayden (chromium.org) 2014/01/08 21:04:10: Done.
dump_nm(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
dump_largest_symbols(symbols,
                     os.path.join(opts.destdir, 'largest-symbols.js'), 100)
dump_largest_sources(symbols,
                     os.path.join(opts.destdir, 'largest-sources.js'), 100)

if not os.path.exists(os.path.join(opts.destdir, 'webtreemap.js')):
  # [review] bulach 2014/01/08 15:04:00: please, get third-party reviewers
  #   approval.. also
  # [review] Andrew Hayden (chromium.org) 2014/01/08 21:04:10: Will add TODO
  #   for the latter part and will email t
  url = 'https://github.com/martine/webtreemap/archive/gh-pages.zip'
  tmpdir = tempfile.mkdtemp('binary_size')
  try:
    archive = os.path.join(tmpdir, 'webtreemap.zip')
    cmd = ['wget', '-O', archive, url]
    return_code = subprocess.call(cmd)
    if return_code:
      raise RuntimeError('Failed to download: returned ' + str(return_code))
    cmd = ['unzip', '-o', archive, '-d', tmpdir]
    return_code = subprocess.call(cmd)
    if return_code:
      raise RuntimeError('Failed to unzip: returned ' + str(return_code))

    unpacked = os.path.join(tmpdir, 'webtreemap-gh-pages')
    for filename in ('COPYING', 'webtreemap.js', 'webtreemap.css'):
      shutil.move(os.path.join(unpacked, filename), opts.destdir)
  finally:
    shutil.rmtree(tmpdir, ignore_errors=True)
shutil.copy('tools/binary_size/template/index.html', opts.destdir)
if opts.verbose:
  print('Report saved to ' + os.path.join(opts.destdir, 'index.html'))

Powered by Google App Engine