| Index: tools/binary_size/run_binary_size_analysis.py
|
| diff --git a/tools/binary_size/run_binary_size_analysis.py b/tools/binary_size/run_binary_size_analysis.py
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..f2ba4dc5955f79ba4614d4e1bd5f20da103b66cb
|
| --- /dev/null
|
| +++ b/tools/binary_size/run_binary_size_analysis.py
|
| @@ -0,0 +1,491 @@
|
| +#!/usr/bin/python
|
| +# Copyright 2014 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +"""Generate a spatial analysis against an arbitrary library.
|
| +
|
| +To use, build the 'binary_size_tool' target. Then run this tool, passing
|
| +in the location of the library to be analyzed along with any other options
|
| +you desire.
|
| +"""
|
| +
|
| +import collections
|
| +import fileinput
|
| +import json
|
| +import optparse
|
| +import os
|
| +import pprint
|
| +import re
|
| +import shutil
|
| +import subprocess
|
| +import sys
|
| +import tempfile
|
| +
|
| +
|
def FormatBytes(bytes):
    """Pretty-print a number of bytes.

    Args:
        bytes: the byte count. The name shadows the builtin, but it is kept
            so that callers passing it by keyword keep working.

    Returns:
        A string: the count divided by 1e6 with one decimal and an 'm'
        suffix when it is at least 1e6, divided by 1e3 with a 'k' suffix
        when it is at least 1e3, and str(bytes) otherwise.
    """
    # Thresholds are inclusive so that exactly 1000000 renders as '1.0m'
    # (not '1000.0k') and exactly 1000 renders as '1.0k' (not '1000').
    if bytes >= 1e6:
        return '%.1fm' % (bytes / 1.0e6)
    if bytes >= 1e3:
        return '%.1fk' % (bytes / 1.0e3)
    return str(bytes)
|
| +
|
| +
|
def SymbolTypeToHuman(type):
    """Convert a symbol type as printed by nm into a human-readable name.

    Args:
        type: the lower-case, single-character nm symbol type. The name
            shadows the builtin but is kept for keyword callers.

    Returns:
        The readable name for the known types (b, d, r, t, w, v), or
        'other' for any type letter that is not in the table.
    """
    names = {
        'b': 'bss',
        'd': 'data',
        'r': 'read-only data',
        't': 'code',
        'w': 'weak symbol',
        'v': 'weak symbol',
    }
    # nm can print type letters beyond the six above; fall back instead of
    # raising KeyError so one unusual symbol cannot abort a whole report.
    return names.get(type, 'other')
|
| +
|
| +
|
def ParseNm(input):
    """Parse nm output.

    Args:
        input: an iterable over lines of nm output. The name shadows the
            builtin but is kept for keyword callers.

    Yields:
        (symbol name, symbol type, symbol size, source file path) tuples.
        The type is lower-cased and 'v' is reported as 'w'. The path is
        None when the line carries no tab-separated location.

    Lines describing BSS symbols, lines that have an address but no size,
    and address-less lines of type 'U' or 'w' are skipped silently. Any
    other line that does not match is reported on stderr as 'unparsed'.
    """
    # Addresses and sizes are matched as 8 *or more* hex digits so that nm
    # output with wider (e.g. 16-digit) columns is parsed as well.
    sym_re = re.compile(r'^[0-9a-f]{8,} '        # address
                        r'([0-9a-f]{8,}) '       # size
                        r'(.) '                  # symbol type, one character
                        r'([^\t]+)'              # symbol name, up to a tab
                        r'(?:\t(.*):[\d\?]+)?.*$')  # optional location
    # Match lines with addr but no size.
    addr_re = re.compile(r'^[0-9a-f]{8,} (.) ([^\t]+)(?:\t.*)?$')
    # Match lines that don't have an address at all -- typically external
    # symbols.
    noaddr_re = re.compile(r'^ {8,} (.) (.*)$')

    for line in input:
        line = line.rstrip()

        match = sym_re.match(line)
        if match:
            size_hex, sym_type, sym, path = match.groups()
            sym_type = sym_type.lower()
            if sym_type == 'v':
                sym_type = 'w'  # just call them all weak
            if sym_type == 'b':
                continue  # skip all BSS for now
            yield sym, sym_type, int(size_hex, 16), path
            continue

        if addr_re.match(line):
            # No size == we don't care.
            continue

        match = noaddr_re.match(line)
        if match and match.group(1) in ('U', 'w'):
            # external or weak symbol
            continue

        sys.stderr.write('unparsed: %s\n' % repr(line))
|
| +
|
| +
|
def TreeifySymbols(symbols):
    """Convert symbols into a path-based tree, calculating size information
    along the way.

    Args:
        symbols: an iterable of (symbol name, type, size, path) tuples.

    Returns:
        A dictionary that contains two kinds of nodes:
        1. Leaf nodes, representing source code locations (e.g., c++ files).
           These nodes have the following dictionary entries:
             sizes: a dictionary whose keys are categories (such as
                    '[code]', '[data]', '[vtable]') and whose values are the
                    size, in bytes, of those categories;
             size:  the total size, in bytes, of all entries in 'sizes'.
        2. Non-leaf nodes, representing directories.
           These nodes have the following dictionary entries:
             children: a dictionary whose keys are names (path entries;
                       either directory or file names) and whose values are
                       other nodes;
             size:     the total size, in bytes, of all the leaf nodes
                       contained within 'children' (recursively expanded).

        The returned dictionary is the root directory node. Symbols that
        have no usable path are collected in a leaf named
        'symbols without paths' directly under the root, bucketed by
        '__FUNCTION__', 'CSWTCH', a leading 'name::' qualifier, or 'misc'.

    Raises:
        Whatever error occurs while inserting a symbol (for example a
        KeyError when one path is used both as a file and as a directory);
        the symbol, its directory parts and its file name are written to
        stderr before the error is re-raised.
    """
    type_buckets = {
        'r': '[rodata]',
        'd': '[data]',
        'b': '[bss]',
        't': '[code]',  # 'text' in binary parlance means 'code'.
        'w': '[weak]',
    }

    dirs = {'children': {}, 'size': 0}
    for sym, sym_type, size, path in symbols:
        dirs['size'] += size

        parts = None
        if path:
            # normpath keeps a leading '//' intact, so strip every leading
            # slash rather than just one to avoid an empty first component.
            path = os.path.normpath(path).lstrip('/')
            if path:
                parts = path.split('/')

        if parts:
            file_key = parts.pop()
            node = dirs
            try:
                # Walk to the parent of the file node, creating directory
                # nodes as needed and adding the size to each of them.
                for part in parts:
                    if part not in node['children']:
                        node['children'][part] = {'children': {}, 'size': 0}
                    node = node['children'][part]
                    node['size'] += size

                # Get (creating if necessary) the node for the file.
                # This node doesn't have a 'children' attribute.
                if file_key not in node['children']:
                    node['children'][file_key] = {
                        'sizes': collections.defaultdict(int),
                        'size': 0,
                    }
                node = node['children'][file_key]
                node['size'] += size

                # Accumulate size into a bucket within the file.
                sym_type = sym_type.lower()
                if 'vtable for ' in sym:
                    bucket = '[vtable]'
                else:
                    bucket = type_buckets.get(sym_type, '[other]')
                node['sizes'][bucket] += size
            except Exception:
                # Report the file name being inserted. The previous handler
                # printed an unrelated variable that could be undefined
                # here, which replaced the real error with a NameError.
                sys.stderr.write('%s %s %s\n' % (sym, parts, file_key))
                raise
        else:
            key = 'symbols without paths'
            if key not in dirs['children']:
                dirs['children'][key] = {
                    'sizes': collections.defaultdict(int),
                    'size': 0,
                }
            node = dirs['children'][key]

            subkey = 'misc'
            if (sym.endswith('::__FUNCTION__') or
                    sym.endswith('::__PRETTY_FUNCTION__')):
                subkey = '__FUNCTION__'
            elif sym.startswith('CSWTCH.'):
                subkey = 'CSWTCH'
            elif '::' in sym:
                # Keep everything up to and including the first '::'.
                subkey = sym[0:sym.find('::') + 2]
            node['sizes'][subkey] += size
            node['size'] += size
    return dirs
|
| +
|
| +
|
def JsonifyTree(tree, name):
    """Convert TreeifySymbols output to a JSON-serializable treemap.

    The format is very similar to the input, the notable differences being
    lists of children instead of maps and some different attribute names.

    Args:
        tree: a node as produced by TreeifySymbols (directory or leaf).
        name: the display name for this node.

    Returns:
        A dict with 'name' (the name plus the formatted size), 'data'
        (holding '$area', the node size) and 'children' (sorted by size,
        largest first).
    """
    css_class_map = {
        '[vtable]': 'vtable',
        '[rodata]': 'read-only_data',
        '[data]': 'data',
        '[bss]': 'bss',
        '[code]': 'code',
        '[weak]': 'weak_symbol'
    }

    if 'children' in tree:
        # Directory node: convert every child recursively.
        unsorted_children = [
            JsonifyTree(child, child_name)
            for child_name, child in tree['children'].iteritems()]
    else:
        # Leaf node: each size bucket becomes an entry in the treemap.
        unsorted_children = []
        for kind, size in tree['sizes'].iteritems():
            bucket_json = {'name': kind + ' (' + FormatBytes(size) + ')',
                           'data': {'$area': size}}
            css_class = css_class_map.get(kind)
            if css_class is not None:
                bucket_json['data']['$symbol'] = css_class
            unsorted_children.append(bucket_json)

    # Largest first; negating the key keeps ties in their incoming order.
    children = sorted(unsorted_children,
                      key=lambda child: -child['data']['$area'])

    # For leaf nodes 'size' is the size of the leaf itself; for directories
    # it is the sum of the sizes of everything beneath them.
    total = tree['size']
    return {'name': name + ' (' + FormatBytes(total) + ')',
            'data': {'$area': total},
            'children': children}
|
| +
|
| +
|
def DumpTreemap(symbols, outfile):
    """Write the symbol treemap to outfile as a JavaScript assignment.

    The file content is 'var kTree = ' followed by the JSON form of the
    tree built from symbols, with the root node named '/'.
    """
    tree = TreeifySymbols(symbols)
    # The file is opened before the tree is converted, as before, and the
    # context manager closes (and thereby flushes) it on every exit path.
    with open(outfile, 'w') as out:
        payload = json.dumps(JsonifyTree(tree, '/'))
        out.write('var kTree = ' + payload)
|
| +
|
| +
|
def DumpLargestSymbols(symbols, outfile, n):
    """Write the largest symbols to outfile as a JavaScript array.

    The output assigns an array literal to 'largestSymbols'. Symbols are
    taken in descending size order, bss ('b') and weak ('w') symbols are
    left out, and writing stops once n entries have been written (the
    limit is checked after each entry is written).

    Args:
        symbols: an iterable of (sym, type, size, path) tuples.
        outfile: path of the file to write.
        n: the number of entries after which to stop.
    """
    ranked = sorted(symbols, key=lambda item: -item[2])
    with open(outfile, 'w') as out:
        try:
            out.write('var largestSymbols = [\n')
            # skip bss and weak symbols
            reportable = (item for item in ranked
                          if item[1] not in ('b', 'w'))
            for count, (sym, sym_type, size, path) in enumerate(reportable, 1):
                entry = {'size': FormatBytes(size),
                         'symbol': sym,
                         'type': SymbolTypeToHuman(sym_type),
                         'location': '' if path is None else path}
                out.write(json.dumps(entry) + ',\n')
                if count >= n:
                    break
        finally:
            # The array is always terminated, even if an entry failed.
            out.write('];\n')
|
| +
|
| +
|
def MakeSourceMap(symbols):
    """Aggregate symbol sizes and counts per source file.

    Args:
        symbols: an iterable of (sym, type, size, path) tuples.

    Returns:
        A dict keyed by normalized path ('[no path]' for symbols with a
        falsy path). Each value is a dict with 'path' (the path exactly as
        given by the first symbol seen for that key), 'symbol_count' and
        'size' (the summed size of the symbols in that file).
    """
    sources = {}
    for _, _, size, path in symbols:
        key = os.path.normpath(path) if path else '[no path]'
        record = sources.setdefault(
            key, {'path': path, 'symbol_count': 0, 'size': 0})
        record['size'] += size
        record['symbol_count'] += 1
    return sources
|
| +
|
| +
|
def DumpLargestSources(symbols, outfile, n):
    """Write the largest source files to outfile as a JavaScript array.

    The output assigns an array literal to 'largestSources'. Sources come
    from MakeSourceMap, are taken in descending total size, and writing
    stops once n entries have been written (the limit is checked after
    each entry is written).

    Args:
        symbols: an iterable of (sym, type, size, path) tuples.
        outfile: path of the file to write.
        n: the number of entries after which to stop.
    """
    by_source = MakeSourceMap(symbols)
    ranked = sorted(by_source.values(), key=lambda rec: -rec['size'])
    with open(outfile, 'w') as out:
        try:
            out.write('var largestSources = [\n')
            for count, record in enumerate(ranked, 1):
                entry = {'size': FormatBytes(record['size']),
                         'symbol_count': str(record['symbol_count']),
                         'location': record['path']}
                out.write(json.dumps(entry) + ',\n')
                if count >= n:
                    break
        finally:
            # The array is always terminated, even if an entry failed.
            out.write('];\n')
|
| +
|
| +
|
def DumpLargestVTables(symbols, outfile, n):
    """Write the largest vtables to outfile as a JavaScript array.

    The output assigns an array literal to 'largestVTables'. A symbol
    counts as a vtable when its name contains 'vtable for '. Entries are
    taken in descending size order and writing stops once n entries have
    been written (the limit is checked after each entry is written).

    Args:
        symbols: an iterable of (sym, type, size, path) tuples.
        outfile: path of the file to write.
        n: the number of entries after which to stop.
    """
    vtables = [{'symbol': symbol, 'path': path, 'size': size}
               for symbol, _, size, path in symbols
               if 'vtable for ' in symbol]
    ranked = sorted(vtables, key=lambda rec: -rec['size'])
    with open(outfile, 'w') as out:
        try:
            out.write('var largestVTables = [\n')
            for count, record in enumerate(ranked, 1):
                entry = {'size': FormatBytes(record['size']),
                         'symbol': record['symbol'],
                         'location': record['path']}
                out.write(json.dumps(entry) + ',\n')
                if count >= n:
                    break
        finally:
            # The array is always terminated, even if an entry failed.
            out.write('];\n')
|
| +
|
| +
|
def RunParallelAddress2Line(outfile, library, arch, jobs, verbose):
    """Run a parallel addr2line processing engine to dump and resolve symbols.

    Builds and runs a 'java' command for the class
    org.chromium.tools.binary_size.ParallelAddress2Line, using the jar
    found under $CHROMIUM_OUT_DIR/$BUILDTYPE/lib.java (defaulting to
    out/Release).

    Args:
        outfile: passed to the tool as --outfile.
        library: passed to the tool as --library.
        arch: one of 'android-arm', 'android-mips' or 'android-x86' to
            select the matching nm/addr2line from the NDK toolchains
            directory; for any other value neither --nm nor --addr2line
            is passed, so the tool uses whatever is in PATH.
        jobs: passed to the tool as --threads; converted with str() so
            both ints and strings are accepted.
        verbose: when truthy, passes --verbose and prints the command.

    Raises:
        RuntimeError: if the command exits with a non-zero return code.
    """
    out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out')
    build_type = os.getenv('BUILDTYPE', 'Release')
    classpath = os.path.join(out_dir, build_type, 'lib.java',
                             'binary_size_java.jar')
    cmd = ['java',
           '-classpath', classpath,
           'org.chromium.tools.binary_size.ParallelAddress2Line',
           '--disambiguate',
           '--outfile', outfile,
           '--library', library,
           '--threads', str(jobs)]
    # Use plain truthiness here so this agrees with the check that guards
    # printing the command below.
    if verbose:
        cmd.append('--verbose')

    # arch -> (toolchain directory, binary name prefix)
    toolchains = {
        'android-arm': ('arm-linux-androideabi-4.7',
                        'arm-linux-androideabi-'),
        'android-mips': ('mipsel-linux-android-4.7',
                         'mipsel-linux-android-'),
        'android-x86': ('x86-4.7', 'i686-linux-android-'),
    }
    if arch in toolchains:
        toolchain_dir, tool_prefix = toolchains[arch]
        prefix = os.path.join('third_party', 'android_tools', 'ndk',
                              'toolchains', toolchain_dir, 'prebuilt',
                              'linux-x86_64', 'bin', tool_prefix)
        cmd.extend(['--nm', prefix + 'nm',
                    '--addr2line', prefix + 'addr2line'])
    # else, use whatever is in PATH (don't pass --nm or --addr2line)

    if verbose:
        print(cmd)

    return_code = subprocess.call(cmd)
    if return_code:
        raise RuntimeError('Failed to run ParallelAddress2Line: returned ' +
                           str(return_code))
|
| +
|
| +
|
def GetNmSymbols(infile, outfile, library, arch, jobs, verbose):
    """Return the parsed symbols, generating the nm dump first if needed.

    Args:
        infile: path of an existing nm dump to parse, or None to generate
            one from library with RunParallelAddress2Line.
        outfile: when generating, where to store the dump; if None a
            temporary file is used and removed again once it is parsed.
        library: the library to analyze (only used when generating).
        arch: target architecture (only used when generating).
        jobs: number of addr2line jobs (only used when generating).
        verbose: when truthy, print progress information.

    Returns:
        A list of the tuples yielded by ParseNm for the dump.
    """
    generate = infile is None
    temp_path = None
    if generate:
        if outfile is None:
            # Only the name is needed; close our handle right away so it
            # is not leaked while the external tool writes the file.
            handle = tempfile.NamedTemporaryFile(delete=False)
            handle.close()
            temp_path = handle.name
            infile = temp_path
        else:
            infile = outfile

    try:
        if generate:
            if verbose:
                print('Running parallel addr2line, dumping symbols to ' +
                      infile)
            RunParallelAddress2Line(outfile=infile, library=library,
                                    arch=arch, jobs=jobs, verbose=verbose)
        elif verbose:
            print('Using nm input from ' + infile)

        with open(infile, 'r') as nm_file:
            return list(ParseNm(nm_file))
    finally:
        # The symbols are fully materialized above, so the temporary dump
        # can go. Removal is best-effort: a failure to clean up must not
        # hide the result or the original error.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
|
| +
|
| +
|
def main():
    """Parse the command line, analyze the symbols and write the report.

    Exactly one of --library or --nm-in must be given, and --destdir is
    required. The symbols are read via GetNmSymbols, four JavaScript data
    files are written into the destination directory, and the webtreemap
    assets and the index.html template are copied next to them. The asset
    and template paths are relative, so they are resolved against the
    current working directory.

    Returns:
        None, so that sys.exit(main()) exits with status 0. Invalid
        options end the program through parser.error().
    """
    usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

      %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--nm-in', metavar='PATH',
                      help='if specified, use nm input from <path> instead '
                      'of generating it. Note that source locations should '
                      'be present in the file; i.e., no addr2line symbol '
                      'lookups will be performed when this option is '
                      'specified. Mutually exclusive with --library.')
    parser.add_option('--destdir', metavar='PATH',
                      help='write output to the specified directory. An '
                      'HTML report is generated here along with supporting '
                      'files; any existing report will be overwritten.')
    parser.add_option('--library', metavar='PATH',
                      help='if specified, process symbols in the library at '
                      'the specified path. Mutually exclusive with --nm-in.')
    parser.add_option('--arch',
                      help='the architecture that the library is targeted '
                      'to. Determines which nm/addr2line binaries are used. '
                      'When \'host-native\' is chosen, the program will use '
                      'whichever nm/addr2line binaries are on the PATH. '
                      'This is appropriate when you are analyzing a binary '
                      'by and for your computer. '
                      'This argument is only valid when using --library. '
                      'Default is \'host-native\'.',
                      choices=['host-native', 'android-arm',
                               'android-mips', 'android-x86'],)
    parser.add_option('--jobs',
                      help='number of jobs to use for the parallel '
                      'addr2line processing pool; defaults to 1. More '
                      'jobs greatly improve throughput but eat RAM like '
                      'popcorn, and take several gigabytes each. Start low '
                      'and ramp this number up until your machine begins to '
                      'struggle with RAM. '
                      'This argument is only valid when using --library.')
    parser.add_option('-v', dest='verbose', action='store_true',
                      help='be verbose, printing lots of status '
                      'information.')
    # Note the space after 'terminates.': without it the help text ran the
    # two sentences together.
    parser.add_option('--nm-out', metavar='PATH',
                      help='keep the nm output file, and store it at the '
                      'specified path. This is useful if you want to see '
                      'the fully processed nm output after the symbols have '
                      'been mapped to source locations. By default, a '
                      'tempfile is used and is deleted when the program '
                      'terminates. '
                      'This argument is only valid when using --library.')
    opts, args = parser.parse_args()

    if bool(opts.library) == bool(opts.nm_in):
        parser.error('exactly one of --library or --nm-in is required')
    if opts.nm_in:
        # These options only affect symbol generation, which --nm-in skips.
        for flag, value in (('--jobs', opts.jobs),
                            ('--arch', opts.arch),
                            ('--nm-out', opts.nm_out)):
            if value:
                sys.stderr.write('WARNING: %s has no effect '
                                 'when used with --nm-in\n' % flag)
    if not opts.destdir:
        parser.error('--destdir is required argument')
    if not opts.jobs:
        opts.jobs = '1'
    if not opts.arch:
        opts.arch = 'host-native'

    symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, opts.arch,
                           opts.jobs, bool(opts.verbose))
    if not os.path.exists(opts.destdir):
        os.makedirs(opts.destdir, 0o755)

    DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
    DumpLargestSymbols(symbols,
                       os.path.join(opts.destdir, 'largest-symbols.js'), 100)
    DumpLargestSources(symbols,
                       os.path.join(opts.destdir, 'largest-sources.js'), 100)
    DumpLargestVTables(symbols,
                       os.path.join(opts.destdir, 'largest-vtables.js'), 100)

    # TODO(andrewhayden): Switch to D3 for greater flexibility
    treemap_out = os.path.join(opts.destdir, 'webtreemap')
    if not os.path.exists(treemap_out):
        os.makedirs(treemap_out, 0o755)
    treemap_src = os.path.join('third_party', 'webtreemap', 'src',
                               'webtreemap-gh-pages')
    for asset in ('COPYING', 'webtreemap.js', 'webtreemap.css'):
        shutil.copy(os.path.join(treemap_src, asset), treemap_out)
    shutil.copy(os.path.join('tools', 'binary_size', 'template',
                             'index.html'),
                opts.destdir)
    if opts.verbose:
        print('Report saved to ' + opts.destdir + '/index.html')
|
| +
|
| +
|
# Run the tool only when executed as a script, using main()'s return value
# as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
|
|
|