Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(275)

Unified Diff: tools/binary_size/run_binary_size_analysis.py

Issue 119083006: Add tool to help analyze binary size (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebase onto master for a clean commit of gyp change Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/binary_size/run_binary_size_analysis.py
diff --git a/tools/binary_size/run_binary_size_analysis.py b/tools/binary_size/run_binary_size_analysis.py
new file mode 100755
index 0000000000000000000000000000000000000000..f2ba4dc5955f79ba4614d4e1bd5f20da103b66cb
--- /dev/null
+++ b/tools/binary_size/run_binary_size_analysis.py
@@ -0,0 +1,491 @@
+#!/usr/bin/python
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Generate a spatial analysis against an arbitrary library.
+
+To use, build the 'binary_size_tool' target. Then run this tool, passing
+in the location of the library to be analyzed along with any other options
+you desire.
+"""
+
+import collections
+import fileinput
+import json
+import optparse
+import os
+import pprint
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+
+
def FormatBytes(byte_count):
  """Pretty-print a number of bytes as a short human-readable string.

  Uses decimal (1000-based) units: returns e.g. '1.5m' for megabytes,
  '2.0k' for kilobytes, or the plain decimal string below a kilobyte.
  (Renamed the parameter, which shadowed the `bytes` builtin, and made the
  thresholds inclusive so exactly 1e6 renders as '1.0m', not '1000.0k'.)
  """
  if byte_count >= 1e6:
    return '%.1fm' % (byte_count / 1.0e6)
  if byte_count >= 1e3:
    return '%.1fk' % (byte_count / 1.0e3)
  return str(byte_count)
+
+
def SymbolTypeToHuman(symbol_type):
  """Convert a symbol type as printed by nm into a human-readable name.

  Expects the lower-cased single-character nm type code; raises KeyError
  for codes not in the map. (Renamed the parameter, which shadowed the
  `type` builtin.)
  """
  return {'b': 'bss',
          'd': 'data',
          'r': 'read-only data',
          't': 'code',
          'w': 'weak symbol',
          'v': 'weak symbol'}[symbol_type]
+
+
def ParseNm(input):
  """Parse nm output.

  Argument: an iterable over lines of nm output.

  Yields: (symbol name, symbol type, symbol size, source file path).
  Path may be None if nm couldn't figure out the source file. Symbol
  types are lower-cased, 'v' is folded into 'w', and BSS ('b') symbols
  are skipped entirely. Unrecognized lines are reported on stderr.
  """
  # Match lines with size, symbol, optional location, optional discriminator
  sym_re = re.compile(r'^[0-9a-f]{8} '      # address (8 hex digits)
                      '([0-9a-f]{8}) '      # size (8 hex digits)
                      '(.) '                # symbol type, one character
                      '([^\t]+)'            # symbol name, separated from next by tab
                      '(?:\t(.*):[\d\?]+)?.*$')  # location
  # Match lines with addr but no size.
  addr_re = re.compile(r'^[0-9a-f]{8} (.) ([^\t]+)(?:\t.*)?$')
  # Match lines that don't have an address at all -- typically external symbols.
  noaddr_re = re.compile(r'^ {8} (.) (.*)$')

  for line in input:
    line = line.rstrip()
    match = sym_re.match(line)
    if match:
      size = int(match.group(1), 16)
      sym_type = match.group(2).lower()
      sym = match.group(3)
      if sym_type == 'v':
        sym_type = 'w'  # just call them all weak
      if sym_type == 'b':
        continue  # skip all BSS for now
      path = match.group(4)  # None when nm printed no location
      yield sym, sym_type, size, path
      continue
    match = addr_re.match(line)
    if match:
      # Symbol has an address but no size; nothing to accumulate.
      continue
    match = noaddr_re.match(line)
    if match and match.group(1) in ('U', 'w'):
      # External or weak symbol with no address; ignore.
      continue

    # write() instead of py2-only `print >>` so this runs on py2 and py3.
    sys.stderr.write('unparsed: %r\n' % line)
+
+
def TreeifySymbols(symbols):
  """Convert symbols into a path-based tree, calculating size information
  along the way.

  The result is a dictionary that contains two kinds of nodes:
  1. Leaf nodes, representing source code locations (e.g., c++ files)
     These nodes have the following dictionary entries:
       sizes: a dictionary whose keys are categories (such as code, data,
              vtable, etceteras) and whose values are the size, in bytes, of
              those categories;
       size:  the total size, in bytes, of all the entries in the sizes dict
  2. Non-leaf nodes, representing directories
     These nodes have the following dictionary entries:
       children: a dictionary whose keys are names (path entries; either
                 directory or file names) and whose values are other nodes;
       size:     the total size, in bytes, of all the leaf nodes that are
                 contained within the children dict (recursively expanded)

  The result object is itself a dictionary that represents the common ancestor
  of all child nodes, e.g. a path to which all other nodes beneath it are
  relative. The 'size' attribute of this dict yields the sum of the size of all
  leaf nodes within the data structure.
  """
  dirs = {'children': {}, 'size': 0}
  for sym, sym_type, size, path in symbols:
    dirs['size'] += size
    if path:
      path = os.path.normpath(path)
      # Absolute paths become relative to the (implicit) root node.
      if path.startswith('/'):
        path = path[1:]

    # An empty/None path falls through to the catch-all bucket below.
    parts = path.split('/') if path else None

    if parts:
      file_key = parts.pop()
      tree = dirs
      try:
        # Traverse the tree to the parent of the file node, creating as needed
        for part in parts:
          assert part != ''
          if part not in tree['children']:
            tree['children'][part] = {'children': {}, 'size': 0}
          tree = tree['children'][part]
          tree['size'] += size

        # Get (creating if necessary) the node for the file.
        # This node doesn't have a 'children' attribute.
        if file_key not in tree['children']:
          tree['children'][file_key] = {'sizes': collections.defaultdict(int),
                                        'size': 0}
        tree = tree['children'][file_key]
        tree['size'] += size

        # Accumulate size into a bucket within the file
        sym_type = sym_type.lower()
        if 'vtable for ' in sym:
          tree['sizes']['[vtable]'] += size
        elif 'r' == sym_type:
          tree['sizes']['[rodata]'] += size
        elif 'd' == sym_type:
          tree['sizes']['[data]'] += size
        elif 'b' == sym_type:
          tree['sizes']['[bss]'] += size
        elif 't' == sym_type:
          # 'text' in binary parlance means 'code'.
          tree['sizes']['[code]'] += size
        elif 'w' == sym_type:
          tree['sizes']['[weak]'] += size
        else:
          tree['sizes']['[other]'] += size
      except:
        # BUG FIX: the original printed an undefined name (`key`) here,
        # raising a NameError that masked the real exception.
        sys.stderr.write('error processing %s %s %s\n'
                         % (sym, parts, file_key))
        raise
    else:
      # Symbols with no path info are collected under a catch-all
      # pseudo-file, bucketed by a coarse prefix of the symbol name.
      key = 'symbols without paths'
      if key not in dirs['children']:
        dirs['children'][key] = {'sizes': collections.defaultdict(int),
                                 'size': 0}
      tree = dirs['children'][key]
      subkey = 'misc'
      if (sym.endswith('::__FUNCTION__') or
          sym.endswith('::__PRETTY_FUNCTION__')):
        subkey = '__FUNCTION__'
      elif sym.startswith('CSWTCH.'):
        subkey = 'CSWTCH'
      elif '::' in sym:
        subkey = sym[0:sym.find('::') + 2]  # e.g. 'Namespace::'
      tree['sizes'][subkey] += size
      tree['size'] += size
  return dirs
+
+
def JsonifyTree(tree, name):
  """Convert TreeifySymbols output to a JSON treemap.

  The format is very similar, with the notable exceptions being
  lists of children instead of maps and some different attribute names.

  Args:
    tree: a node dict from TreeifySymbols (leaf with 'sizes', or non-leaf
        with 'children'; both carry 'size').
    name: display name for this node; the size is appended to it.
  """
  children = []
  css_class_map = {
      '[vtable]': 'vtable',
      '[rodata]': 'read-only_data',
      '[data]': 'data',
      '[bss]': 'bss',
      '[code]': 'code',
      '[weak]': 'weak_symbol'
  }
  if 'children' in tree:
    # Non-leaf node. Recurse. (items() instead of py2-only iteritems().)
    for child_name, child in tree['children'].items():
      children.append(JsonifyTree(child, child_name))
  else:
    # Leaf node; dump per-file stats as entries in the treemap
    for kind, size in tree['sizes'].items():
      child_json = {'name': kind + ' (' + FormatBytes(size) + ')',
                    'data': {'$area': size}}
      css_class = css_class_map.get(kind)
      if css_class is not None:
        child_json['data']['$symbol'] = css_class
      children.append(child_json)
  # Sort children by size, largest to smallest.
  children.sort(key=lambda child: -child['data']['$area'])

  # For leaf nodes, the 'size' attribute is the size of the leaf;
  # Non-leaf nodes don't really have a size, but their 'size' attribute is
  # the sum of the sizes of all their children.
  return {'name': name + ' (' + FormatBytes(tree['size']) + ')',
          'data': {'$area': tree['size']},
          'children': children}
+
+
def DumpTreemap(symbols, outfile):
  """Write a webtreemap-compatible JS file describing the symbol tree.

  Args:
    symbols: iterable of (symbol, type, size, path) tuples as produced by
        ParseNm.
    outfile: path of the JavaScript file to (over)write.
  """
  dirs = TreeifySymbols(symbols)
  # `with` flushes and closes the file even on error, replacing the
  # manual try/finally of the original.
  with open(outfile, 'w') as out:
    out.write('var kTree = ' + json.dumps(JsonifyTree(dirs, '/')))
+
+
def DumpLargestSymbols(symbols, outfile, n):
  """Write a JS file listing the n largest non-bss, non-weak symbols.

  Args:
    symbols: iterable of (symbol, type, size, path) tuples.
    outfile: path of the JavaScript file to (over)write.
    n: maximum number of entries to emit.
  """
  # A list of (sym, type, size, path); sort by size, largest first.
  symbols = sorted(symbols, key=lambda x: -x[2])
  dumped = 0
  with open(outfile, 'w') as out:
    out.write('var largestSymbols = [\n')
    for sym, sym_type, size, path in symbols:
      if sym_type in ('b', 'w'):
        continue  # skip bss and weak symbols
      entry = {'size': FormatBytes(size),
               'symbol': sym,
               'type': SymbolTypeToHuman(sym_type),
               'location': path if path is not None else ''}
      out.write(json.dumps(entry))
      out.write(',\n')
      dumped += 1
      if dumped >= n:
        break  # break (not return) so the closing bracket is still written
    out.write('];\n')
+
+
def MakeSourceMap(symbols):
  """Aggregate symbols by normalized source path.

  Returns a dict keyed by the normalized path (or '[no path]' when the
  symbol had none); each value is a record with 'path' (the first path
  seen, unnormalized), 'symbol_count' and total 'size' in bytes.
  """
  sources = {}
  for _sym, _sym_type, size, path in symbols:
    key = os.path.normpath(path) if path else '[no path]'
    record = sources.setdefault(key,
                                {'path': path, 'symbol_count': 0, 'size': 0})
    record['size'] += size
    record['symbol_count'] += 1
  return sources
+
+
def DumpLargestSources(symbols, outfile, n):
  """Write a JS file listing the n largest source files by symbol size.

  Args:
    symbols: iterable of (symbol, type, size, path) tuples.
    outfile: path of the JavaScript file to (over)write.
    n: maximum number of entries to emit.
  """
  # Renamed from `map`, which shadowed the builtin.
  source_map = MakeSourceMap(symbols)
  sources = sorted(source_map.values(), key=lambda x: -x['size'])
  dumped = 0
  with open(outfile, 'w') as out:
    out.write('var largestSources = [\n')
    for record in sources:
      entry = {'size': FormatBytes(record['size']),
               'symbol_count': str(record['symbol_count']),
               'location': record['path']}
      out.write(json.dumps(entry))
      out.write(',\n')
      dumped += 1
      if dumped >= n:
        break  # break (not return) so the closing bracket is still written
    out.write('];\n')
+
+
def DumpLargestVTables(symbols, outfile, n):
  """Write a JS file listing the n largest vtables.

  Args:
    symbols: iterable of (symbol, type, size, path) tuples.
    outfile: path of the JavaScript file to (over)write.
    n: maximum number of entries to emit.
  """
  vtables = [{'symbol': sym, 'path': path, 'size': size}
             for sym, _sym_type, size, path in symbols
             if 'vtable for ' in sym]
  vtables.sort(key=lambda x: -x['size'])
  dumped = 0
  with open(outfile, 'w') as out:
    out.write('var largestVTables = [\n')
    for record in vtables:
      entry = {'size': FormatBytes(record['size']),
               'symbol': record['symbol'],
               'location': record['path']}
      out.write(json.dumps(entry))
      out.write(',\n')
      dumped += 1
      if dumped >= n:
        break  # break (not return) so the closing bracket is still written
    out.write('];\n')
+
+
def RunParallelAddress2Line(outfile, library, arch, jobs, verbose):
  """Run a parallel addr2line processing engine to dump and resolve symbols.

  Invokes the java ParallelAddress2Line tool built as part of
  'binary_size_tool'. Raises RuntimeError if the tool exits non-zero.

  Args:
    outfile: path where the tool writes its nm-style output.
    library: path of the library to analyze.
    arch: one of the --arch choices; selects the NDK nm/addr2line pair.
    jobs: number of worker threads, passed through as a string.
    verbose: when True, adds --verbose and echoes the command line.
  """
  out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out')
  build_type = os.getenv('BUILDTYPE', 'Release')
  classpath = os.path.join(out_dir, build_type, 'lib.java',
                           'binary_size_java.jar')
  cmd = ['java',
         '-classpath', classpath,
         'org.chromium.tools.binary_size.ParallelAddress2Line',
         '--disambiguate',
         '--outfile', outfile,
         '--library', library,
         '--threads', jobs]
  if verbose is True:
    cmd.append('--verbose')
  # Table of NDK toolchain binary prefixes, replacing the original's three
  # copy-pasted branches. For any other arch (host-native), no --nm or
  # --addr2line is passed and whatever is on PATH is used.
  toolchains = {
      'android-arm': os.path.join('arm-linux-androideabi-4.7', 'prebuilt',
                                  'linux-x86_64', 'bin',
                                  'arm-linux-androideabi-'),
      'android-mips': os.path.join('mipsel-linux-android-4.7', 'prebuilt',
                                   'linux-x86_64', 'bin',
                                   'mipsel-linux-android-'),
      'android-x86': os.path.join('x86-4.7', 'prebuilt',
                                  'linux-x86_64', 'bin',
                                  'i686-linux-android-'),
  }
  if arch in toolchains:
    prefix = os.path.join('third_party', 'android_tools', 'ndk', 'toolchains',
                          toolchains[arch])
    cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line'])

  if verbose:
    print(cmd)  # single-argument print works identically on py2 and py3

  return_code = subprocess.call(cmd)
  if return_code:
    raise RuntimeError('Failed to run ParallelAddress2Line: returned ' +
                       str(return_code))
+
+
def GetNmSymbols(infile, outfile, library, arch, jobs, verbose):
  """Return the list of parsed nm symbol tuples, generating nm data if needed.

  If infile is None, runs ParallelAddress2Line on `library` to produce the
  nm data, writing it to `outfile` if given, otherwise to a temp file that
  is deliberately kept on disk (delete=False) for later inspection.

  Returns: list of (symbol, type, size, path) tuples from ParseNm.
  """
  if infile is None:
    if outfile is None:
      infile = tempfile.NamedTemporaryFile(delete=False).name
    else:
      infile = outfile

    if verbose:
      print('Running parallel addr2line, dumping symbols to ' + infile)
    RunParallelAddress2Line(outfile=infile, library=library, arch=arch,
                            jobs=jobs, verbose=verbose)
  elif verbose:
    print('Using nm input from ' + infile)
  # BUG FIX: the original used the py2-only `file()` builtin; open() works
  # everywhere, and `with` guarantees the handle is closed.
  with open(infile, 'r') as nm_input:
    return list(ParseNm(nm_input))
+
+
def main():
  """Command-line entry point: parse options, run the analysis, emit report."""
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

  %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--arch',
                    help='the architecture that the library is targeted to. '
                    'Determines which nm/addr2line binaries are used. When '
                    '\'host-native\' is chosen, the program will use whichever '
                    'nm/addr2line binaries are on the PATH. This is '
                    'appropriate when you are analyzing a binary by and for '
                    'your computer. '
                    'This argument is only valid when using --library. '
                    'Default is \'host-native\'.',
                    choices=['host-native', 'android-arm',
                             'android-mips', 'android-x86'],)
  parser.add_option('--jobs',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    # BUG FIX: added the missing space after 'terminates.'
                    'used and is deleted when the program terminates. '
                    'This argument is only valid when using --library.')
  opts, args = parser.parse_args()

  # Exactly one of the two symbol sources must be given.
  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    # These options only affect the addr2line pass, which --nm-in skips.
    if opts.jobs:
      sys.stderr.write('WARNING: --jobs has no effect '
                       'when used with --nm-in\n')
    if opts.arch:
      sys.stderr.write('WARNING: --arch has no effect '
                       'when used with --nm-in\n')
  if not opts.destdir:
    parser.error('--destdir is required argument')
  if not opts.jobs:
    opts.jobs = '1'  # passed through to the java tool as a string
  if not opts.arch:
    opts.arch = 'host-native'

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, opts.arch,
                         opts.jobs, opts.verbose is True)
  if not os.path.exists(opts.destdir):
    # 0o755 (not py2-only 0755) works on python 2.6+ and python 3.
    os.makedirs(opts.destdir, 0o755)

  DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
  DumpLargestSymbols(symbols,
                     os.path.join(opts.destdir, 'largest-symbols.js'), 100)
  DumpLargestSources(symbols,
                     os.path.join(opts.destdir, 'largest-sources.js'), 100)
  DumpLargestVTables(symbols,
                     os.path.join(opts.destdir, 'largest-vtables.js'), 100)

  # TODO(andrewhayden): Switch to D3 for greater flexibility
  treemap_out = os.path.join(opts.destdir, 'webtreemap')
  if not os.path.exists(treemap_out):
    os.makedirs(treemap_out, 0o755)
  # NOTE(review): this source path may be desynced from where webtreemap is
  # actually checked out -- confirm against the final third_party layout.
  treemap_src = os.path.join('third_party', 'webtreemap', 'src',
                             'webtreemap-gh-pages')
  shutil.copy(os.path.join(treemap_src, 'COPYING'), treemap_out)
  shutil.copy(os.path.join(treemap_src, 'webtreemap.js'), treemap_out)
  shutil.copy(os.path.join(treemap_src, 'webtreemap.css'), treemap_out)
  shutil.copy(os.path.join('tools', 'binary_size', 'template', 'index.html'),
              opts.destdir)
  if opts.verbose:
    print('Report saved to ' + opts.destdir + '/index.html')


if __name__ == '__main__':
  sys.exit(main())
« no previous file with comments | « tools/binary_size/java/src/org/chromium/tools/binary_size/Record.java ('k') | tools/binary_size/template/.gitignore » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698