Chromium Code Reviews| Index: tools/binary_size/run_binary_size_analysis.py |
| diff --git a/tools/binary_size/run_binary_size_analysis.py b/tools/binary_size/run_binary_size_analysis.py |
| index 2a75faf0ac585289abb15587a269a27fc8937835..26be022acd956e6178be824fa33f625dd4648fbc 100755 |
| --- a/tools/binary_size/run_binary_size_analysis.py |
| +++ b/tools/binary_size/run_binary_size_analysis.py |
| @@ -13,6 +13,7 @@ you desire. |
| import collections |
| import fileinput |
| import json |
| +import multiprocessing |
| import optparse |
| import os |
| import pprint |
| @@ -21,6 +22,19 @@ import shutil |
| import subprocess |
| import sys |
| import tempfile |
| +import time |
| + |
| +import binary_size_utils |
| + |
| +elf_symbolizer_path = os.path.abspath(os.path.join( |
| + os.path.dirname(__file__), |
| + '..', |
| + '..', |
| + 'build', |
| + 'android', |
| + 'pylib')) |
| +sys.path.append(elf_symbolizer_path) |
| +import symbols.elf_symbolizer as elf_symbolizer |
| # TODO(andrewhayden): Only used for legacy reports. Delete. |
| @@ -46,66 +60,31 @@ def SymbolTypeToHuman(type): |
| 'v': 'weak symbol'}[type] |
| -def ParseNm(input): |
| - """Parse nm output. |
| - |
| - Argument: an iterable over lines of nm output. |
| - |
| - Yields: (symbol name, symbol type, symbol size, source file path). |
| - Path may be None if nm couldn't figure out the source file. |
| - """ |
| - |
| - # Match lines with size, symbol, optional location, optional discriminator |
| - sym_re = re.compile(r'^[0-9a-f]{8} ' # address (8 hex digits) |
| - '([0-9a-f]{8}) ' # size (8 hex digits) |
| - '(.) ' # symbol type, one character |
| - '([^\t]+)' # symbol name, separated from next by tab |
| - '(?:\t(.*):[\d\?]+)?.*$') # location |
| - # Match lines with addr but no size. |
| - addr_re = re.compile(r'^[0-9a-f]{8} (.) ([^\t]+)(?:\t.*)?$') |
| - # Match lines that don't have an address at all -- typically external symbols. |
| - noaddr_re = re.compile(r'^ {8} (.) (.*)$') |
| - |
| - for line in input: |
| - line = line.rstrip() |
| - match = sym_re.match(line) |
| - if match: |
| - size, type, sym = match.groups()[0:3] |
| - size = int(size, 16) |
| - if type.lower() == 'b': |
| - continue # skip all BSS for now |
| - path = match.group(4) |
| - yield sym, type, size, path |
| - continue |
| - match = addr_re.match(line) |
| - if match: |
| - type, sym = match.groups()[0:2] |
| - # No size == we don't care. |
| - continue |
| - match = noaddr_re.match(line) |
| - if match: |
| - type, sym = match.groups() |
| - if type in ('U', 'w'): |
| - # external or weak symbol |
| - continue |
| - |
| - print >>sys.stderr, 'unparsed:', repr(line) |
| - |
| - |
| def _MkChild(node, name): |
|
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
_MkChild looks like a "long"cut for
return node.setdefa
Daniel Bratell
2014/05/21 08:42:13
Hmm, yes. Border case. Using setdefault forces you
|
| - child = None |
| - for test in node['children']: |
| - if test['n'] == name: |
| - child = test |
| - break |
| - if not child: |
| - child = {'n': name, 'children': []} |
| - node['children'].append(child) |
| + child = node['children'].get(name) |
| + if child is None: |
| + child = {'n': name, 'children': {}} |
| + node['children'][name] = child |
| return child |
| +def MakeChildrenDictsIntoLists(node): |
| + largest_list_len = 0 |
| + if 'children' in node: |
| + largest_list_len = len(node['children']) |
| + child_list = [] |
| + for child in node['children'].itervalues(): |
| + child_largest_list_len = MakeChildrenDictsIntoLists(child) |
| + if child_largest_list_len > largest_list_len: |
| + largest_list_len = child_largest_list_len |
| + child_list.append(child) |
| + node['children'] = child_list |
| + |
| + return largest_list_len |
| + |
|
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
Nit: add extra newline
|
| def MakeCompactTree(symbols): |
| - result = {'n': '/', 'children': [], 'k': 'p', 'maxDepth': 0} |
| + result = {'n': '/', 'children': {}, 'k': 'p', 'maxDepth': 0} |
| + seen_symbol_with_path = False |
| for symbol_name, symbol_type, symbol_size, file_path in symbols: |
| if 'vtable for ' in symbol_name: |
| @@ -113,6 +92,7 @@ def MakeCompactTree(symbols): |
| # Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz'] |
| if file_path: |
| file_path = os.path.normpath(file_path) |
| + seen_symbol_with_path = True |
| else: |
| file_path = '(No Path)' |
| @@ -129,18 +109,23 @@ def MakeCompactTree(symbols): |
| continue |
| depth += 1 |
| node = _MkChild(node, path_part); |
| + assert not 'k' in node or node['k'] == 'p' |
| node['k'] = 'p' # p for path |
| # 'node' is now the file node. Find the symbol-type bucket. |
| node['lastPathElement'] = True |
| node = _MkChild(node, symbol_type) |
| + assert not 'k' in node or node['k'] == 'b' |
| node['t'] = symbol_type |
| node['k'] = 'b' # b for bucket |
| depth += 1 |
| # 'node' is now the symbol-type bucket. Make the child entry. |
| node = _MkChild(node, symbol_name) |
| - if 'children' in node: # Only possible if we're adding duplicate entries!!! |
| + if 'children' in node: |
| + if node['children']: |
| + print('A container node used as symbol for %s.' % symbol_name) |
| + # This is going to be used as a leaf so no use for child list. |
| del node['children'] |
| node['value'] = symbol_size |
| node['t'] = symbol_type |
| @@ -148,6 +133,14 @@ def MakeCompactTree(symbols): |
| depth += 1 |
| result['maxDepth'] = max(result['maxDepth'], depth); |
| + if not seen_symbol_with_path: |
| + print('Symbols lack paths. Data will not be structured.') |
|
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
shouldn't all these prints be logging.warning/erro
|
| + |
| + largest_list_len = MakeChildrenDictsIntoLists(result) |
| + |
| + if largest_list_len > 1000: |
| + print('There are sections with %d nodes. Results might be unusable.' % |
| + largest_list_len) |
| return result |
| @@ -285,12 +278,11 @@ def JsonifyTree(tree, name): |
| 'children': children } |
| def DumpCompactTree(symbols, outfile): |
| - out = open(outfile, 'w') |
| - try: |
| - out.write('var tree_data = ' + json.dumps(MakeCompactTree(symbols))) |
| - finally: |
| - out.flush() |
| - out.close() |
| + tree_root = MakeCompactTree(symbols) |
| + json_string = json.dumps(tree_root) |
| + print('Writing %d bytes json' % len(json_string)) |
| + with open(outfile, 'w') as out: |
| + out.write('var tree_data = ' + json_string) |
|
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
I've no idea how big this string is going to be.
I
Daniel Bratell
2014/05/21 08:42:13
It can be big so this is a nice suggestion. Won't
|
| # TODO(andrewhayden): Only used for legacy reports. Delete. |
| @@ -397,61 +389,156 @@ def DumpLargestVTables(symbols, outfile, n): |
| out.close() |
| -# TODO(andrewhayden): Switch to Primiano's python-based version. |
| -def RunParallelAddress2Line(outfile, library, arch, jobs, verbose): |
| - """Run a parallel addr2line processing engine to dump and resolve symbols.""" |
| - out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out') |
| - build_type = os.getenv('BUILDTYPE', 'Release') |
| - classpath = os.path.join(out_dir, build_type, 'lib.java', |
| - 'binary_size_java.jar') |
| - cmd = ['java', |
| - '-classpath', classpath, |
| - 'org.chromium.tools.binary_size.ParallelAddress2Line', |
| - '--disambiguate', |
| - '--outfile', outfile, |
| - '--library', library, |
| - '--threads', jobs] |
| - if verbose is True: |
| - cmd.append('--verbose') |
| - prefix = os.path.join('third_party', 'android_tools', 'ndk', 'toolchains') |
| - if arch == 'android-arm': |
| - prefix = os.path.join(prefix, 'arm-linux-androideabi-4.8', 'prebuilt', |
| - 'linux-x86_64', 'bin', 'arm-linux-androideabi-') |
| - cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line']) |
| - elif arch == 'android-mips': |
| - prefix = os.path.join(prefix, 'mipsel-linux-android-4.8', 'prebuilt', |
| - 'linux-x86_64', 'bin', 'mipsel-linux-android-') |
| - cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line']) |
| - elif arch == 'android-x86': |
| - prefix = os.path.join(prefix, 'x86-4.8', 'prebuilt', |
| - 'linux-x86_64', 'bin', 'i686-linux-android-') |
| - cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line']) |
| - # else, use whatever is in PATH (don't pass --nm or --addr2line) |
| - |
| - if verbose: |
| - print cmd |
| - |
| - return_code = subprocess.call(cmd) |
| - if return_code: |
| - raise RuntimeError('Failed to run ParallelAddress2Line: returned ' + |
| - str(return_code)) |
| - |
| - |
| -def GetNmSymbols(infile, outfile, library, arch, jobs, verbose): |
| - if infile is None: |
| - if outfile is None: |
| - infile = tempfile.NamedTemporaryFile(delete=False).name |
| +# Regex for parsing "nm" output. A sample line looks like this: |
| +# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95 |
| +# |
| +# The fields are: address, size, type, name, source location |
| +# Regular expression explained ( see also: https://xkcd.com/208 ): |
| +# ([0-9a-f]{8,}) The address |
| +# [\s]+ Whitespace separator |
| +# ([0-9a-f]{8,}) The size. From here on out it's all optional. |
| +# [\s]* Whitespace separator |
| +# (\S?) The symbol type, which is any non-whitespace char |
| +# [\s*] Whitespace separator |
| +# ([^\t]*) Symbol name, any non-tab character (spaces ok!) |
| +# [\t]? Tab separator |
| +# (.*) The location (filename[:linenum|?][ (discriminator n)]) |
| +sNmPattern = re.compile( |
| + r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)') |
| + |
| +class Progress(): |
| + def __init__(self): |
| + self.count = 0 |
| + self.skip_count = 0 |
| + self.collisions = 0 |
| + self.time_last_output = time.time() |
| + self.count_last_output = 0 |
| + |
| + |
| +def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary): |
| + nm_output = run_nm(library, nm_binary) |
| + nm_output_lines = nm_output.splitlines() |
| + nm_output_lines_len = len(nm_output_lines) |
| + address_symbol = {} |
| + progress = Progress() |
| + def map_address_symbol(symbol, addr): |
| + progress.count += 1 |
| + if addr in address_symbol: |
| + # 'Collision between %s and %s.' % (str(symbol.name), |
| + # str(address_symbol[addr].name)) |
| + progress.collisions += 1 |
| else: |
| - infile = outfile |
| + address_symbol[addr] = symbol |
| + |
| + progress_chunk = 100 |
| + if progress.count % progress_chunk == 0: |
| + time_now = time.time() |
| + time_spent = time_now - progress.time_last_output |
| + if time_spent > 1.0: |
| + # Only output at most once per second. |
| + progress.time_last_output = time_now |
| + chunk_size = progress.count - progress.count_last_output |
| + progress.count_last_output = progress.count |
| + if time_spent > 0: |
| + speed = chunk_size / time_spent |
| + else: |
| + speed = 0 |
| + progress_percent = (100.0 * (progress.count + progress.skip_count) / |
| + nm_output_lines_len) |
| + print('%.1f%%: Looked up %d symbols (%d collisions) - %.1f lookups/s.' % |
| + (progress_percent, progress.count, progress.collisions, speed)) |
| + |
| + symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary, |
| + map_address_symbol, |
| + max_concurrent_jobs=2) |
| + for line in nm_output_lines: |
| + match = sNmPattern.match(line) |
| + if match: |
| + location = match.group(5) |
| + if not location: |
| + addr = int(match.group(1), 16) |
| + size = int(match.group(2), 16) |
| + if addr in address_symbol: # Already looked up, shortcut ELFSymbolizer. |
| + map_address_symbol(address_symbol[addr], addr) |
| + continue |
| + elif size == 0: |
| + # Save time by not looking up empty symbols (do they even exist?) |
| + print('Empty symbol: ' + line) |
| + else: |
| + symbolizer.SymbolizeAsync(addr, addr) |
| + continue |
| + |
| + progress.skip_count += 1 |
| + |
| + symbolizer.Join() |
| + |
| + with open(outfile, 'w') as out: |
| + for line in nm_output_lines: |
| + match = sNmPattern.match(line) |
| + if match: |
| + location = match.group(5) |
| + if not location: |
| + addr = int(match.group(1), 16) |
| + symbol = address_symbol[addr] |
| + path = '??' |
| + if symbol.source_path is not None: |
| + path = symbol.source_path |
| + line_number = 0 |
| + if symbol.source_line is not None: |
| + line_number = symbol.source_line |
| + out.write('%s\t%s:%d\n' % (line, path, line_number)) |
| + continue |
| + |
| + out.write('%s\n' % line) |
| + |
| + print('%d symbols in the results.' % len(address_symbol)) |
| + |
| + |
| +def run_nm(binary, nm_binary): |
|
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
nit: RunNm
|
| + print('Starting nm') |
| + cmd = [nm_binary, '-C', '--print-size', binary] |
| + nm_process = subprocess.Popen(cmd, |
| + stdout=subprocess.PIPE, |
| + stderr=subprocess.PIPE) |
| + (process_output, err_output) = nm_process.communicate() |
| + |
| + if nm_process.returncode != 0: |
| + if err_output: |
| + raise Exception, err_output |
| + else: |
| + raise Exception, process_output |
| + |
| + print('Finished nm') |
| + return process_output |
| + |
| + |
| +def GetNmSymbols(nm_infile, outfile, library, jobs, verbose, |
| + addr2line_binary, nm_binary): |
| + if nm_infile is None: |
| + if outfile is None: |
| + outfile = tempfile.NamedTemporaryFile(delete=False).name |
| if verbose: |
| - print 'Running parallel addr2line, dumping symbols to ' + infile; |
| - RunParallelAddress2Line(outfile=infile, library=library, arch=arch, |
| - jobs=jobs, verbose=verbose) |
| + print 'Running parallel addr2line, dumping symbols to ' + outfile; |
| + RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary) |
| + |
| + nm_infile = outfile |
| + |
| elif verbose: |
| - print 'Using nm input from ' + infile |
| - with file(infile, 'r') as infile: |
| - return list(ParseNm(infile)) |
| + print 'Using nm input from ' + nm_infile |
| + with file(nm_infile, 'r') as infile: |
| + return list(binary_size_utils.ParseNm(infile)) |
| + |
| + |
| +def _find_in_system_path(binary): |
| + """Locate the full path to binary in the system path or return None |
| + if not found.""" |
| + system_path = os.environ["PATH"].split(os.pathsep) |
| + for path in system_path: |
| + binary_path = os.path.join(path, binary) |
| + if os.path.isfile(binary_path): |
| + return binary_path |
| + return None |
| def main(): |
| @@ -486,17 +573,15 @@ def main(): |
| parser.add_option('--library', metavar='PATH', |
| help='if specified, process symbols in the library at ' |
| 'the specified path. Mutually exclusive with --nm-in.') |
| - parser.add_option('--arch', |
| - help='the architecture that the library is targeted to. ' |
| - 'Determines which nm/addr2line binaries are used. When ' |
| - '\'host-native\' is chosen, the program will use whichever ' |
| - 'nm/addr2line binaries are on the PATH. This is ' |
| - 'appropriate when you are analyzing a binary by and for ' |
| - 'your computer. ' |
| - 'This argument is only valid when using --library. ' |
| - 'Default is \'host-native\'.', |
| - choices=['host-native', 'android-arm', |
| - 'android-mips', 'android-x86'],) |
| + parser.add_option('--nm-binary', |
| + help='use the specified nm binary to analyze library. ' |
| + 'This is to be used when the nm in the path is not for ' |
| + 'the right architecture or of the right version.') |
| + parser.add_option('--addr2line-binary', |
| + help='use the specified addr2line binary to analyze ' |
| + 'library. This is to be used when the addr2line in ' |
| + 'the path is not for the right architecture or ' |
| + 'of the right version.') |
| parser.add_option('--jobs', |
| help='number of jobs to use for the parallel ' |
| 'addr2line processing pool; defaults to 1. More ' |
| @@ -524,18 +609,33 @@ def main(): |
| if opts.jobs: |
| print >> sys.stderr, ('WARNING: --jobs has no effect ' |
| 'when used with --nm-in') |
| - if opts.arch: |
| - print >> sys.stderr, ('WARNING: --arch has no effect ' |
| - 'when used with --nm-in') |
| if not opts.destdir: |
| parser.error('--destdir is required argument') |
| if not opts.jobs: |
| - opts.jobs = '1' |
| - if not opts.arch: |
| - opts.arch = 'host-native' |
| + opts.jobs = str(multiprocessing.cpu_count()) |
| + |
| + if opts.addr2line_binary: |
| + assert os.path.isfile(opts.addr2line_binary) |
| + addr2line_binary = opts.addr2line_binary |
| + else: |
| + addr2line_binary = _find_in_system_path('addr2line') |
| + assert addr2line_binary, 'Unable to find addr2line in the path. '\ |
| + 'Use --addr2line-binary to specify location.' |
| + |
| + if opts.nm_binary: |
| + assert os.path.isfile(opts.nm_binary) |
| + nm_binary = opts.nm_binary |
| + else: |
| + nm_binary = _find_in_system_path('nm') |
| + assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\ |
| + 'to specify location.' |
| + |
| + print('nm: %s' % nm_binary) |
| + print('addr2line: %s' % addr2line_binary) |
| - symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, opts.arch, |
| - opts.jobs, opts.verbose is True) |
| + symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, |
| + opts.jobs, opts.verbose is True, |
| + addr2line_binary, nm_binary) |
| if not os.path.exists(opts.destdir): |
| os.makedirs(opts.destdir, 0755) |
| @@ -562,11 +662,15 @@ def main(): |
| d3_out = os.path.join(opts.destdir, 'd3') |
| if not os.path.exists(d3_out): |
| os.makedirs(d3_out, 0755) |
| - d3_src = os.path.join('third_party', 'd3', 'src') |
| - template_src = os.path.join('tools', 'binary_size', |
| + d3_src = os.path.join(os.path.dirname(__file__), |
| + '..', |
| + '..', |
| + 'third_party', 'd3', 'src') |
| + template_src = os.path.join(os.path.dirname(__file__), |
| 'template') |
| shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out) |
| shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out) |
| + print('Copying index.html') |
| shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir) |
| shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir) |