Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(26)

Unified Diff: tools/binary_size/analyze.py

Issue 2778963003: Revert of V2 of //tools/binary_size rewrite (diffs). (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/binary_size/README.md ('k') | tools/binary_size/binary_size_utils.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/binary_size/analyze.py
diff --git a/tools/binary_size/analyze.py b/tools/binary_size/analyze.py
new file mode 100755
index 0000000000000000000000000000000000000000..8a4eefc44df4e2107bc8c2a60c72cbe92e1b4c77
--- /dev/null
+++ b/tools/binary_size/analyze.py
@@ -0,0 +1,356 @@
+#!/usr/bin/env python
+# Copyright 2017 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Main Python API for analyzing binary size."""
+
+import argparse
+import ast
+import distutils.spawn
+import gzip
+import logging
+import os
+import re
+import subprocess
+
+import function_signature
+import helpers
+import mapfileparser
+import symbols
+
+
# File format version for .size files. Written as the first line by
# _SaveResult() and checked on load by _LoadResults(); bump this whenever the
# serialization layout changes.
_SERIALIZATION_VERSION = 1
+
+
+def _OpenMaybeGz(path, mode=None):
+ """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""
+ if path.endswith('.gz'):
+ if mode and 'w' in mode:
+ return gzip.GzipFile(path, mode, 1)
+ return gzip.open(path, mode)
+ return open(path, mode or 'r')
+
+
+def _EndsWithMaybeGz(path, suffix):
+ return path.endswith(suffix) or path.endswith(suffix + '.gz')
+
+
+def _IterLines(s):
+ prev_idx = -1
+ while True:
+ idx = s.find('\n', prev_idx + 1)
+ if idx == -1:
+ return
+ yield s[prev_idx + 1:idx]
+ prev_idx = idx
+
+
def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
  """Runs c++filt over any symbol names that are still mangled ("_Z...")."""
  mangled = [sym for sym in symbol_group
             if sym.name and sym.name.startswith('_Z')]
  if not mangled:
    return

  logging.info('Unmangling %d names', len(mangled))
  filt = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE)
  output = filt.communicate('\n'.join(sym.name for sym in mangled))[0]
  assert filt.returncode == 0

  # c++filt emits one demangled name per input line, in the same order.
  for idx, demangled in enumerate(_IterLines(output)):
    mangled[idx].name = demangled
+
+
+def _NormalizeNames(symbol_group):
+ """Ensures that all names are formatted in a useful way.
+
+ This includes:
+ - Assigning of |function_signature| (for functions).
+ - Stripping of return types in |function_signature| and |name|.
+ - Stripping parameters from |name|.
+ - Moving "vtable for" and the like to be suffixes rather than prefixes.
+ """
+ found_prefixes = set()
+ for symbol in symbol_group:
+ if not symbol.name or symbol.name.startswith('*'):
+ # See comment in _RemoveDuplicatesAndCalculatePadding() about when this
+ # can happen.
+ continue
+
+ # E.g.: vtable for FOO
+ idx = symbol.name.find(' for ', 0, 30)
+ if idx != -1:
+ found_prefixes.add(symbol.name[:idx + 4])
+ symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'
+
+ # E.g.: virtual thunk to FOO
+ idx = symbol.name.find(' to ', 0, 30)
+ if idx != -1:
+ found_prefixes.add(symbol.name[:idx + 3])
+ symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'
+
+ # Strip out return type, and identify where parameter list starts.
+ if symbol.section == 't':
+ symbol.function_signature, symbol.name = (
+ function_signature.Parse(symbol.name))
+
+ # Remove anonymous namespaces (they just harm clustering).
+ symbol.name = symbol.name.replace('(anonymous namespace)::', '')
+
+ logging.debug('Found name prefixes of: %r', found_prefixes)
+
+
+def _NormalizeObjectPaths(symbol_group):
+ """Ensures that all paths are formatted in a useful way."""
+ for symbol in symbol_group:
+ if symbol.path:
+ if symbol.path.startswith('obj/'):
+ # Convert obj/third_party/... -> third_party/...
+ symbol.path = symbol.path[4:]
+ elif symbol.path.startswith('../../'):
+ # Convert ../../third_party/... -> third_party/...
+ symbol.path = symbol.path[6:]
+ if symbol.path.endswith(')'):
+ # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o
+ start_idx = symbol.path.index('(')
+ paren_path = symbol.path[start_idx + 1:-1]
+ symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path
+
+
+def _RemoveDuplicatesAndCalculatePadding(symbol_group):
+ """Removes symbols at the same address and calculates the |padding| field.
+
+ Symbols must already be sorted by |address|.
+ """
+ i = 0
+ to_remove = set()
+ all_symbols = symbol_group.symbols
+ for i in xrange(len(all_symbols)):
+ prev_symbol = all_symbols[i - 1]
+ symbol = all_symbols[i]
+ if prev_symbol.section_name is not symbol.section_name:
+ continue
+ if symbol.address > 0 and prev_symbol.address > 0:
+ # Fold symbols that are at the same address (happens in nm output).
+ if symbol.address == prev_symbol.address:
+ symbol.size = max(prev_symbol.size, symbol.size)
+ to_remove.add(i)
+ continue
+ # Even with symbols at the same address removed, overlaps can still
+ # happen. In this case, padding will be negative (and this is fine).
+ padding = symbol.address - prev_symbol.end_address
+ # These thresholds were found by manually auditing arm32 Chrome.
+ # E.g.: Set them to 0 and see what warnings get logged.
+ # TODO(agrieve): See if these thresholds make sense for architectures
+ # other than arm32.
+ if (symbol.section in 'rd' and padding >= 256 or
+ symbol.section in 't' and padding >= 64):
+ # For nm data, this is caused by data that has no associated symbol.
+ # The linker map file lists them with no name, but with a file.
+ # Example:
+ # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
+ # Where as most look like:
+ # .data.MANGLED_NAME...
+ logging.debug('Large padding of %d between:\n A) %r\n B) %r' % (
+ padding, prev_symbol, symbol))
+ continue
+ symbol.padding = padding
+ symbol.size += padding
+ assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol
+ # Map files have no overlaps, so worth special-casing the no-op case.
+ if to_remove:
+ logging.info('Removing %d overlapping symbols', len(to_remove))
+ symbol_group.symbols = (
+ [s for i, s in enumerate(all_symbols) if i not in to_remove])
+
+
def _PrintStats(result, write_func):
  """Reports how much of each section's size is accounted for by symbols.

  Args:
    result: ParseResult whose coverage is being summarized.
    write_func: Called with each line of human-readable output.
  """
  for section in symbols.SECTION_TO_SECTION_NAME:
    if section == 'd':
      # The 'd' bucket spans every ".data*" section, so sum them all.
      expected_size = sum(size for name, size in result.section_sizes.items()
                          if name.startswith('.data'))
    else:
      section_name = symbols.SECTION_TO_SECTION_NAME[section]
      expected_size = result.section_sizes[section_name]

    def describe(group):
      fmt = ('Section %s has %.1f%% of %d bytes accounted for from '
             '%d symbols. %d bytes are unaccounted for. Padding '
             'accounts for %d bytes\n')
      covered = group.size
      pct = 100.0 * covered / expected_size
      return fmt % (section, pct, covered, len(group),
                    expected_size - covered, group.padding)

    in_section = result.symbol_group.WhereInSection(section)
    write_func(describe(in_section))

    # Also report coverage excluding merge sections ("*...") and symbols with
    # no name/path attribution, since those inflate the totals.
    star_syms = in_section.WhereNameMatches(r'^\*')
    attributed_syms = star_syms.Inverted().WhereHasAnyAttribution()
    anonymous_syms = attributed_syms.Inverted()
    if star_syms or anonymous_syms:
      missing_size = star_syms.size + anonymous_syms.size
      write_func(('+ Without %d merge sections and %d anonymous entries ('
                  'accounting for %d bytes):\n') % (
                  len(star_syms), len(anonymous_syms), missing_size))
      write_func('+ ' + describe(attributed_syms))
+
+
def _SaveResult(result, file_obj):
  """Serializes |result| to |file_obj| in the .size text format.

  Layout: version line, repr() of section_sizes, symbol count, then one
  tab-separated line per symbol, with a section-name header line emitted
  whenever the section changes. Derived fields (padding, parsed names) are
  not written; _LoadResults() recomputes them.
  """
  file_obj.write('%d\n' % _SERIALIZATION_VERSION)
  file_obj.write('%r\n' % result.section_sizes)
  file_obj.write('%d\n' % len(result.symbol_group))
  current_section = None
  for sym in result.symbol_group:
    # Symbols are grouped by section, so a header per change is enough.
    if sym.section_name != current_section:
      current_section = sym.section_name
      file_obj.write('%s\n' % current_section)
    # Address and size are stored in hex. Skip |padding| and prefer the full
    # |function_signature| over the stripped |name| (both are re-derivable).
    file_obj.write('%x\t%x\t%s\t%s\n' % (
        sym.address, sym.size_without_padding,
        sym.function_signature or sym.name or '',
        sym.path or ''))
+
+
def _LoadResults(file_obj):
  """Loads a result previously written by _SaveResult() from |file_obj|.

  Derived symbol fields (padding, parsed function signatures) are not stored
  in the file; they are recomputed after the raw fields are read back.

  Returns:
    A mapfileparser.ParseResult.
  """
  lines = iter(file_obj)
  # Line 1: serialization version stamp.
  actual_version = int(next(lines))
  assert actual_version == _SERIALIZATION_VERSION, (
      'Version mismatch. Need to write some upgrade code.')

  # Line 2: repr() of the section_sizes dict. literal_eval only accepts
  # Python literals, so this is safe (unlike eval()).
  section_sizes = ast.literal_eval(next(lines))
  # Line 3: number of symbol lines (section header lines are extra).
  num_syms = int(next(lines))
  symbol_list = [None] * num_syms
  section_name = None
  for i in xrange(num_syms):
    line = next(lines)[:-1]  # Strip the trailing newline.
    if '\t' not in line:
      # A tab-less line is a section header that applies to all following
      # symbols. intern() so section names can be compared by identity
      # (see _RemoveDuplicatesAndCalculatePadding).
      section_name = intern(line)
      line = next(lines)[:-1]
    # Bypass Symbol.__init__ and populate fields directly.
    new_sym = symbols.Symbol.__new__(symbols.Symbol)
    parts = line.split('\t')
    new_sym.section_name = section_name
    new_sym.address = int(parts[0], 16)  # Stored as hex.
    new_sym.size = int(parts[1], 16)  # Stored as hex.
    new_sym.name = parts[2] or None
    new_sym.path = parts[3] or None
    new_sym.padding = 0  # Derived
    new_sym.function_signature = None  # Derived
    symbol_list[i] = new_sym

  # Recompute derived values (padding and function names).
  result = mapfileparser.ParseResult(symbol_list, section_sizes)
  logging.info('Calculating padding')
  _RemoveDuplicatesAndCalculatePadding(result.symbol_group)
  logging.info('Deriving signatures')
  # Re-parse out function parameters.
  _NormalizeNames(result.symbol_group.WhereInSection('t'))
  return result
+
+
def AddOptions(parser):
  """Registers analyze.py's command-line arguments on |parser|."""
  parser.add_argument(
      'input_file',
      help='Path to input file. Can be a linker .map file, an '
           'unstripped binary, or a saved result from analyze.py')
  parser.add_argument(
      '--tool-prefix', default='', help='Path prefix for c++filt.')
  parser.add_argument(
      '--output-directory', help='Path to the root build directory.')
+
+
+def _DetectToolPrefix(tool_prefix, input_file, output_directory=None):
+ """Calls Analyze with values from args."""
+ if not output_directory:
+ abs_path = os.path.abspath(input_file)
+ release_idx = abs_path.find('Release')
+ if release_idx != -1:
+ output_directory = abs_path[:release_idx] + 'Release'
+ output_directory = os.path.relpath(abs_path[:release_idx] + '/Release')
+ logging.debug('Detected --output-directory=%s', output_directory)
+
+ if not tool_prefix and output_directory:
+ # Auto-detect from build_vars.txt
+ build_vars_path = os.path.join(output_directory, 'build_vars.txt')
+ if os.path.exists(build_vars_path):
+ with open(build_vars_path) as f:
+ build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l)
+ logging.debug('Found --tool-prefix from build_vars.txt')
+ tool_prefix = build_vars['android_tool_prefix']
+
+ if os.path.sep not in tool_prefix:
+ full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')
+ else:
+ full_path = tool_prefix + 'c++filt'
+
+ if not os.path.isfile(full_path):
+ raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)
+ logging.info('Using --tool-prefix=%s', tool_prefix)
+ return tool_prefix
+
+
def AnalyzeWithArgs(args):
  """Convenience wrapper: runs Analyze() using parsed argparse |args|."""
  return Analyze(args.input_file, output_directory=args.output_directory,
                 tool_prefix=args.tool_prefix)
+
+
def Analyze(path, output_directory=None, tool_prefix=''):
  """Analyzes a linker .map file, or loads a cached .size result.

  Args:
    path: A .map, .map.gz, .size, or .size.gz file.
    output_directory: Root build directory; auto-detected when omitted.
    tool_prefix: Toolchain prefix for c++filt; auto-detected when omitted.

  Returns:
    A ParseResult with deduplicated, padded, name-normalized symbols.
  """
  is_cached = _EndsWithMaybeGz(path, '.size')
  if not is_cached and not _EndsWithMaybeGz(path, '.map'):
    raise Exception('Expected input to be a .map or a .size')

  if is_cached:
    logging.info('Loading cached results.')
    with _OpenMaybeGz(path) as cache_file:
      result = _LoadResults(cache_file)
  else:
    # Verify tool_prefix early.
    tool_prefix = _DetectToolPrefix(tool_prefix, path, output_directory)

    with _OpenMaybeGz(path) as map_file:
      result = mapfileparser.MapFileParser().Parse(map_file)

    # Map file for some reason doesn't unmangle all names.
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(result.symbol_group)
    # Unmangle prints its own log statement.
    _UnmangleRemainingSymbols(result.symbol_group, tool_prefix)
    logging.info('Normalizing names')
    _NormalizeNames(result.symbol_group)
    logging.info('Normalizing paths')
    _NormalizeObjectPaths(result.symbol_group)

  if logging.getLogger().isEnabledFor(logging.INFO):
    _PrintStats(result, lambda line: logging.info(line.rstrip()))
  logging.info('Finished analyzing %d symbols', len(result.symbol_group))
  return result
+
+
def main():
  """Command-line entry point: analyzes the input, writes a .size file."""
  parser = argparse.ArgumentParser()
  parser.add_argument('--output', required=True,
                      help='Path to store results. Must end in .size or '
                           '.size.gz')
  AddOptions(parser)
  args = helpers.AddCommonOptionsAndParseArgs(parser)
  # Validate the output name before doing any expensive analysis.
  if not _EndsWithMaybeGz(args.output, '.size'):
    raise Exception('--output must end with .size or .size.gz')

  result = AnalyzeWithArgs(args)

  logging.info('Saving result to %s', args.output)
  with _OpenMaybeGz(args.output, 'wb') as out_file:
    _SaveResult(result, out_file)
  logging.info('Done')
+
+
+if __name__ == '__main__':
+ main()
« no previous file with comments | « tools/binary_size/README.md ('k') | tools/binary_size/binary_size_utils.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698