tools/binary_size/analyze.py - Issue 2785483002: Reland of V2 of //tools/binary_size rewrite (diffs).

Unified Diff: tools/binary_size/analyze.py

Issue 2785483002: Reland of V2 of //tools/binary_size rewrite (diffs). (Closed)

Patch Set: add missing name= Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: tools/binary_size/analyze.py

diff --git a/tools/binary_size/analyze.py b/tools/binary_size/analyze.py

deleted file mode 100755

index 8a4eefc44df4e2107bc8c2a60c72cbe92e1b4c77..0000000000000000000000000000000000000000

--- a/tools/binary_size/analyze.py

+++ /dev/null

@@ -1,356 +0,0 @@

-#!/usr/bin/env python

-# Use of this source code is governed by a BSD-style license that can be

-# found in the LICENSE file.

-"""Main Python API for analyzing binary size."""

-import argparse

-import ast

-import distutils.spawn

-import gzip

-import logging

-import os

-import re

-import subprocess

-import function_signature

-import helpers

-import mapfileparser

-import symbols

-# File format version for .size files.

-_SERIALIZATION_VERSION = 1

def _OpenMaybeGz(path, mode=None):
  """Opens |path|, transparently handling gzip compression.

  Args:
    path: Path of the file to open. It is treated as gzip-compressed when it
        ends in ".gz".
    mode: Optional mode string. When omitted, plain files are opened with 'r'
        and gzip files with 'rb'.

  Returns:
    The opened file object (usable in a `with` statement).
  """
  if not path.endswith('.gz'):
    return open(path, mode or 'r')
  if mode and 'w' in mode:
    # Writing favors speed over ratio: compresslevel=1 is the fastest level.
    return gzip.GzipFile(path, mode, compresslevel=1)
  # Never hand mode=None to gzip.open(); spell out the read default instead of
  # relying on how a particular gzip implementation treats None.
  return gzip.open(path, mode or 'rb')

def _EndsWithMaybeGz(path, suffix):
  """Returns True if |path| ends with |suffix| or with |suffix| + ".gz"."""
  accepted_endings = (suffix, suffix + '.gz')
  return path.endswith(accepted_endings)

def _IterLines(s):
  """Yields the lines of |s|, split on '\\n', without the newline itself.

  Unlike a plain "find the next newline" loop, text that follows the last
  newline is yielded as well, so input that lacks a trailing newline does not
  lose its final line. An empty string yields nothing.

  Args:
    s: The string to split.

  Yields:
    Each line of |s|, in order.
  """
  start = 0
  while True:
    end = s.find('\n', start)
    if end == -1:
      break
    yield s[start:end]
    start = end + 1
  # Whatever is left after the last newline is still a line.
  if start < len(s):
    yield s[start:]

def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
  """Demangles, in place, every symbol name that still starts with "_Z".

  Args:
    symbol_group: Iterable of symbols with a writable |name| attribute.
    tool_prefix: String prepended to "c++filt" to form the command to run.
  """
  mangled = []
  for sym in symbol_group:
    if sym.name and sym.name.startswith('_Z'):
      mangled.append(sym)
  if not mangled:
    return

  logging.info('Unmangling %d names', len(mangled))
  command = [tool_prefix + 'c++filt']
  proc = subprocess.Popen(command, stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE)
  # One mangled name per input line; the i-th output line is written back to
  # the i-th symbol below.
  stdin_data = '\n'.join(sym.name for sym in mangled)
  stdout = proc.communicate(stdin_data)[0]
  assert proc.returncode == 0

  for index, demangled in enumerate(_IterLines(stdout)):
    mangled[index].name = demangled

def _NormalizeNames(symbol_group):
  """Rewrites symbol names, in place, into a form that clusters well.

  For every symbol that has a name not starting with '*':
    - A leading "<qualifier> for " or "<qualifier> to " (e.g. "vtable for",
      "virtual thunk to") is moved to the end as " [<qualifier>]".
    - For symbols in section 't', function_signature.Parse() supplies the new
      |function_signature| and |name|.
    - "(anonymous namespace)::" is removed from |name|.

  Args:
    symbol_group: Iterable of symbols to update.
  """
  seen_prefixes = set()
  for sym in symbol_group:
    # Nameless symbols and '*'-prefixed entries are left untouched.
    if not sym.name or sym.name.startswith('*'):
      continue

    # E.g.: "vtable for FOO" -> "FOO [vtable]", then
    #       "virtual thunk to FOO" -> "FOO [virtual thunk]".
    # The separator is only searched for within the first 30 characters.
    for separator in (' for ', ' to '):
      pos = sym.name.find(separator, 0, 30)
      if pos == -1:
        continue
      qualifier = sym.name[:pos]
      remainder = sym.name[pos + len(separator):]
      # Logged form keeps the keyword but not the space that follows it.
      seen_prefixes.add(sym.name[:pos + len(separator) - 1])
      sym.name = remainder + ' [' + qualifier + ']'

    # Functions: Parse() returns the (function_signature, name) pair.
    if sym.section == 't':
      parsed = function_signature.Parse(sym.name)
      sym.function_signature, sym.name = parsed

    # Anonymous namespaces just harm clustering, so drop them.
    sym.name = sym.name.replace('(anonymous namespace)::', '')

  logging.debug('Found name prefixes of: %r', seen_prefixes)

def _NormalizeObjectPaths(symbol_group):
  """Rewrites each symbol's |path|, in place, into a tidier form.

  Symbols with an empty or missing path are skipped.

  Args:
    symbol_group: Iterable of symbols with a writable |path| attribute.
  """
  for sym in symbol_group:
    if not sym.path:
      continue

    # obj/third_party/... -> third_party/...
    # ../../third_party/... -> third_party/...
    # At most one of the two prefixes is stripped.
    for prefix in ('obj/', '../../'):
      if sym.path.startswith(prefix):
        sym.path = sym.path[len(prefix):]
        break

    # foo/bar.a(baz.o) -> foo/bar.a/baz.o
    if sym.path.endswith(')'):
      open_pos = sym.path.index('(')
      archive = sym.path[:open_pos]
      member = sym.path[open_pos + 1:-1]
      sym.path = archive + os.path.sep + member

def _RemoveDuplicatesAndCalculatePadding(symbol_group):
  """Removes symbols at the same address and calculates the |padding| field.

  Symbols must already be sorted by |address|. Each symbol is compared with
  the one directly before it in |symbol_group.symbols|. Pairs that straddle a
  section boundary, or where either address is not positive, are left alone.

  Args:
    symbol_group: Object whose |symbols| list is examined. Symbols get their
        |size| and |padding| updated in place, and |symbols| is replaced with
        a filtered list when duplicates were found.
  """
  to_remove = set()
  all_symbols = symbol_group.symbols
  for i, symbol in enumerate(all_symbols):
    if i == 0:
      # The first symbol has no predecessor. Indexing [i - 1] for it would
      # wrap around and compare it against the *last* symbol.
      continue
    prev_symbol = all_symbols[i - 1]
    # Compare by value; identity only works if the names happen to be
    # interned.
    if prev_symbol.section_name != symbol.section_name:
      continue
    if symbol.address <= 0 or prev_symbol.address <= 0:
      continue

    # Fold symbols that are at the same address (happens in nm output).
    # NOTE(review): the symbol that receives the merged size is the one that
    # is dropped; it still acts as |prev_symbol| for the next iteration.
    if symbol.address == prev_symbol.address:
      symbol.size = max(prev_symbol.size, symbol.size)
      to_remove.add(i)
      continue

    # Even with symbols at the same address removed, overlaps can still
    # happen. In this case, padding will be negative (and this is fine).
    padding = symbol.address - prev_symbol.end_address

    # These thresholds were found by manually auditing arm32 Chrome.
    # E.g.: Set them to 0 and see what warnings get logged.
    # TODO(agrieve): See if these thresholds make sense for architectures
    #     other than arm32.
    if (symbol.section in 'rd' and padding >= 256 or
        symbol.section in 't' and padding >= 64):
      # For nm data, this is caused by data that has no associated symbol.
      # The linker map file lists them with no name, but with a file.
      # Example:
      #   .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
      # Whereas most look like:
      #   .data.MANGLED_NAME...
      logging.debug('Large padding of %d between:\n A) %r\n B) %r',
                    padding, prev_symbol, symbol)
      continue

    symbol.padding = padding
    symbol.size += padding
    assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol

  # Map files have no overlaps, so worth special-casing the no-op case.
  if to_remove:
    logging.info('Removing %d overlapping symbols', len(to_remove))
    symbol_group.symbols = (
        [s for i, s in enumerate(all_symbols) if i not in to_remove])

def _PrintStats(result, write_func):
  """Reports, per section, how much of its size the symbols account for.

  Args:
    result: Object providing |section_sizes| (section name -> size) and
        |symbol_group|.
    write_func: Callable invoked once per message; each message ends with a
        newline.
  """
  stat_template = ('Section %s has %.1f%% of %d bytes accounted for from '
                   '%d symbols. %d bytes are unaccounted for. Padding '
                   'accounts for %d bytes\n')

  def format_stat(section, expected_size, group):
    # Formats the one-line summary for |group| against |expected_size|.
    actual_size = group.size
    count = len(group)
    padding = group.padding
    size_percent = 100.0 * actual_size / expected_size
    unaccounted = expected_size - actual_size
    return stat_template % (section, size_percent, actual_size, count,
                            unaccounted, padding)

  for section in symbols.SECTION_TO_SECTION_NAME:
    if section == 'd':
      # 'd' covers every section whose name starts with ".data".
      expected_size = sum(
          size for name, size in result.section_sizes.iteritems()
          if name.startswith('.data'))
    else:
      section_name = symbols.SECTION_TO_SECTION_NAME[section]
      expected_size = result.section_sizes[section_name]

    in_section = result.symbol_group.WhereInSection(section)
    write_func(format_stat(section, expected_size, in_section))

    # Symbols whose name starts with '*' are reported as "merge sections".
    star_syms = in_section.WhereNameMatches(r'^\*')
    attributed_syms = star_syms.Inverted().WhereHasAnyAttribution()
    anonymous_syms = attributed_syms.Inverted()
    if not (star_syms or anonymous_syms):
      continue

    missing_size = star_syms.size + anonymous_syms.size
    header = ('+ Without %d merge sections and %d anonymous entries ('
              'accounting for %d bytes):\n')
    write_func(header % (len(star_syms), len(anonymous_syms), missing_size))
    write_func('+ ' + format_stat(section, expected_size, attributed_syms))

def _SaveResult(result, file_obj):
  """Serializes |result| to |file_obj| in the .size text format.

  Layout: the format version, the repr() of |section_sizes| and the symbol
  count, one per line, followed by the symbols. A line holding just the
  section name is emitted whenever the section changes; each symbol is one
  tab-separated line of hex address, hex size without padding,
  signature-or-name, and path.

  Args:
    result: Object providing |section_sizes| and |symbol_group|.
    file_obj: Writable file-like object.
  """
  write = file_obj.write
  write('%d\n' % _SERIALIZATION_VERSION)
  write('%r\n' % result.section_sizes)
  write('%d\n' % len(result.symbol_group))

  current_section = None
  for sym in result.symbol_group:
    if sym.section_name != current_section:
      current_section = sym.section_name
      write('%s\n' % current_section)
    # Padding and the stripped name are derived values, so only the richest
    # available label is stored.
    label = sym.function_signature or sym.name or ''
    fields = (sym.address, sym.size_without_padding, label, sym.path or '')
    write('%x\t%x\t%s\t%s\n' % fields)

def _LoadResults(file_obj):
  """Reads back a result written in the .size text format.

  Args:
    file_obj: Iterable of lines (e.g. an open file).

  Returns:
    A mapfileparser.ParseResult whose derived fields (padding, function
    signatures) have been recomputed.
  """
  line_iter = iter(file_obj)
  version = int(next(line_iter))
  assert version == _SERIALIZATION_VERSION, (
      'Version mismatch. Need to write some upgrade code.')
  section_sizes = ast.literal_eval(next(line_iter))
  symbol_count = int(next(line_iter))

  loaded = []
  current_section = None
  for _ in xrange(symbol_count):
    record = next(line_iter)[:-1]
    if '\t' not in record:
      # A line without tabs is a section header; the symbol follows it.
      current_section = intern(record)
      record = next(line_iter)[:-1]

    fields = record.split('\t')
    # Build the symbol without going through Symbol.__init__.
    sym = symbols.Symbol.__new__(symbols.Symbol)
    sym.section_name = current_section
    sym.address = int(fields[0], 16)
    sym.size = int(fields[1], 16)
    sym.name = fields[2] or None
    sym.path = fields[3] or None
    sym.padding = 0  # Derived
    sym.function_signature = None  # Derived
    loaded.append(sym)

  # Recompute derived values (padding and function names).
  result = mapfileparser.ParseResult(loaded, section_sizes)
  logging.info('Calculating padding')
  _RemoveDuplicatesAndCalculatePadding(result.symbol_group)
  logging.info('Deriving signatures')
  # Re-parse out function parameters.
  text_symbols = result.symbol_group.WhereInSection('t')
  _NormalizeNames(text_symbols)
  return result

def AddOptions(parser):
  """Adds input_file, --tool-prefix and --output-directory to |parser|.

  Args:
    parser: The argparse.ArgumentParser to add the arguments to.
  """
  input_help = ('Path to input file. Can be a linker .map file, an unstripped '
                'binary, or a saved result from analyze.py')
  parser.add_argument('input_file', help=input_help)
  parser.add_argument(
      '--tool-prefix', default='', help='Path prefix for c++filt.')
  parser.add_argument(
      '--output-directory', help='Path to the root build directory.')

def _DetectToolPrefix(tool_prefix, input_file, output_directory=None):
  """Determines the tool prefix to use and checks that c++filt exists.

  When |tool_prefix| is empty, it is read from the "android_tool_prefix"
  entry of <output_directory>/build_vars.txt, if that file exists. When
  |output_directory| is not given, it is guessed from the first occurrence
  of "Release" in the absolute path of |input_file|.

  Args:
    tool_prefix: Explicit prefix for c++filt, or '' to auto-detect.
    input_file: Path of the file being analyzed.
    output_directory: Optional path to the root build directory.

  Returns:
    The tool prefix, either as given or as auto-detected.

  Raises:
    Exception: If <tool_prefix>c++filt cannot be found.
    KeyError: If build_vars.txt is consulted but has no android_tool_prefix.
  """
  if not output_directory:
    abs_path = os.path.abspath(input_file)
    release_idx = abs_path.find('Release')
    if release_idx != -1:
      output_directory = os.path.relpath(abs_path[:release_idx] + '/Release')
      logging.debug('Detected --output-directory=%s', output_directory)

  if not tool_prefix and output_directory:
    # Auto-detect from build_vars.txt
    build_vars_path = os.path.join(output_directory, 'build_vars.txt')
    if os.path.exists(build_vars_path):
      with open(build_vars_path) as f:
        build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l)
      logging.debug('Found --tool-prefix from build_vars.txt')
      tool_prefix = build_vars['android_tool_prefix']

  tool_name = tool_prefix + 'c++filt'
  if os.path.sep not in tool_prefix:
    # A bare prefix is looked up on PATH; the result is None when not found.
    full_path = distutils.spawn.find_executable(tool_name)
  else:
    full_path = tool_name

  # Check for None first: os.path.isfile(None) raises TypeError, which would
  # hide the intended error message.
  if not full_path or not os.path.isfile(full_path):
    raise Exception(
        'Bad --tool-prefix. Path not found: %s' % (full_path or tool_name))
  logging.info('Using --tool-prefix=%s', tool_prefix)
  return tool_prefix

def AnalyzeWithArgs(args):
  """Calls Analyze() with the values held by |args|.

  Args:
    args: Object with |input_file|, |output_directory| and |tool_prefix|
        attributes (see AddOptions()).

  Returns:
    Whatever Analyze() returns.
  """
  return Analyze(args.input_file,
                 output_directory=args.output_directory,
                 tool_prefix=args.tool_prefix)

def Analyze(path, output_directory=None, tool_prefix=''):
  """Produces an analysis result for the file at |path|.

  Args:
    path: A saved result (.size / .size.gz) or a linker map file
        (.map / .map.gz).
    output_directory: Optional path to the root build directory, used when
        detecting the tool prefix.
    tool_prefix: Optional path prefix for c++filt.

  Returns:
    The loaded or freshly computed result.

  Raises:
    Exception: If |path| is neither a .size nor a .map file.
  """
  is_cached = _EndsWithMaybeGz(path, '.size')
  if not is_cached and not _EndsWithMaybeGz(path, '.map'):
    raise Exception('Expected input to be a .map or a .size')

  if is_cached:
    logging.info('Loading cached results.')
    with _OpenMaybeGz(path) as f:
      result = _LoadResults(f)
  else:
    # Verify tool_prefix early, before the map file is parsed.
    tool_prefix = _DetectToolPrefix(tool_prefix, path, output_directory)

    with _OpenMaybeGz(path) as map_file:
      result = mapfileparser.MapFileParser().Parse(map_file)

    group = result.symbol_group
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(group)
    # The map file leaves some names mangled. This step logs for itself.
    _UnmangleRemainingSymbols(group, tool_prefix)
    logging.info('Normalizing names')
    _NormalizeNames(group)
    logging.info('Normalizing paths')
    _NormalizeObjectPaths(group)

  # Only compute the stats when they would actually be logged.
  if logging.getLogger().isEnabledFor(logging.INFO):
    _PrintStats(result, lambda l: logging.info(l.rstrip()))
  logging.info('Finished analyzing %d symbols', len(result.symbol_group))
  return result

def main():
  """Command-line entry point: analyzes the input and saves a .size file."""
  output_help = 'Path to store results. Must end in .size or .size.gz'
  parser = argparse.ArgumentParser()
  parser.add_argument('--output', required=True, help=output_help)
  AddOptions(parser)
  args = helpers.AddCommonOptionsAndParseArgs(parser)

  # Reject a bad output name before doing any analysis work.
  if not _EndsWithMaybeGz(args.output, '.size'):
    raise Exception('--output must end with .size or .size.gz')

  result = AnalyzeWithArgs(args)

  logging.info('Saving result to %s', args.output)
  with _OpenMaybeGz(args.output, 'wb') as out_file:
    _SaveResult(result, out_file)
  logging.info('Done')


if __name__ == '__main__':
  main()

« no previous file with comments | « tools/binary_size/README.md ('k') | tools/binary_size/binary_size_utils.py » ('j') | no next file with comments »