tools/binary_size/map2size.py - Issue 2813963002: //tools/binary_size: Consolidate most tools into "supersize" command

Unified Diff: tools/binary_size/map2size.py

Issue 2813963002: //tools/binary_size: Consolidate most tools into "supersize" command (Closed)

Patch Set: Fix readme formatting. Make archive's --output-file a positional arg Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: tools/binary_size/map2size.py

diff --git a/tools/binary_size/map2size.py b/tools/binary_size/map2size.py

deleted file mode 100755

index 95310c3199fdc03141f023eeb5b640873a0a54cc..0000000000000000000000000000000000000000

--- a/tools/binary_size/map2size.py

+++ /dev/null

@@ -1,478 +0,0 @@

-#!/usr/bin/env python

-# Use of this source code is governed by a BSD-style license that can be

-# found in the LICENSE file.

-"""Main Python API for analyzing binary size."""

-import argparse

-import calendar

-import collections

-import datetime

-import gzip

-import logging

-import os

-import re

-import subprocess

-import sys

-import describe

-import file_format

-import function_signature

-import helpers

-import linker_map_parser

-import models

-import ninja_parser

-import paths

-def _OpenMaybeGz(path, mode=None):

- """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""

- if path.endswith('.gz'):

- if mode and 'w' in mode:

- return gzip.GzipFile(path, mode, 1)

- return gzip.open(path, mode)

- return open(path, mode or 'r')

-def _UnmangleRemainingSymbols(symbols, tool_prefix):

- """Uses c++filt to unmangle any symbols that need it."""

- to_process = [s for s in symbols if s.name.startswith('_Z')]

- if not to_process:

- return

- logging.info('Unmangling %d names', len(to_process))

- proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,

- stdout=subprocess.PIPE)

- stdout = proc.communicate('\n'.join(s.name for s in to_process))[0]

- assert proc.returncode == 0

- for i, line in enumerate(stdout.splitlines()):

- to_process[i].name = line

-def _NormalizeNames(symbols):

- """Ensures that all names are formatted in a useful way.

- This includes:

- - Assigning of |full_name|.

- - Stripping of return types in |full_name| and |name| (for functions).

- - Stripping parameters from |name|.

- - Moving "vtable for" and the like to be suffixes rather than prefixes.

- """

- found_prefixes = set()

- for symbol in symbols:

- if symbol.name.startswith('*'):

- # See comment in _CalculatePadding() about when this

- # can happen.

- continue

- # E.g.: vtable for FOO

- idx = symbol.name.find(' for ', 0, 30)

- if idx != -1:

- found_prefixes.add(symbol.name[:idx + 4])

- symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'

- # E.g.: virtual thunk to FOO

- idx = symbol.name.find(' to ', 0, 30)

- if idx != -1:

- found_prefixes.add(symbol.name[:idx + 3])

- symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'

- # Strip out return type, and identify where parameter list starts.

- if symbol.section == 't':

- symbol.full_name, symbol.name = function_signature.Parse(symbol.name)

- # Remove anonymous namespaces (they just harm clustering).

- non_anonymous = symbol.name.replace('(anonymous namespace)::', '')

- if symbol.name != non_anonymous:

- symbol.is_anonymous = True

- symbol.name = non_anonymous

- symbol.full_name = symbol.full_name.replace(

- '(anonymous namespace)::', '')

- if symbol.section != 't' and '(' in symbol.name:

- # Pretty rare. Example:

- # blink::CSSValueKeywordsHash::findValueImpl(char const*)::value_word_list

- symbol.full_name = symbol.name

- symbol.name = re.sub(r'\(.*\)', '', symbol.full_name)

- # Don't bother storing both if they are the same.

- if symbol.full_name == symbol.name:

- symbol.full_name = ''

- logging.debug('Found name prefixes of: %r', found_prefixes)

-def _NormalizeObjectPaths(symbols):

- """Ensures that all paths are formatted in a useful way."""

- for symbol in symbols:

- path = symbol.object_path

- if path.startswith('obj/'):

- # Convert obj/third_party/... -> third_party/...

- path = path[4:]

- elif path.startswith('../../'):

- # Convert ../../third_party/... -> third_party/...

- path = path[6:]

- if path.endswith(')'):

- # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o

- start_idx = path.index('(')

- path = os.path.join(path[:start_idx], path[start_idx + 1:-1])

- symbol.object_path = path

-def _NormalizeSourcePath(path):

- if path.startswith('gen/'):

- # Convert gen/third_party/... -> third_party/...

- return path[4:]

- if path.startswith('../../'):

- # Convert ../../third_party/... -> third_party/...

- return path[6:]

- return path

-def _ExtractSourcePaths(symbols, output_directory):

- """Fills in the .source_path attribute of all symbols.

- Returns True if source paths were found.

- """

- all_found = True

- mapper = ninja_parser.SourceFileMapper(output_directory)

- for symbol in symbols:

- object_path = symbol.object_path

- if symbol.source_path or not object_path:

- continue

- # We don't have source info for prebuilt .a files.

- if not object_path.startswith('..'):

- source_path = mapper.FindSourceForPath(object_path)

- if source_path:

- symbol.source_path = _NormalizeSourcePath(source_path)

- else:

- all_found = False

- logging.warning('Could not find source path for %s', object_path)

- logging.debug('Parsed %d .ninja files.', mapper.GetParsedFileCount())

- return all_found

-def _CalculatePadding(symbols):

- """Populates the |padding| field based on symbol addresses.

- Symbols must already be sorted by |address|.

- """

- seen_sections = []

- for i, symbol in enumerate(symbols[1:]):

- prev_symbol = symbols[i]

- if prev_symbol.section_name != symbol.section_name:

- assert symbol.section_name not in seen_sections, (

- 'Input symbols must be sorted by section, then address.')

- seen_sections.append(symbol.section_name)

- continue

- if symbol.address <= 0 or prev_symbol.address <= 0:

- continue

- # Padding-only symbols happen for ** symbol gaps.

- prev_is_padding_only = prev_symbol.size_without_padding == 0

- if symbol.address == prev_symbol.address and not prev_is_padding_only:

- assert False, 'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol)

- # Even with symbols at the same address removed, overlaps can still

- # happen. In this case, padding will be negative (and this is fine).

- padding = symbol.address - prev_symbol.end_address

- # These thresholds were found by manually auditing arm32 Chrome.

- # E.g.: Set them to 0 and see what warnings get logged.

- # TODO(agrieve): See if these thresholds make sense for architectures

- # other than arm32.

- if not symbol.name.startswith('*') and (

- symbol.section in 'rd' and padding >= 256 or

- symbol.section in 't' and padding >= 64):

- # For nm data, this is caused by data that has no associated symbol.

- # The linker map file lists them with no name, but with a file.

- # Example:

- # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o

- # Where as most look like:

- # .data.MANGLED_NAME...

- logging.debug('Large padding of %d between:\n A) %r\n B) %r' % (

- padding, prev_symbol, symbol))

- continue

- symbol.padding = padding

- symbol.size += padding

- assert symbol.size >= 0, (

- 'Symbol has negative size (likely not sorted propertly): '

- '%r\nprev symbol: %r' % (symbol, prev_symbol))

-def _ClusterSymbols(symbols):

- """Returns a new list of symbols with some symbols moved into groups.

- Groups include:

- * Symbols that have [clone] in their name (created by compiler optimization).

- * Star symbols (such as "** merge strings", and "** symbol gap")

- """

- # http://unix.stackexchange.com/questions/223013/function-symbol-gets-part-suffix-after-compilation

- # Example name suffixes:

- # [clone .part.322]

- # [clone .isra.322]

- # [clone .constprop.1064]

- # Step 1: Create name map, find clones, collect star syms into replacements.

- logging.debug('Creating name -> symbol map')

- clone_indices = []

- indices_by_full_name = {}

- # (name, full_name) -> [(index, sym),...]

- replacements_by_name = collections.defaultdict(list)

- for i, symbol in enumerate(symbols):

- if symbol.name.startswith('**'):

- # "symbol gap 3" -> "symbol gaps"

- name = re.sub(r'\s+\d+$', 's', symbol.name)

- replacements_by_name[(name, None)].append((i, symbol))

- elif symbol.full_name:

- if symbol.full_name.endswith(']') and ' [clone ' in symbol.full_name:

- clone_indices.append(i)

- else:

- indices_by_full_name[symbol.full_name] = i

- # Step 2: Collect same-named clone symbols.

- logging.debug('Grouping all clones')

- group_names_by_index = {}

- for i in clone_indices:

- symbol = symbols[i]

- # Multiple attributes could exist, so search from left-to-right.

- stripped_name = symbol.name[:symbol.name.index(' [clone ')]

- stripped_full_name = symbol.full_name[:symbol.full_name.index(' [clone ')]

- name_tup = (stripped_name, stripped_full_name)

- replacement_list = replacements_by_name[name_tup]

- if not replacement_list:

- # First occurance, check for non-clone symbol.

- non_clone_idx = indices_by_full_name.get(stripped_name)

- if non_clone_idx is not None:

- non_clone_symbol = symbols[non_clone_idx]

- replacement_list.append((non_clone_idx, non_clone_symbol))

- group_names_by_index[non_clone_idx] = stripped_name

- replacement_list.append((i, symbol))

- group_names_by_index[i] = stripped_name

- # Step 3: Undo clustering when length=1.

- # Removing these groups means Diff() logic must know about [clone] suffix.

- to_clear = []

- for name_tup, replacement_list in replacements_by_name.iteritems():

- if len(replacement_list) == 1:

- to_clear.append(name_tup)

- for name_tup in to_clear:

- del replacements_by_name[name_tup]

- # Step 4: Replace first symbol from each cluster with a SymbolGroup.

- before_symbol_count = sum(len(x) for x in replacements_by_name.itervalues())

- logging.debug('Creating %d symbol groups from %d symbols. %d clones had only '

- 'one symbol.', len(replacements_by_name), before_symbol_count,

- len(to_clear))

- len_delta = len(replacements_by_name) - before_symbol_count

- grouped_symbols = [None] * (len(symbols) + len_delta)

- dest_index = 0

- src_index = 0

- seen_names = set()

- replacement_names_by_index = {}

- for name_tup, replacement_list in replacements_by_name.iteritems():

- for tup in replacement_list:

- replacement_names_by_index[tup[0]] = name_tup

- sorted_items = replacement_names_by_index.items()

- sorted_items.sort(key=lambda tup: tup[0])

- for index, name_tup in sorted_items:

- count = index - src_index

- grouped_symbols[dest_index:dest_index + count] = (

- symbols[src_index:src_index + count])

- src_index = index + 1

- dest_index += count

- if name_tup not in seen_names:

- seen_names.add(name_tup)

- group_symbols = [tup[1] for tup in replacements_by_name[name_tup]]

- grouped_symbols[dest_index] = models.SymbolGroup(

- group_symbols, name=name_tup[0], full_name=name_tup[1],

- section_name=group_symbols[0].section_name)

- dest_index += 1

- assert len(grouped_symbols[dest_index:None]) == len(symbols[src_index:None])

- grouped_symbols[dest_index:None] = symbols[src_index:None]

- logging.debug('Finished making groups.')

- return grouped_symbols

-def LoadAndPostProcessSizeInfo(path):

- """Returns a SizeInfo for the given |path|."""

- logging.debug('Loading results from: %s', path)

- size_info = file_format.LoadSizeInfo(path)

- _PostProcessSizeInfo(size_info)

- return size_info

-def _PostProcessSizeInfo(size_info):

- logging.info('Normalizing symbol names')

- _NormalizeNames(size_info.raw_symbols)

- logging.info('Calculating padding')

- _CalculatePadding(size_info.raw_symbols)

- logging.info('Grouping decomposed functions')

- size_info.symbols = models.SymbolGroup(

- _ClusterSymbols(size_info.raw_symbols))

- logging.info('Processed %d symbols', len(size_info.raw_symbols))

-def CreateSizeInfo(map_path, lazy_paths=None, no_source_paths=False,

- raw_only=False):

- """Creates a SizeInfo from the given map file."""

- if not no_source_paths:

- # output_directory needed for source file information.

- lazy_paths.VerifyOutputDirectory()

- # tool_prefix needed for c++filt.

- lazy_paths.VerifyToolPrefix()

- with _OpenMaybeGz(map_path) as map_file:

- section_sizes, raw_symbols = (

- linker_map_parser.MapFileParser().Parse(map_file))

- if not no_source_paths:

- logging.info('Extracting source paths from .ninja files')

- all_found = _ExtractSourcePaths(raw_symbols, lazy_paths.output_directory)

- assert all_found, (

- 'One or more source file paths could not be found. Likely caused by '

- '.ninja files being generated at a different time than the .map file.')

- # Map file for some reason doesn't unmangle all names.

- # Unmangle prints its own log statement.

- _UnmangleRemainingSymbols(raw_symbols, lazy_paths.tool_prefix)

- logging.info('Normalizing object paths')

- _NormalizeObjectPaths(raw_symbols)

- size_info = models.SizeInfo(section_sizes, raw_symbols)

- # Name normalization not strictly required, but makes for smaller files.

- if raw_only:

- logging.info('Normalizing symbol names')

- _NormalizeNames(size_info.raw_symbols)

- else:

- _PostProcessSizeInfo(size_info)

- if logging.getLogger().isEnabledFor(logging.DEBUG):

- for line in describe.DescribeSizeInfoCoverage(size_info):

- logging.info(line)

- logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))

- return size_info

-def _DetectGitRevision(directory):

- try:

- git_rev = subprocess.check_output(

- ['git', '-C', directory, 'rev-parse', 'HEAD'])

- return git_rev.rstrip()

- except Exception:

- logging.warning('Failed to detect git revision for file metadata.')

- return None

-def BuildIdFromElf(elf_path, tool_prefix):

- args = [tool_prefix + 'readelf', '-n', elf_path]

- stdout = subprocess.check_output(args)

- match = re.search(r'Build ID: (\w+)', stdout)

- assert match, 'Build ID not found from running: ' + ' '.join(args)

- return match.group(1)

-def _SectionSizesFromElf(elf_path, tool_prefix):

- args = [tool_prefix + 'readelf', '-S', '--wide', elf_path]

- stdout = subprocess.check_output(args)

- section_sizes = {}

- # Matches [ 2] .hash HASH 00000000006681f0 0001f0 003154 04 A 3 0 8

- for match in re.finditer(r'\[[\s\d]+\] (\..*)$', stdout, re.MULTILINE):

- items = match.group(1).split()

- section_sizes[items[0]] = int(items[4], 16)

- return section_sizes

-def _ParseGnArgs(args_path):

- """Returns a list of normalized "key=value" strings."""

- args = {}

- with open(args_path) as f:

- for l in f:

- # Strips #s even if within string literal. Not a problem in practice.

- parts = l.split('#')[0].split('=')

- if len(parts) != 2:

- continue

- args[parts[0].strip()] = parts[1].strip()

- return ["%s=%s" % x for x in sorted(args.iteritems())]

-def main(argv):

- parser = argparse.ArgumentParser(argv)

- parser.add_argument('--elf-file', required=True,

- help='Path to input ELF file. Currently used for '

- 'capturing metadata. Pass "" to skip metadata '

- 'collection.')

- parser.add_argument('--map-file',

- help='Path to input .map(.gz) file. Defaults to '

- '{{elf_file}}.map(.gz)?')

- parser.add_argument('--output-file', required=True,

- help='Path to output .size file.')

- parser.add_argument('--no-source-paths', action='store_true',

- help='Do not use .ninja files to map '

- 'object_path -> source_path')

- paths.AddOptions(parser)

- args = helpers.AddCommonOptionsAndParseArgs(parser, argv)

- if not args.output_file.endswith('.size'):

- parser.error('output_file must end with .size')

- if args.map_file:

- if (not args.map_file.endswith('.map')

- and not args.map_file.endswith('.map.gz')):

- parser.error('Expected --map-file to end with .map or .map.gz')

- map_file_path = args.map_file

- else:

- map_file_path = args.elf_file + '.map'

- if not os.path.exists(map_file_path):

- map_file_path += '.gz'

- if not os.path.exists(map_file_path):

- parser.error('Could not find .map(.gz)? file. Use --map-file.')

- lazy_paths = paths.LazyPaths(args=args, input_file=args.elf_file)

- metadata = None

- if args.elf_file:

- logging.debug('Constructing metadata')

- git_rev = _DetectGitRevision(os.path.dirname(args.elf_file))

- build_id = BuildIdFromElf(args.elf_file, lazy_paths.tool_prefix)

- timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime(

- args.elf_file))

- timestamp = calendar.timegm(timestamp_obj.timetuple())

- gn_args = _ParseGnArgs(os.path.join(lazy_paths.output_directory, 'args.gn'))

- def relative_to_out(path):

- return os.path.relpath(path, lazy_paths.VerifyOutputDirectory())

- metadata = {

- models.METADATA_GIT_REVISION: git_rev,

- models.METADATA_MAP_FILENAME: relative_to_out(map_file_path),

- models.METADATA_ELF_FILENAME: relative_to_out(args.elf_file),

- models.METADATA_ELF_MTIME: timestamp,

- models.METADATA_ELF_BUILD_ID: build_id,

- models.METADATA_GN_ARGS: gn_args,

- }

- size_info = CreateSizeInfo(map_file_path, lazy_paths,

- no_source_paths=args.no_source_paths,

- raw_only=True)

- if metadata:

- size_info.metadata = metadata

- logging.debug('Validating section sizes')

- elf_section_sizes = _SectionSizesFromElf(args.elf_file,

- lazy_paths.tool_prefix)

- for k, v in elf_section_sizes.iteritems():

- assert v == size_info.section_sizes.get(k), (

- 'ELF file and .map file do not match.')

- logging.info('Recording metadata: \n %s',

- '\n '.join(describe.DescribeMetadata(size_info.metadata)))

- logging.info('Saving result to %s', args.output_file)

- file_format.SaveSizeInfo(size_info, args.output_file)

- logging.info('Done')

-if __name__ == '__main__':

- sys.exit(main(sys.argv))

« no previous file with comments | « tools/binary_size/linker_map_parser.py ('k') | tools/binary_size/match_util.py » ('j') | no next file with comments »