| Index: tools/binary_size/libsupersize/archive.py
|
| diff --git a/tools/binary_size/libsupersize/archive.py b/tools/binary_size/libsupersize/archive.py
|
| index 13441d070c3190038cde06df0a087843acc67a2f..68b1cf5fbfe447067f51aa70aebdda54c05f0893 100644
|
| --- a/tools/binary_size/libsupersize/archive.py
|
| +++ b/tools/binary_size/libsupersize/archive.py
|
| @@ -18,13 +18,14 @@ import sys
|
| import tempfile
|
| import zipfile
|
|
|
| +import concurrent
|
| import describe
|
| import file_format
|
| import function_signature
|
| -import helpers
|
| import linker_map_parser
|
| import models
|
| import ninja_parser
|
| +import nm
|
| import paths
|
|
|
|
|
| @@ -127,21 +128,18 @@ def _NormalizeNames(symbols):
|
| logging.debug('Found name prefixes of: %r', found_prefixes)
|
|
|
|
|
| -def _NormalizeObjectPaths(symbols):
|
| - """Ensures that all paths are formatted in a useful way."""
|
| - for symbol in symbols:
|
| - path = symbol.object_path
|
| - if path.startswith('obj/'):
|
| - # Convert obj/third_party/... -> third_party/...
|
| - path = path[4:]
|
| - elif path.startswith('../../'):
|
| - # Convert ../../third_party/... -> third_party/...
|
| - path = path[6:]
|
| - if path.endswith(')'):
|
| - # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o
|
| - start_idx = path.index('(')
|
| - path = os.path.join(path[:start_idx], path[start_idx + 1:-1])
|
| - symbol.object_path = path
|
| +def _NormalizeObjectPath(path):
|
| + if path.startswith('obj/'):
|
| + # Convert obj/third_party/... -> third_party/...
|
| + path = path[4:]
|
| + elif path.startswith('../../'):
|
| + # Convert ../../third_party/... -> third_party/...
|
| + path = path[6:]
|
| + if path.endswith(')'):
|
| + # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o
|
| + start_idx = path.index('(')
|
| + path = os.path.join(path[:start_idx], path[start_idx + 1:-1])
|
| + return path
|
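For reference, the conversions the new helper performs (illustrative asserts,
assuming the patched archive.py is importable; not part of the change):

    assert _NormalizeObjectPath('obj/third_party/foo.o') == 'third_party/foo.o'
    assert _NormalizeObjectPath(
        '../../third_party/foo.o') == 'third_party/foo.o'
    assert _NormalizeObjectPath('foo/bar.a(baz.o)') == 'foo/bar.a/baz.o'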
|
|
|
|
| def _NormalizeSourcePath(path):
|
| @@ -154,19 +152,111 @@ def _NormalizeSourcePath(path):
|
| return path
|
|
|
|
|
| +def _SourcePathForObjectPath(object_path, source_mapper):
|
| + # We don't have source info for prebuilt .a files.
|
| + if not os.path.isabs(object_path) and not object_path.startswith('..'):
|
| + source_path = source_mapper.FindSourceForPath(object_path)
|
| + if source_path:
|
| + return _NormalizeSourcePath(source_path)
|
| + return ''
|
| +
|
| +
|
| def _ExtractSourcePaths(symbols, source_mapper):
|
| - """Fills in the .source_path attribute of all symbols."""
|
| + """Fills in the |source_path| attribute."""
|
| logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count)
|
| -
|
| for symbol in symbols:
|
| object_path = symbol.object_path
|
| - if symbol.source_path or not object_path:
|
| + if object_path and not symbol.source_path:
|
| + symbol.source_path = _SourcePathForObjectPath(object_path, source_mapper)
|
| +
|
| +
|
| +def _ComputeAncestorPath(path_list):
|
| +  """Returns the common ancestor of the given paths."""
|
| + # Ignore missing paths.
|
| + path_list = [p for p in path_list if p]
|
| + prefix = os.path.commonprefix(path_list)
|
| + # Put the path count as a subdirectory to allow for better grouping when
|
| +  # doing path-based breakdowns.
|
| + if not prefix:
|
| + if len(path_list) < 2:
|
| + return ''
|
| + return os.path.join('{shared}', str(len(path_list)))
|
| + if prefix == path_list[0]:
|
| + return prefix
|
| + assert len(path_list) > 1, 'path_list: ' + repr(path_list)
|
| + return os.path.join(os.path.dirname(prefix), '{shared}', str(len(path_list)))
|
| +
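A worked example of the ancestor computation (illustrative, assuming the helper
above; os.path.commonprefix() compares character-wise, hence the dirname()
call):

    assert _ComputeAncestorPath(
        ['base/strings/util.cc', 'base/files/file.cc']) == 'base/{shared}/2'
    assert _ComputeAncestorPath(['v8/a.cc', 'base/b.cc']) == '{shared}/2'
    assert _ComputeAncestorPath(['base/a.cc']) == 'base/a.cc'
    assert _ComputeAncestorPath([]) == ''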
|
| +
|
| +# This must normalize object paths at the same time because normalization
|
| +# needs to occur before finding the common ancestor.
|
| +def _ComputeAncestorPathsAndNormalizeObjectPaths(
|
| + symbols, object_paths_by_name, source_mapper):
|
| + num_found_paths = 0
|
| + num_unknown_names = 0
|
| + num_path_mismatches = 0
|
| + num_unmatched_aliases = 0
|
| + for symbol in symbols:
|
| + name = symbol.name
|
| + if (symbol.IsBss() or
|
| + not name or
|
| + name[0] in '*.' or # e.g. ** merge symbols, .Lswitch.table
|
| + name == 'startup'):
|
| + symbol.object_path = _NormalizeObjectPath(symbol.object_path)
|
| continue
|
| - # We don't have source info for prebuilt .a files.
|
| - if not os.path.isabs(object_path) and not object_path.startswith('..'):
|
| - source_path = source_mapper.FindSourceForPath(object_path)
|
| - if source_path:
|
| - symbol.source_path = _NormalizeSourcePath(source_path)
|
| +
|
| + object_paths = object_paths_by_name.get(name)
|
| + if object_paths:
|
| + num_found_paths += 1
|
| + else:
|
| + if not symbol.object_path and symbol.aliases:
|
| + # Happens when aliases are from object files where all symbols were
|
| + # pruned or de-duped as aliases. Since we are only scanning .o files
|
| + # referenced by included symbols, such files are missed.
|
| + # TODO(agrieve): This could be fixed by retrieving linker inputs from
|
| + # build.ninja, or by looking for paths within the .map file's
|
| + # discarded sections.
|
| + num_unmatched_aliases += 1
|
| + continue
|
| + if num_unknown_names < 10:
|
| + logging.warning('Symbol not found in any .o files: %r', symbol)
|
| + num_unknown_names += 1
|
| + symbol.object_path = _NormalizeObjectPath(symbol.object_path)
|
| + continue
|
| +
|
| + if symbol.object_path and symbol.object_path not in object_paths:
|
| + if num_path_mismatches < 10:
|
| + logging.warning('Symbol path reported by .map not found by nm.')
|
| + logging.warning('sym=%r', symbol)
|
| + logging.warning('paths=%r', object_paths)
|
| + num_path_mismatches += 1
|
| +
|
| + if source_mapper:
|
| + source_paths = [
|
| + _SourcePathForObjectPath(p, source_mapper) for p in object_paths]
|
| +      symbol.source_path = _ComputeAncestorPath(source_paths)
|
| +
|
| + object_paths = [_NormalizeObjectPath(p) for p in object_paths]
|
| +    symbol.object_path = _ComputeAncestorPath(object_paths)
|
| +
|
| + logging.debug('Cross-referenced %d symbols with nm output. '
|
| + 'num_unknown_names=%d num_path_mismatches=%d '
|
| +                'num_unmatched_aliases=%d', num_found_paths, num_unknown_names,
|
| + num_path_mismatches, num_unmatched_aliases)
|
| +
|
| +
|
| +def _DiscoverMissedObjectPaths(symbols, elf_object_paths):
|
| +  # Missing object paths are caused by .a files added by -l flags, which are
|
| +  # not listed as explicit inputs within .ninja rules.
|
| + parsed_inputs = set(elf_object_paths)
|
| + missed_inputs = set()
|
| + for symbol in symbols:
|
| + path = symbol.object_path
|
| + if path.endswith(')'):
|
| + # Convert foo/bar.a(baz.o) -> foo/bar.a
|
| + path = path[:path.index('(')]
|
| + if path and path not in parsed_inputs:
|
| + missed_inputs.add(path)
|
| + return missed_inputs
|
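A sketch of what this catches (the _FakeSym stand-in is made up; the function
only reads |object_path|). A static library pulled in via -l shows up in a
symbol's path but not among the linker's explicit inputs:

    class _FakeSym(object):
      def __init__(self, object_path):
        self.object_path = object_path

    syms = [_FakeSym('foo/bar.a(baz.o)'), _FakeSym('obj/a.o')]
    assert _DiscoverMissedObjectPaths(syms, ['obj/a.o']) == set(['foo/bar.a'])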
|
|
|
|
| def _CalculatePadding(symbols):
|
| @@ -184,29 +274,27 @@ def _CalculatePadding(symbols):
|
| continue
|
| if symbol.address <= 0 or prev_symbol.address <= 0:
|
| continue
|
| - # Padding-only symbols happen for ** symbol gaps.
|
| - prev_is_padding_only = prev_symbol.size_without_padding == 0
|
| - if symbol.address == prev_symbol.address and not prev_is_padding_only:
|
| - assert False, 'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol)
|
| - # Even with symbols at the same address removed, overlaps can still
|
| - # happen. In this case, padding will be negative (and this is fine).
|
| +
|
| + if symbol.address == prev_symbol.address:
|
| + if symbol.aliases and symbol.aliases is prev_symbol.aliases:
|
| + symbol.padding = prev_symbol.padding
|
| + symbol.size = prev_symbol.size
|
| + continue
|
| + # Padding-only symbols happen for ** symbol gaps.
|
| + assert prev_symbol.size_without_padding == 0, (
|
| + 'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol))
|
| +
|
| padding = symbol.address - prev_symbol.end_address
|
| - # These thresholds were found by manually auditing arm32 Chrome.
|
| - # E.g.: Set them to 0 and see what warnings get logged.
|
| + # These thresholds were found by experimenting with arm32 Chrome.
|
| +    # E.g.: Set them to 0, see what warnings get logged, then take the max.
|
| # TODO(agrieve): See if these thresholds make sense for architectures
|
| # other than arm32.
|
| if not symbol.name.startswith('*') and (
|
| symbol.section in 'rd' and padding >= 256 or
|
| symbol.section in 't' and padding >= 64):
|
| - # For nm data, this is caused by data that has no associated symbol.
|
| - # The linker map file lists them with no name, but with a file.
|
| - # Example:
|
| - # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
|
| - # Where as most look like:
|
| - # .data.MANGLED_NAME...
|
| - logging.debug('Large padding of %d between:\n A) %r\n B) %r' % (
|
| - padding, prev_symbol, symbol))
|
| - continue
|
| + # Should not happen.
|
| +      logging.warning('Large padding of %d between:\n A) %r\n B) %r',
|
| +                      padding, prev_symbol, symbol)
|
| symbol.padding = padding
|
| symbol.size += padding
|
| assert symbol.size >= 0, (
|
| @@ -317,6 +405,51 @@ def _ClusterSymbols(symbols):
|
| return grouped_symbols
|
|
|
|
|
| +def _AddSymbolAliases(symbols, aliases_by_address):
|
| + # Step 1: Create list of (index_of_symbol, name_list).
|
| + logging.debug('Creating alias list')
|
| + replacements = []
|
| + num_new_symbols = 0
|
| + for i, s in enumerate(symbols):
|
| + # Don't alias padding-only symbols (e.g. ** symbol gap)
|
| + if s.size_without_padding == 0:
|
| + continue
|
| + name_list = aliases_by_address.get(s.address)
|
| + if name_list:
|
| + if s.name not in name_list:
|
| + logging.warning('Name missing from aliases: %s %s', s.name, name_list)
|
| + continue
|
| + replacements.append((i, name_list))
|
| + num_new_symbols += len(name_list) - 1
|
| +
|
| + # Step 2: Create new symbols as siblings to each existing one.
|
| + logging.debug('Creating %d aliases', num_new_symbols)
|
| + src_cursor_end = len(symbols)
|
| + symbols += [None] * num_new_symbols
|
| + dst_cursor_end = len(symbols)
|
| + for src_index, name_list in reversed(replacements):
|
| + # Copy over symbols that come after the current one.
|
| + chunk_size = src_cursor_end - src_index - 1
|
| + dst_cursor_end -= chunk_size
|
| + src_cursor_end -= chunk_size
|
| + symbols[dst_cursor_end:dst_cursor_end + chunk_size] = (
|
| + symbols[src_cursor_end:src_cursor_end + chunk_size])
|
| + sym = symbols[src_index]
|
| + src_cursor_end -= 1
|
| +
|
| +    # Create aliases (we don't bother reusing the existing symbol).
|
| + aliases = [None] * len(name_list)
|
| + for i, name in enumerate(name_list):
|
| + aliases[i] = models.Symbol(
|
| + sym.section_name, sym.size, address=sym.address, name=name,
|
| + aliases=aliases)
|
| +
|
| + dst_cursor_end -= len(aliases)
|
| + symbols[dst_cursor_end:dst_cursor_end + len(aliases)] = aliases
|
| +
|
| + assert dst_cursor_end == src_cursor_end
|
| +
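The chunked copying above is easier to follow on a toy list. Below is a minimal
standalone sketch of the same technique (_ExpandInPlace is a hypothetical name,
not from this patch): grow the list once, then walk the replacements
right-to-left so each surviving element is moved exactly once, keeping the
whole expansion O(n):

    def _ExpandInPlace(items, replacements):
      # replacements: (index, new_items) pairs, with indices ascending.
      num_new = sum(len(new) - 1 for _, new in replacements)
      src_end = len(items)
      items += [None] * num_new
      dst_end = len(items)
      for src_index, new_items in reversed(replacements):
        # Shift the tail that follows the element being expanded.
        chunk_size = src_end - src_index - 1
        dst_end -= chunk_size
        src_end -= chunk_size
        items[dst_end:dst_end + chunk_size] = (
            items[src_end:src_end + chunk_size])
        src_end -= 1  # Consume the element being expanded.
        # Write the replacement items in its place.
        dst_end -= len(new_items)
        items[dst_end:dst_end + len(new_items)] = new_items
      assert dst_end == src_end

    lst = ['a', 'b', 'c']
    _ExpandInPlace(lst, [(1, ['b1', 'b2', 'b3'])])
    assert lst == ['a', 'b1', 'b2', 'b3', 'c']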
|
| +
|
| def LoadAndPostProcessSizeInfo(path):
|
| """Returns a SizeInfo for the given |path|."""
|
| logging.debug('Loading results from: %s', path)
|
| @@ -336,24 +469,102 @@ def _PostProcessSizeInfo(size_info):
|
| logging.info('Processed %d symbols', len(size_info.raw_symbols))
|
|
|
|
|
| -def CreateSizeInfo(map_path, lazy_paths=None, no_source_paths=False,
|
| +def CreateMetadata(map_path, elf_path, apk_path, tool_prefix, output_directory):
|
| + metadata = None
|
| + if elf_path:
|
| + logging.debug('Constructing metadata')
|
| + git_rev = _DetectGitRevision(os.path.dirname(elf_path))
|
| + architecture = _ArchFromElf(elf_path, tool_prefix)
|
| + build_id = BuildIdFromElf(elf_path, tool_prefix)
|
| + timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime(
|
| + elf_path))
|
| + timestamp = calendar.timegm(timestamp_obj.timetuple())
|
| +
|
| + metadata = {
|
| + models.METADATA_GIT_REVISION: git_rev,
|
| + models.METADATA_ELF_ARCHITECTURE: architecture,
|
| + models.METADATA_ELF_MTIME: timestamp,
|
| + models.METADATA_ELF_BUILD_ID: build_id,
|
| + }
|
| +
|
| + if output_directory:
|
| + relative_to_out = lambda path: os.path.relpath(path, output_directory)
|
| + gn_args = _ParseGnArgs(os.path.join(output_directory, 'args.gn'))
|
| + metadata[models.METADATA_MAP_FILENAME] = relative_to_out(map_path)
|
| + metadata[models.METADATA_ELF_FILENAME] = relative_to_out(elf_path)
|
| + metadata[models.METADATA_GN_ARGS] = gn_args
|
| +
|
| + if apk_path:
|
| + metadata[models.METADATA_APK_FILENAME] = relative_to_out(apk_path)
|
| + return metadata
|
| +
|
| +
|
| +def CreateSizeInfo(map_path, elf_path, tool_prefix, output_directory,
|
| raw_only=False):
|
| - """Creates a SizeInfo from the given map file."""
|
| - # tool_prefix needed for c++filt.
|
| - lazy_paths.VerifyToolPrefix()
|
| + """Creates a SizeInfo.
|
| +
|
| + Args:
|
| + map_path: Path to the linker .map(.gz) file to parse.
|
| + elf_path: Path to the corresponding unstripped ELF file. Used to find symbol
|
| + aliases and inlined functions. Can be None.
|
| + tool_prefix: Prefix for c++filt & nm (required).
|
| + output_directory: Build output directory. If None, source_paths and symbol
|
| + alias information will not be recorded.
|
| + raw_only: Fill in just the information required for creating a .size file.
|
| + """
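A hypothetical invocation, for orientation (all paths below are made up):

    size_info = CreateSizeInfo(
        'out/Release/lib.so.map.gz',          # map_path
        'out/Release/lib.unstripped/lib.so',  # elf_path (or None)
        'arm-linux-androideabi-',             # tool_prefix for c++filt & nm
        'out/Release',                        # output_directory (or None)
        raw_only=True)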
|
| + source_mapper = None
|
| + if output_directory:
|
| + # Start by finding the elf_object_paths, so that nm can run on them while
|
| + # the linker .map is being parsed.
|
| + logging.info('Parsing ninja files.')
|
| + source_mapper, elf_object_paths = ninja_parser.Parse(
|
| + output_directory, elf_path)
|
| + assert not elf_path or elf_object_paths, (
|
| + 'Failed to find link command in ninja files for ' +
|
| + os.path.relpath(elf_path, output_directory))
|
|
|
| - if not no_source_paths:
|
| - # Parse .ninja files at the same time as parsing the .map file.
|
| - source_mapper_result = helpers.ForkAndCall(
|
| - ninja_parser.Parse, lazy_paths.VerifyOutputDirectory())
|
| + if elf_path:
|
| + # Run nm on the elf file to retrieve the list of symbol names per-address.
|
| + # This list is required because the .map file contains only a single name
|
| + # for each address, yet multiple symbols are often coalesced when they are
|
| + # identical. This coalescing happens mainly for small symbols and for C++
|
| + # templates. Such symbols make up ~500kb of libchrome.so on Android.
|
| + elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)
|
| +
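The async result resolves to a plain dict keyed by symbol address, listing the
names that were folded into each address; an illustrative sketch of the shape
(symbol names and address are made up):

    aliases_by_address = {
        0x2d4f00: ['Foo<int>::Get()', 'Foo<bool>::Get()'],  # Identical bodies.
    }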
|
| + # Run nm on all .o/.a files to retrieve the symbol names within them.
|
| +    # The list is used to detect when multiple .o files contain the same
|
| +    # symbol (e.g. inline functions), and to update the object_path /
|
| +    # source_path fields accordingly.
|
| +    # Looking in object files is required because the .map file chooses a
|
| + # single path for these symbols.
|
| + # Rather than record all paths for each symbol, set the paths to be the
|
| + # common ancestor of all paths.
|
| + if output_directory:
|
| + bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix, output_directory)
|
| + bulk_analyzer.AnalyzePaths(elf_object_paths)
|
|
|
| with _OpenMaybeGz(map_path) as map_file:
|
| section_sizes, raw_symbols = (
|
| linker_map_parser.MapFileParser().Parse(map_file))
|
|
|
| - if not no_source_paths:
|
| - logging.info('Extracting source paths from .ninja files')
|
| - source_mapper = source_mapper_result.get()
|
| + if elf_path:
|
| + logging.debug('Validating section sizes')
|
| + elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
|
| + for k, v in elf_section_sizes.iteritems():
|
| + if v != section_sizes.get(k):
|
| + logging.error('ELF file and .map file do not agree on section sizes.')
|
| + logging.error('.map file: %r', section_sizes)
|
| + logging.error('readelf: %r', elf_section_sizes)
|
| + sys.exit(1)
|
| +
|
| + if elf_path and output_directory:
|
| + missed_object_paths = _DiscoverMissedObjectPaths(
|
| + raw_symbols, elf_object_paths)
|
| + bulk_analyzer.AnalyzePaths(missed_object_paths)
|
| + bulk_analyzer.Close()
|
| +
|
| + if source_mapper:
|
| + logging.info('Looking up source paths from ninja files')
|
| _ExtractSourcePaths(raw_symbols, source_mapper)
|
| assert source_mapper.unmatched_paths_count == 0, (
|
| 'One or more source file paths could not be found. Likely caused by '
|
| @@ -363,9 +574,31 @@ def CreateSizeInfo(map_path, lazy_paths=None, no_source_paths=False,
|
| _StripLinkerAddedSymbolPrefixes(raw_symbols)
|
| # Map file for some reason doesn't unmangle all names.
|
| # Unmangle prints its own log statement.
|
| - _UnmangleRemainingSymbols(raw_symbols, lazy_paths.tool_prefix)
|
| - logging.info('Normalizing object paths')
|
| - _NormalizeObjectPaths(raw_symbols)
|
| + _UnmangleRemainingSymbols(raw_symbols, tool_prefix)
|
| +
|
| + if elf_path:
|
| + logging.info('Adding aliased symbols, as reported by nm')
|
| + # This normally does not block (it's finished by this time).
|
| + aliases_by_address = elf_nm_result.get()
|
| + _AddSymbolAliases(raw_symbols, aliases_by_address)
|
| +
|
| + if output_directory:
|
| + # For aliases, this provides path information where there wasn't any.
|
| + logging.info('Computing ancestor paths for inline functions and '
|
| + 'normalizing object paths')
|
| +
|
| + object_paths_by_name = bulk_analyzer.Get()
|
| + logging.debug('Fetched path information for %d symbols from %d files',
|
| + len(object_paths_by_name),
|
| + len(elf_object_paths) + len(missed_object_paths))
|
| +      _ComputeAncestorPathsAndNormalizeObjectPaths(
|
| + raw_symbols, object_paths_by_name, source_mapper)
|
| +
|
| + if not elf_path or not output_directory:
|
| + logging.info('Normalizing object paths.')
|
| + for symbol in raw_symbols:
|
| + symbol.object_path = _NormalizeObjectPath(symbol.object_path)
|
| +
|
| size_info = models.SizeInfo(section_sizes, raw_symbols)
|
|
|
| # Name normalization not strictly required, but makes for smaller files.
|
| @@ -488,7 +721,7 @@ def Run(args, parser):
|
| # secondary architectures.
|
| apk_so_path = max(lib_infos, key=lambda x:x.file_size).filename
|
| logging.debug('Sub-apk path=%s', apk_so_path)
|
| - if not elf_path:
|
| + if not elf_path and lazy_paths.output_directory:
|
| elf_path = os.path.join(
|
| lazy_paths.output_directory, 'lib.unstripped',
|
| os.path.basename(apk_so_path.replace('crazy.', '')))
|
| @@ -504,56 +737,34 @@ def Run(args, parser):
|
| if not os.path.exists(map_path):
|
| parser.error('Could not find .map(.gz)? file. Use --map-file.')
|
|
|
| - metadata = None
|
| - if elf_path:
|
| - logging.debug('Constructing metadata')
|
| - git_rev = _DetectGitRevision(os.path.dirname(elf_path))
|
| - architecture = _ArchFromElf(elf_path, lazy_paths.tool_prefix)
|
| - build_id = BuildIdFromElf(elf_path, lazy_paths.tool_prefix)
|
| - timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime(
|
| - elf_path))
|
| - timestamp = calendar.timegm(timestamp_obj.timetuple())
|
| - gn_args = _ParseGnArgs(os.path.join(lazy_paths.output_directory, 'args.gn'))
|
| -
|
| - def relative_to_out(path):
|
| - return os.path.relpath(path, lazy_paths.VerifyOutputDirectory())
|
| -
|
| - metadata = {
|
| - models.METADATA_GIT_REVISION: git_rev,
|
| - models.METADATA_MAP_FILENAME: relative_to_out(map_path),
|
| - models.METADATA_ELF_ARCHITECTURE: architecture,
|
| - models.METADATA_ELF_FILENAME: relative_to_out(elf_path),
|
| - models.METADATA_ELF_MTIME: timestamp,
|
| - models.METADATA_ELF_BUILD_ID: build_id,
|
| - models.METADATA_GN_ARGS: gn_args,
|
| - }
|
| + tool_prefix = lazy_paths.VerifyToolPrefix()
|
| + output_directory = None
|
| + if not args.no_source_paths:
|
| + output_directory = lazy_paths.VerifyOutputDirectory()
|
|
|
| - if apk_path:
|
| - metadata[models.METADATA_APK_FILENAME] = relative_to_out(apk_path)
|
| - # Extraction takes around 1 second, so do it in parallel.
|
| - apk_elf_result = helpers.ForkAndCall(
|
| - _ElfInfoFromApk, apk_path, apk_so_path, lazy_paths.tool_prefix)
|
| + metadata = CreateMetadata(map_path, elf_path, apk_path, tool_prefix,
|
| + output_directory)
|
| + if apk_path and elf_path:
|
| + # Extraction takes around 1 second, so do it in parallel.
|
| + apk_elf_result = concurrent.ForkAndCall(
|
| + _ElfInfoFromApk, (apk_path, apk_so_path, tool_prefix))
|
|
|
| size_info = CreateSizeInfo(
|
| - map_path, lazy_paths, no_source_paths=args.no_source_paths, raw_only=True)
|
| + map_path, elf_path, tool_prefix, output_directory, raw_only=True)
|
|
|
| if metadata:
|
| size_info.metadata = metadata
|
| - logging.debug('Validating section sizes')
|
| - elf_section_sizes = _SectionSizesFromElf(elf_path, lazy_paths.tool_prefix)
|
| - for k, v in elf_section_sizes.iteritems():
|
| - assert v == size_info.section_sizes.get(k), (
|
| - 'ELF file and .map file do not match.')
|
|
|
| if apk_path:
|
| logging.debug('Extracting section sizes from .so within .apk')
|
| unstripped_section_sizes = size_info.section_sizes
|
| apk_build_id, size_info.section_sizes = apk_elf_result.get()
|
| - assert apk_build_id == build_id, (
|
| + assert apk_build_id == metadata[models.METADATA_ELF_BUILD_ID], (
|
| 'BuildID for %s within %s did not match the one at %s' %
|
| (apk_so_path, apk_path, elf_path))
|
|
|
| packed_section_name = None
|
| + architecture = metadata[models.METADATA_ELF_ARCHITECTURE]
|
| if architecture == 'ARM':
|
| packed_section_name = '.rel.dyn'
|
| elif architecture == 'AArch64':
|
|
|