Chromium Code Reviews

Unified Diff: tools/binary_size/map2size.py

Issue 2809043003: //tools/binary_size: Group [clone] and ** symbols (Closed)
Patch Set: review comments. Created 3 years, 8 months ago.
Index: tools/binary_size/map2size.py
diff --git a/tools/binary_size/map2size.py b/tools/binary_size/map2size.py
index 205baffd35f3184cc9bae16ef062eba863b5afe2..95310c3199fdc03141f023eeb5b640873a0a54cc 100755
--- a/tools/binary_size/map2size.py
+++ b/tools/binary_size/map2size.py
@@ -7,6 +7,7 @@
import argparse
import calendar
+import collections
import datetime
import gzip
import logging
@@ -34,9 +35,9 @@ def _OpenMaybeGz(path, mode=None):
return open(path, mode or 'r')
-def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
+def _UnmangleRemainingSymbols(symbols, tool_prefix):
"""Uses c++filt to unmangle any symbols that need it."""
- to_process = [s for s in symbol_group if s.name.startswith('_Z')]
+ to_process = [s for s in symbols if s.name.startswith('_Z')]
if not to_process:
return
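
A minimal standalone sketch of this demangling step, assuming a plain c++filt binary on PATH (the real code derives the tool path from tool_prefix); the mangled name in the example is made up:

    import subprocess

    def demangle(mangled_names):
      # c++filt reads one mangled name per line and echoes the demangled
      # names back in the same order.
      proc = subprocess.Popen(['c++filt'], stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE)
      stdout, _ = proc.communicate('\n'.join(mangled_names))
      return stdout.splitlines()

    print demangle(['_ZN4base4BindEv'])  # -> ['base::Bind()']
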
@@ -50,7 +51,7 @@ def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
to_process[i].name = line
-def _NormalizeNames(symbol_group):
+def _NormalizeNames(symbols):
"""Ensures that all names are formatted in a useful way.
This includes:
@@ -60,9 +61,9 @@ def _NormalizeNames(symbol_group):
- Moving "vtable for" and the like to be suffixes rather than prefixes.
"""
found_prefixes = set()
- for symbol in symbol_group:
+ for symbol in symbols:
if symbol.name.startswith('*'):
- # See comment in _RemoveDuplicatesAndCalculatePadding() about when this
+ # See comment in _CalculatePadding() about when this
# can happen.
continue
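
A rough standalone sketch of the prefix-to-suffix move mentioned in the docstring above; the exact suffix format ("[vtable]" etc.) is an assumption of this sketch, not something the diff specifies:

    _PREFIXES = ('vtable for ', 'typeinfo for ', 'thunk to ')

    def move_prefix_to_suffix(name):
      # E.g. 'vtable for Foo' -> 'Foo [vtable]' (suffix format assumed).
      for prefix in _PREFIXES:
        if name.startswith(prefix):
          return '%s [%s]' % (name[len(prefix):], prefix.split()[0])
      return name

    assert move_prefix_to_suffix('vtable for Foo') == 'Foo [vtable]'
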
@@ -103,9 +104,9 @@ def _NormalizeNames(symbol_group):
logging.debug('Found name prefixes of: %r', found_prefixes)
-def _NormalizeObjectPaths(symbol_group):
+def _NormalizeObjectPaths(symbols):
"""Ensures that all paths are formatted in a useful way."""
- for symbol in symbol_group:
+ for symbol in symbols:
path = symbol.object_path
if path.startswith('obj/'):
# Convert obj/third_party/... -> third_party/...
@@ -130,7 +131,7 @@ def _NormalizeSourcePath(path):
return path
-def _ExtractSourcePaths(symbol_group, output_directory):
+def _ExtractSourcePaths(symbols, output_directory):
"""Fills in the .source_path attribute of all symbols.
Returns True if source paths were found.
@@ -138,7 +139,7 @@ def _ExtractSourcePaths(symbol_group, output_directory):
all_found = True
mapper = ninja_parser.SourceFileMapper(output_directory)
- for symbol in symbol_group:
+ for symbol in symbols:
object_path = symbol.object_path
if symbol.source_path or not object_path:
continue
@@ -154,15 +155,14 @@ def _ExtractSourcePaths(symbol_group, output_directory):
return all_found
-def _RemoveDuplicatesAndCalculatePadding(symbol_group):
- """Removes symbols at the same address and calculates the |padding| field.
+def _CalculatePadding(symbols):
+ """Populates the |padding| field based on symbol addresses.
Symbols must already be sorted by |address|.
"""
- to_remove = []
seen_sections = []
- for i, symbol in enumerate(symbol_group[1:]):
- prev_symbol = symbol_group[i]
+ for i, symbol in enumerate(symbols[1:]):
+ prev_symbol = symbols[i]
if prev_symbol.section_name != symbol.section_name:
assert symbol.section_name not in seen_sections, (
'Input symbols must be sorted by section, then address.')
@@ -170,12 +170,10 @@ def _RemoveDuplicatesAndCalculatePadding(symbol_group):
continue
if symbol.address <= 0 or prev_symbol.address <= 0:
continue
- # Fold symbols that are at the same address (happens in nm output).
+ # Padding-only symbols occur for "** symbol gap" entries.
prev_is_padding_only = prev_symbol.size_without_padding == 0
if symbol.address == prev_symbol.address and not prev_is_padding_only:
- symbol.size = max(prev_symbol.size, symbol.size)
- to_remove.add(symbol)
- continue
+ assert False, 'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol)
# Even with symbols at the same address removed, overlaps can still
# happen. In this case, padding will be negative (and this is fine).
padding = symbol.address - prev_symbol.end_address
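
A worked example of the padding computation, using hypothetical addresses:

    # A symbol at 0x1000 with size 0x20 ends at 0x1020. If the next symbol
    # starts at 0x1028, the 8-byte gap is recorded as its padding.
    prev_address, prev_size = 0x1000, 0x20
    next_address = 0x1028
    padding = next_address - (prev_address + prev_size)
    assert padding == 8  # An overlap would yield negative padding instead.
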
@@ -200,62 +198,163 @@ def _RemoveDuplicatesAndCalculatePadding(symbol_group):
assert symbol.size >= 0, (
'Symbol has negative size (likely not sorted properly): '
'%r\nprev symbol: %r' % (symbol, prev_symbol))
- # Map files have no overlaps, so worth special-casing the no-op case.
- if to_remove:
- logging.info('Removing %d overlapping symbols', len(to_remove))
- symbol_group -= models.SymbolGroup(to_remove)
-def Analyze(path, lazy_paths=None):
- """Returns a SizeInfo for the given |path|.
+def _ClusterSymbols(symbols):
+ """Returns a new list of symbols with some symbols moved into groups.
- Args:
- path: Can be a .size file, or a .map(.gz). If the latter, then lazy_paths
- must be provided as well.
+ Groups include:
+ * Symbols that have [clone] in their name (created by compiler optimization).
+ * Star symbols (such as "** merge strings", and "** symbol gap")
"""
- if path.endswith('.size'):
- logging.debug('Loading results from: %s', path)
- size_info = file_format.LoadSizeInfo(path)
- # Recompute derived values (padding and function names).
- logging.info('Calculating padding')
- _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
- logging.info('Deriving signatures')
- # Re-parse out function parameters.
- _NormalizeNames(size_info.symbols)
- return size_info
- elif not path.endswith('.map') and not path.endswith('.map.gz'):
- raise Exception('Expected input to be a .map or a .size')
- else:
+ # http://unix.stackexchange.com/questions/223013/function-symbol-gets-part-suffix-after-compilation
+ # Example name suffixes:
+ # [clone .part.322]
+ # [clone .isra.322]
+ # [clone .constprop.1064]
+
+ # Step 1: Create name map, find clones, collect star syms into replacements.
+ logging.debug('Creating name -> symbol map')
+ clone_indices = []
+ indices_by_full_name = {}
+ # (name, full_name) -> [(index, sym),...]
+ replacements_by_name = collections.defaultdict(list)
+ for i, symbol in enumerate(symbols):
+ if symbol.name.startswith('**'):
+ # "symbol gap 3" -> "symbol gaps"
+ name = re.sub(r'\s+\d+$', 's', symbol.name)
+ replacements_by_name[(name, None)].append((i, symbol))
+ elif symbol.full_name:
+ if symbol.full_name.endswith(']') and ' [clone ' in symbol.full_name:
+ clone_indices.append(i)
+ else:
+ indices_by_full_name[symbol.full_name] = i
+
+ # Step 2: Collect same-named clone symbols.
+ logging.debug('Grouping all clones')
+ group_names_by_index = {}
+ for i in clone_indices:
+ symbol = symbols[i]
+ # Multiple [clone] attributes may exist, so strip from the leftmost one.
+ stripped_name = symbol.name[:symbol.name.index(' [clone ')]
+ stripped_full_name = symbol.full_name[:symbol.full_name.index(' [clone ')]
+ name_tup = (stripped_name, stripped_full_name)
+ replacement_list = replacements_by_name[name_tup]
+
+ if not replacement_list:
+ # First occurrence; check for a non-clone symbol.
+ non_clone_idx = indices_by_full_name.get(stripped_full_name)
+ if non_clone_idx is not None:
+ non_clone_symbol = symbols[non_clone_idx]
+ replacement_list.append((non_clone_idx, non_clone_symbol))
+ group_names_by_index[non_clone_idx] = stripped_name
+
+ replacement_list.append((i, symbol))
+ group_names_by_index[i] = stripped_name
+
+ # Step 3: Undo clustering when length=1.
+ # Removing these groups means Diff() logic must know about [clone] suffix.
+ to_clear = []
+ for name_tup, replacement_list in replacements_by_name.iteritems():
+ if len(replacement_list) == 1:
+ to_clear.append(name_tup)
+ for name_tup in to_clear:
+ del replacements_by_name[name_tup]
+
+ # Step 4: Replace first symbol from each cluster with a SymbolGroup.
+ before_symbol_count = sum(len(x) for x in replacements_by_name.itervalues())
+ logging.debug('Creating %d symbol groups from %d symbols. %d clones had only '
+ 'one symbol.', len(replacements_by_name), before_symbol_count,
+ len(to_clear))
+
+ len_delta = len(replacements_by_name) - before_symbol_count
+ grouped_symbols = [None] * (len(symbols) + len_delta)
+ dest_index = 0
+ src_index = 0
+ seen_names = set()
+ replacement_names_by_index = {}
+ for name_tup, replacement_list in replacements_by_name.iteritems():
+ for tup in replacement_list:
+ replacement_names_by_index[tup[0]] = name_tup
+
+ sorted_items = replacement_names_by_index.items()
+ sorted_items.sort(key=lambda tup: tup[0])
+ for index, name_tup in sorted_items:
+ count = index - src_index
+ grouped_symbols[dest_index:dest_index + count] = (
+ symbols[src_index:src_index + count])
+ src_index = index + 1
+ dest_index += count
+ if name_tup not in seen_names:
+ seen_names.add(name_tup)
+ group_symbols = [tup[1] for tup in replacements_by_name[name_tup]]
+ grouped_symbols[dest_index] = models.SymbolGroup(
+ group_symbols, name=name_tup[0], full_name=name_tup[1],
+ section_name=group_symbols[0].section_name)
+ dest_index += 1
+
+ assert len(grouped_symbols[dest_index:None]) == len(symbols[src_index:None])
+ grouped_symbols[dest_index:None] = symbols[src_index:None]
+ logging.debug('Finished making groups.')
+ return grouped_symbols
+
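
A standalone sketch of the two name reductions that _ClusterSymbols() groups by (the symbol names are made up):

    import re

    def cluster_name(name):
      if name.startswith('**'):
        # Star symbols: drop the trailing counter and pluralize, so that
        # '** symbol gap 3' and '** symbol gap 7' share one group name.
        return re.sub(r'\s+\d+$', 's', name)
      if ' [clone ' in name:
        # Clones: strip from the leftmost ' [clone ' suffix onwards.
        return name[:name.index(' [clone ')]
      return name

    assert cluster_name('** symbol gap 3') == '** symbol gaps'
    assert cluster_name('Foo() [clone .isra.2]') == 'Foo()'
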
+
+def LoadAndPostProcessSizeInfo(path):
+ """Returns a SizeInfo for the given |path|."""
+ logging.debug('Loading results from: %s', path)
+ size_info = file_format.LoadSizeInfo(path)
+ _PostProcessSizeInfo(size_info)
+ return size_info
+
+
+def _PostProcessSizeInfo(size_info):
+ logging.info('Normalizing symbol names')
+ _NormalizeNames(size_info.raw_symbols)
+ logging.info('Calculating padding')
+ _CalculatePadding(size_info.raw_symbols)
+ logging.info('Grouping decomposed functions')
+ size_info.symbols = models.SymbolGroup(
+ _ClusterSymbols(size_info.raw_symbols))
+ logging.info('Processed %d symbols', len(size_info.raw_symbols))
+
+
+def CreateSizeInfo(map_path, lazy_paths=None, no_source_paths=False,
+ raw_only=False):
+ """Creates a SizeInfo from the given map file."""
+ if not no_source_paths:
# output_directory needed for source file information.
lazy_paths.VerifyOutputDirectory()
- # tool_prefix needed for c++filt.
- lazy_paths.VerifyToolPrefix()
-
- with _OpenMaybeGz(path) as map_file:
- section_sizes, symbols = linker_map_parser.MapFileParser().Parse(map_file)
- size_info = models.SizeInfo(section_sizes, models.SymbolGroup(symbols))
-
- # Map file for some reason doesn't unmangle all names.
- logging.info('Calculating padding')
- _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
- # Unmangle prints its own log statement.
- _UnmangleRemainingSymbols(size_info.symbols, lazy_paths.tool_prefix)
+ # tool_prefix needed for c++filt.
+ lazy_paths.VerifyToolPrefix()
+
+ with _OpenMaybeGz(map_path) as map_file:
+ section_sizes, raw_symbols = (
+ linker_map_parser.MapFileParser().Parse(map_file))
+
+ if not no_source_paths:
logging.info('Extracting source paths from .ninja files')
- all_found = _ExtractSourcePaths(size_info.symbols,
- lazy_paths.output_directory)
+ all_found = _ExtractSourcePaths(raw_symbols, lazy_paths.output_directory)
assert all_found, (
'One or more source file paths could not be found. Likely caused by '
'.ninja files being generated at a different time than the .map file.')
- # Resolve paths prints its own log statement.
- logging.info('Normalizing names')
- _NormalizeNames(size_info.symbols)
- logging.info('Normalizing paths')
- _NormalizeObjectPaths(size_info.symbols)
+ # Map file for some reason doesn't unmangle all names.
+ # Unmangle prints its own log statement.
+ _UnmangleRemainingSymbols(raw_symbols, lazy_paths.tool_prefix)
+ logging.info('Normalizing object paths')
+ _NormalizeObjectPaths(raw_symbols)
+ size_info = models.SizeInfo(section_sizes, raw_symbols)
+
+ # Name normalization is not strictly required, but makes for smaller files.
+ if raw_only:
+ logging.info('Normalizing symbol names')
+ _NormalizeNames(size_info.raw_symbols)
+ else:
+ _PostProcessSizeInfo(size_info)
- if logging.getLogger().isEnabledFor(logging.INFO):
+ if logging.getLogger().isEnabledFor(logging.DEBUG):
for line in describe.DescribeSizeInfoCoverage(size_info):
logging.info(line)
- logging.info('Finished analyzing %d symbols', len(size_info.symbols))
+ logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
return size_info
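
How the two entry points divide the work, sketched as a hypothetical flow (lazy_paths and all paths are made up, and the serialization step itself is not shown in this diff):

    # raw_only=True skips clustering so the serialized .size file stays
    # small; loading then recomputes names, padding, and symbol groups.
    size_info = CreateSizeInfo('out/Release/libchrome.so.map.gz', lazy_paths,
                               raw_only=True)
    # ... the tool serializes size_info to a .size file here ...
    size_info = LoadAndPostProcessSizeInfo('libchrome.size')
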
@@ -303,22 +402,28 @@ def _ParseGnArgs(args_path):
def main(argv):
parser = argparse.ArgumentParser(argv)
- parser.add_argument('elf_file', help='Path to input ELF file.')
- parser.add_argument('output_file', help='Path to output .size(.gz) file.')
+ parser.add_argument('--elf-file', required=True,
+ help='Path to input ELF file. Currently used for '
+ 'capturing metadata. Pass "" to skip metadata '
+ 'collection.')
parser.add_argument('--map-file',
help='Path to input .map(.gz) file. Defaults to '
'{{elf_file}}.map(.gz)?')
+ parser.add_argument('--output-file', required=True,
+ help='Path to output .size file.')
+ parser.add_argument('--no-source-paths', action='store_true',
+ help='Do not use .ninja files to map '
+ 'object_path -> source_path')
paths.AddOptions(parser)
args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
if not args.output_file.endswith('.size'):
parser.error('output_file must end with .size')
if args.map_file:
+ if (not args.map_file.endswith('.map')
+ and not args.map_file.endswith('.map.gz')):
+ parser.error('Expected --map-file to end with .map or .map.gz')
map_file_path = args.map_file
- elif args.elf_file.endswith('.size'):
- # Allow a .size file to be passed as input as well. Useful for measuring
- # serialization speed.
- pass
else:
map_file_path = args.elf_file + '.map'
if not os.path.exists(map_file_path):
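
Hypothetical invocations given the flags above (paths are made up; the map file defaults to the ELF path plus ".map"):

    tools/binary_size/map2size.py --elf-file out/Release/libchrome.so \
        --output-file libchrome.size

    # Skip metadata collection and .ninja-based source paths:
    tools/binary_size/map2size.py --elf-file "" \
        --map-file out/Release/libchrome.so.map.gz \
        --output-file libchrome.size --no-source-paths
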
@@ -328,7 +433,7 @@ def main(argv):
lazy_paths = paths.LazyPaths(args=args, input_file=args.elf_file)
metadata = None
- if args.elf_file and not args.elf_file.endswith('.size'):
+ if args.elf_file:
logging.debug('Constructing metadata')
git_rev = _DetectGitRevision(os.path.dirname(args.elf_file))
build_id = BuildIdFromElf(args.elf_file, lazy_paths.tool_prefix)
@@ -349,9 +454,12 @@ def main(argv):
models.METADATA_GN_ARGS: gn_args,
}
- size_info = Analyze(map_file_path, lazy_paths)
+ size_info = CreateSizeInfo(map_file_path, lazy_paths,
+ no_source_paths=args.no_source_paths,
+ raw_only=True)
if metadata:
+ size_info.metadata = metadata
logging.debug('Validating section sizes')
elf_section_sizes = _SectionSizesFromElf(args.elf_file,
lazy_paths.tool_prefix)
@@ -359,8 +467,6 @@ def main(argv):
assert v == size_info.section_sizes.get(k), (
'ELF file and .map file do not match.')
- size_info.metadata = metadata
-
logging.info('Recording metadata: \n %s',
'\n '.join(describe.DescribeMetadata(size_info.metadata)))
logging.info('Saving result to %s', args.output_file)