Chromium Code Reviews

Unified Diff: tools/binary_size/libsupersize/nm.py

Issue 2851473003: supersize: Track symbol aliases and shared symbols (Closed)
Patch Set: tweak nm interface Created 3 years, 8 months ago
Index: tools/binary_size/libsupersize/nm.py
diff --git a/tools/binary_size/libsupersize/nm.py b/tools/binary_size/libsupersize/nm.py
new file mode 100644
index 0000000000000000000000000000000000000000..f2017d3e302f9e383f2f27c99cc48bd4f66e7643
--- /dev/null
+++ b/tools/binary_size/libsupersize/nm.py
@@ -0,0 +1,232 @@
+# Copyright 2017 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Functions that rely on parsing output of "nm" tool."""
+
+import collections
+import logging
+import os
+import subprocess
+import sys
+
+import concurrent
+
+
+def CollectAliasesByAddress(elf_path, tool_prefix):
+ """Runs nm on |elf_path| and returns a dict of address->[names]"""
+  names_by_address = collections.defaultdict(list)
+
+  # About 60mb of output, but piping takes ~30s, and loading it into RAM
+  # directly takes 3s.
+  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle',
+          elf_path]
+  output = subprocess.check_output(args)
+  for line in output.splitlines():
+    address_str, section, name = line.split(' ', 2)
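+    # Only code sections ('t'/'T') are considered; per the review thread
+    # below, read-only symbols do not need alias tracking.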
+    if section not in 'tT' or not name or name[0] == '$':
estevenson 2017/04/28 17:06:11 should readonly symbols be included in this?
agrieve 2017/04/28 19:26:59 Added a comment. Looks like no.
+      continue
+
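+    # Mask off the low bit: for ARM, bit 0 of a function address marks thumb
+    # mode (assumed rationale; not stated in this patch).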
+    address = int(address_str, 16) & 0xfffffffffffffffe
estevenson 2017/04/28 17:06:11 This needs a comment
agrieve 2017/04/28 19:26:59 Turns out it wasn't needed :P
+    if not address:
+      continue
+    # Constructors often show up twice.
+    name_list = names_by_address[address]
+    if name not in name_list:
+      name_list.append(name)
+
+  # Since this is run in a separate process, minimize data passing by returning
+  # only aliased symbols.
+  names_by_address = {k: v for k, v in names_by_address.iteritems()
+                      if len(v) > 1}
+
+  return names_by_address
+
+
+def _CollectAliasesByAddressAsyncHelper(elf_path, tool_prefix):
+  result = CollectAliasesByAddress(elf_path, tool_prefix)
+  return concurrent.EncodeDictOfLists(result, key_transform=str)
+
+
+def CollectAliasesByAddressAsync(elf_path, tool_prefix):
+  """Calls CollectAliasesByAddress in a helper process. Returns a Result."""
+  def decode(encoded):
+    return concurrent.DecodeDictOfLists(
+        encoded[0], encoded[1], key_transform=int)
+  return concurrent.ForkAndCall(
+      _CollectAliasesByAddressAsyncHelper, (elf_path, tool_prefix),
+      decode_func=decode)
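+
+# Hypothetical usage, assuming the Result object returned by
+# concurrent.ForkAndCall exposes a blocking get():
+#   names_by_address = CollectAliasesByAddressAsync(elf_path, prefix).get()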
+
+
+def _ParseOneObjectFileOutput(lines):
+  ret = []
+  for line in lines:
+    if not line:
+      break
+    sep = line.find(' ')  # Skip over address.
+    sep = line.find(' ', sep + 1)  # Skip over symbol type.
+    name = line[sep + 1:]
+    # Skip lines like:
+    # 00000000 t $t
+    # 00000000 r $d
+    # 0000041b r .L.str.38
+    if name[0] not in '$.':
+      ret.append(name)
+  return ret
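+
+# For example (hypothetical input):
+#   _ParseOneObjectFileOutput(iter(['00000004 t Foo()', '00000000 t $t', '']))
+#   -> ['Foo()']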
+
+
+def _BatchCollectNames(target, tool_prefix, output_directory):
+  is_archive = isinstance(target, basestring)
+  # Ensure tool_prefix is absolute so that CWD does not affect it.
+  if os.path.sep in tool_prefix:
+    # Use abspath() on the dirname to avoid it stripping a trailing /.
+    dirname = os.path.dirname(tool_prefix)
+    tool_prefix = os.path.abspath(dirname) + tool_prefix[len(dirname):]
+
+  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle']
+  if is_archive:
+    args.append(target)
+  else:
+    args.extend(target)
+  output = subprocess.check_output(args, cwd=output_directory)
+  lines = output.splitlines()
+  if not lines:
+    return '', ''
+  is_multi_file = not lines[0]
+  lines = iter(lines)
+  if is_multi_file:
+    next(lines)
+    path = next(lines)[:-1]  # Path ends with a colon.
+  else:
+    assert not is_archive
+    path = target[0]
+
+  ret = {}
+  while True:
+    if is_archive:
+      # E.g. foo/bar.a(baz.o)
+      path = '%s(%s)' % (target, path)
+    # The multiprocess API uses pickle, which is ridiculously slow. It is more
+    # than 2x faster to use join & split.
+    ret[path] = _ParseOneObjectFileOutput(lines)
+    path = next(lines, ':')[:-1]
+    if not path:
+      return concurrent.EncodeDictOfLists(ret)
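+
+# With multiple inputs, nm's output begins with a blank line and groups
+# symbols into one stanza per file (format assumed from the parsing above):
+#
+#   obj/foo.o:
+#   00000000 t Foo()
+#
+#   obj/bar.o:
+#   00000004 t Bar()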
+
+
+class _BulkObjectFileAnalyzerWorker(object):
+  """Runs nm on all given paths and returns a dict of name->[paths]."""
+
+  def __init__(self, tool_prefix, output_directory):
+    self._tool_prefix = tool_prefix
+    self._output_directory = output_directory
+    self._batches = []
+    self._result = None
+
+  def AnalyzePaths(self, paths):
+    def iter_job_params():
+      object_paths = []
+      for path in paths:
+        if path.endswith('.a'):
+          yield path, self._tool_prefix, self._output_directory
+        else:
+          object_paths.append(path)
+
+      BATCH_SIZE = 50  # Chosen arbitrarily.
+      for i in xrange(0, len(object_paths), BATCH_SIZE):
+        batch = object_paths[i:i + BATCH_SIZE]
+        yield batch, self._tool_prefix, self._output_directory
+
+    paths_by_name = collections.defaultdict(list)
+    params = list(iter_job_params())
+    for encoded_ret in concurrent.BulkForkAndCall(_BatchCollectNames, params):
+      names_by_path = concurrent.DecodeDictOfLists(*encoded_ret)
+      for path, names in names_by_path.iteritems():
+        for name in names:
+          paths_by_name[name].append(path)
+    self._batches.append(paths_by_name)
+
+  def Close(self):
+    assert self._result is None
+    assert self._batches
+    paths_by_name = self._batches[0]
+    for batch in self._batches[1:]:
+      for name, path_list in batch.iteritems():
+        paths_by_name.setdefault(name, []).extend(path_list)
+
+    # Removing entries that have only 1 path would speed up marshalling of the
+    # values. However, these entries are needed to give path information to
+    # symbol aliases.
+    self._result = paths_by_name
+
+  def Get(self):
+    assert self._result is not None
+    return self._result
+
+
+class _BulkObjectFileAnalyzerMaster(object):
+  """Runs BulkObjectFileAnalyzer in a subprocess."""
+
+  def __init__(self, tool_prefix, output_directory):
+    self._process = None
+    self._tool_prefix = tool_prefix
+    self._output_directory = output_directory
+
+  def _Spawn(self):
+    log_level = str(logging.getLogger().getEffectiveLevel())
+    args = [sys.executable, __file__, log_level, self._tool_prefix,
+            self._output_directory]
+    self._process = subprocess.Popen(
+        args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+
+  def AnalyzePaths(self, paths):
+    if self._process is None:
+      self._Spawn()
+
+    logging.debug('Sending batch of %d paths to subprocess', len(paths))
+    payload = '\x01'.join(paths)
+    self._process.stdin.write('{:08x}'.format(len(payload)))
+    self._process.stdin.write(payload)
+
+  def Close(self):
+    assert not self._process.stdin.closed
+    self._process.stdin.close()
+
+  def Get(self):
+    assert self._process.stdin.closed
+    logging.debug('Decoding nm results from forked process')
+
+    encoded_keys_len = int(self._process.stdout.read(8), 16)
+    encoded_keys = self._process.stdout.read(encoded_keys_len)
+    encoded_values = self._process.stdout.read()
+    return concurrent.DecodeDictOfLists(encoded_keys, encoded_values)
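+
+# Wire format between the two processes, as implemented above:
+#   master -> worker: repeated [8 hex chars: payload length][paths joined by
+#       '\x01' separators], then EOF on stdin.
+#   worker -> master: [8 hex chars: encoded keys length][encoded keys]
+#       [encoded values until EOF].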
+
+
+BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerMaster
+if concurrent.DISABLE_ASYNC:
+  BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerWorker
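+
+# Hypothetical usage (the real callers live elsewhere in libsupersize):
+#   analyzer = BulkObjectFileAnalyzer(tool_prefix, output_directory)
+#   analyzer.AnalyzePaths(['obj/a.o', 'obj/b.o', 'lib/foo.a'])
+#   analyzer.Close()  # Signals end-of-input to the subprocess.
+#   paths_by_name = analyzer.Get()  # e.g. {'Foo()': ['obj/a.o'], ...}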
+
+
+def _SubMain(log_level, tool_prefix, output_directory):
+  logging.basicConfig(level=int(log_level),
+                      format='%(levelname).1s %(relativeCreated)6d %(message)s')
+  bulk_analyzer = _BulkObjectFileAnalyzerWorker(tool_prefix, output_directory)
+  while True:
+    payload_len = int(sys.stdin.read(8) or '0', 16)
+    if not payload_len:
+      logging.debug('nm bulk subprocess received eof.')
+      break
+    paths = sys.stdin.read(payload_len).split('\x01')
+    bulk_analyzer.AnalyzePaths(paths)
+
+  bulk_analyzer.Close()
+  paths_by_name = bulk_analyzer.Get()
+  encoded_keys, encoded_values = concurrent.EncodeDictOfLists(paths_by_name)
+  sys.stdout.write('%08x' % len(encoded_keys))
+  sys.stdout.write(encoded_keys)
+  sys.stdout.write(encoded_values)
+  logging.debug('nm bulk subprocess finished.')
+
+
+if __name__ == '__main__':
+  _SubMain(*sys.argv[1:])
