Index: tools/binary_size/libsupersize/nm.py
diff --git a/tools/binary_size/libsupersize/nm.py b/tools/binary_size/libsupersize/nm.py
new file mode 100644
index 0000000000000000000000000000000000000000..f2017d3e302f9e383f2f27c99cc48bd4f66e7643
--- /dev/null
+++ b/tools/binary_size/libsupersize/nm.py
@@ -0,0 +1,232 @@
+# Copyright 2017 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Functions that rely on parsing output of "nm" tool."""
+
+import collections
+import logging
+import os
+import subprocess
+import sys
+
+import concurrent
+
+
+def CollectAliasesByAddress(elf_path, tool_prefix):
+  """Runs nm on |elf_path| and returns a dict of address->[names]."""
+  names_by_address = collections.defaultdict(list)
+
+  # About 60mb of output, but piping takes ~30s, and loading it into RAM
+  # directly takes 3s.
+  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle',
+          elf_path]
+  output = subprocess.check_output(args)
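+  # Each nm line is "<hex address> <section letter> <demangled name>", e.g.:
+  #   002a0000 t foo::Bar()
+  # (example illustrative; see the nm man page for details).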
+  for line in output.splitlines():
+    address_str, section, name = line.split(' ', 2)
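+    # Keep only .text symbols ('t' local / 'T' global); per the review thread
+    # below, read-only data symbols are not needed. Names starting with '$'
+    # are ARM mapping symbols such as $t and $d.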
+    if section not in 'tT' or not name or name[0] == '$':
estevenson
2017/04/28 17:06:11
should readonly symbols be included in this?
agrieve
2017/04/28 19:26:59
Added a comment. Looks like no.

+      continue
+
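+    # Clear the low bit, which is set on the addresses of ARM Thumb functions.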
+    address = int(address_str, 16) & 0xfffffffffffffffe
estevenson
2017/04/28 17:06:11
This needs a comment
agrieve
2017/04/28 19:26:59
Turns out it wasn't needed :P

+    if not address:
+      continue
+    # Constructors often show up twice.
+    name_list = names_by_address[address]
+    if name not in name_list:
+      name_list.append(name)
+
+  # Since this is run in a separate process, minimize data passing by returning
+  # only aliased symbols.
+  names_by_address = {k: v for k, v in names_by_address.iteritems()
+                      if len(v) > 1}
+
+  return names_by_address
+
+
+def _CollectAliasesByAddressAsyncHelper(elf_path, tool_prefix):
+  result = CollectAliasesByAddress(elf_path, tool_prefix)
+  return concurrent.EncodeDictOfLists(result, key_transform=str)
+
+
+def CollectAliasesByAddressAsync(elf_path, tool_prefix):
+  """Calls CollectAliasesByAddress in a helper process. Returns a Result."""
+  def decode(encoded):
+    return concurrent.DecodeDictOfLists(
+        encoded[0], encoded[1], key_transform=int)
+  return concurrent.ForkAndCall(
+      _CollectAliasesByAddressAsyncHelper, (elf_path, tool_prefix),
+      decode_func=decode)
+
+
+def _ParseOneObjectFileOutput(lines):
+  ret = []
+  for line in lines:
+    if not line:
+      break
+    sep = line.find(' ')  # Skip over address.
+    sep = line.find(' ', sep + 1)  # Skip over symbol type.
+    name = line[sep + 1:]
+    # Skip lines like:
+    # 00000000 t $t
+    # 00000000 r $d
+    # 0000041b r .L.str.38
+    if name[0] not in '$.':
+      ret.append(name)
+  return ret
+
+
+def _BatchCollectNames(target, tool_prefix, output_directory):
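+  # |target| is either an archive path (str) or a list of object file paths.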
+  is_archive = isinstance(target, basestring)
+  # Ensure tool_prefix is absolute so that CWD does not affect it.
+  if os.path.sep in tool_prefix:
+    # Use abspath() on the dirname to avoid it stripping a trailing /.
+    dirname = os.path.dirname(tool_prefix)
+    tool_prefix = os.path.abspath(dirname) + tool_prefix[len(dirname):]
+
+  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle']
+  if is_archive:
+    args.append(target)
+  else:
+    args.extend(target)
+  output = subprocess.check_output(args, cwd=output_directory)
+  lines = output.splitlines()
+  if not lines:
+    return '', ''
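+  # With multiple inputs (or an archive), nm precedes each file's symbols
+  # with a blank line and then a "path:" header; with a single object file
+  # it prints no header.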
+  is_multi_file = not lines[0]
+  lines = iter(lines)
+  if is_multi_file:
+    next(lines)
+    path = next(lines)[:-1]  # Path ends with a colon.
+  else:
+    assert not is_archive
+    path = target[0]
+
+  ret = {}
+  while True:
+    if is_archive:
+      # E.g. foo/bar.a(baz.o)
+      path = '%s(%s)' % (target, path)
+    # The multiprocess API uses pickle, which is ridiculously slow. More than
+    # 2x faster to use join & split.
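+    # Sketch of the join & split idea (the actual delimiters live in
+    # concurrent.py and may differ):
+    #   {'a.o': ['foo', 'bar'], 'b.o': ['baz']}
+    #     -> keys = 'a.o\x01b.o', values = 'foo\x02bar\x01baz'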
+    ret[path] = _ParseOneObjectFileOutput(lines)
+    path = next(lines, ':')[:-1]
+    if not path:
+      return concurrent.EncodeDictOfLists(ret)
+
+
+class _BulkObjectFileAnalyzerWorker(object):
+  """Runs nm on all given paths and returns a dict of name->[paths]."""
+
+  def __init__(self, tool_prefix, output_directory):
+    self._tool_prefix = tool_prefix
+    self._output_directory = output_directory
+    self._batches = []
+    self._result = None
+
+  def AnalyzePaths(self, paths):
+    def iter_job_params():
+      object_paths = []
+      for path in paths:
+        if path.endswith('.a'):
+          yield path, self._tool_prefix, self._output_directory
+        else:
+          object_paths.append(path)
+
+      BATCH_SIZE = 50  # Chosen arbitrarily.
+      for i in xrange(0, len(object_paths), BATCH_SIZE):
+        batch = object_paths[i:i + BATCH_SIZE]
+        yield batch, self._tool_prefix, self._output_directory
+
+    paths_by_name = collections.defaultdict(list)
+    params = list(iter_job_params())
+    for encoded_ret in concurrent.BulkForkAndCall(_BatchCollectNames, params):
+      names_by_path = concurrent.DecodeDictOfLists(*encoded_ret)
+      for path, names in names_by_path.iteritems():
+        for name in names:
+          paths_by_name[name].append(path)
+    self._batches.append(paths_by_name)
+
+  def Close(self):
+    assert self._result is None
+    assert self._batches
+    paths_by_name = self._batches[0]
+    for batch in self._batches[1:]:
+      for name, path_list in batch.iteritems():
+        paths_by_name.setdefault(name, []).extend(path_list)
+
+    # Marshalling of the values could be sped up by removing all entries that
+    # have only 1 path. However, these entries are needed to give path
+    # information to symbol aliases.
+    self._result = paths_by_name
+
+  def Get(self):
+    assert self._result is not None
+    return self._result
+
+
+class _BulkObjectFileAnalyzerMaster(object):
+  """Runs BulkObjectFileAnalyzer in a subprocess."""
+
+  def __init__(self, tool_prefix, output_directory):
+    self._process = None
+    self._tool_prefix = tool_prefix
+    self._output_directory = output_directory
+
+  def _Spawn(self):
+    log_level = str(logging.getLogger().getEffectiveLevel())
+    args = [sys.executable, __file__, log_level, self._tool_prefix,
+            self._output_directory]
+    self._process = subprocess.Popen(
+        args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+
+  def AnalyzePaths(self, paths):
+    if self._process is None:
+      self._Spawn()
+
+    logging.debug('Sending batch of %d paths to subprocess', len(paths))
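+    # Framing: an 8-hex-digit payload length followed by the payload itself.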
+    payload = '\x01'.join(paths)
+    self._process.stdin.write('{:08x}'.format(len(payload)))
+    self._process.stdin.write(payload)
+
+  def Close(self):
+    assert not self._process.stdin.closed
+    self._process.stdin.close()
+
+  def Get(self):
+    assert self._process.stdin.closed
+    logging.debug('Decoding nm results from forked process')
+
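+    # Mirrors what _SubMain writes: an 8-hex-digit length for the encoded
+    # keys, the keys themselves, then the encoded values until EOF.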
+    encoded_keys_len = int(self._process.stdout.read(8), 16)
+    encoded_keys = self._process.stdout.read(encoded_keys_len)
+    encoded_values = self._process.stdout.read()
+    return concurrent.DecodeDictOfLists(encoded_keys, encoded_values)
+
+
+BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerMaster
+if concurrent.DISABLE_ASYNC:
+  BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerWorker
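+
+# Typical usage (a sketch based on the interface above):
+#   analyzer = BulkObjectFileAnalyzer(tool_prefix, output_directory)
+#   analyzer.AnalyzePaths(paths)  # May be called multiple times.
+#   analyzer.Close()
+#   paths_by_name = analyzer.Get()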
+
+
+def _SubMain(log_level, tool_prefix, output_directory):
+  logging.basicConfig(level=int(log_level),
+                      format='%(levelname).1s %(relativeCreated)6d %(message)s')
+  bulk_analyzer = _BulkObjectFileAnalyzerWorker(tool_prefix, output_directory)
+  while True:
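+    # Read the same 8-hex-digit length framing written by the master process.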
+    payload_len = int(sys.stdin.read(8) or '0', 16)
+    if not payload_len:
+      logging.debug('nm bulk subprocess received eof.')
+      break
+    paths = sys.stdin.read(payload_len).split('\x01')
+    bulk_analyzer.AnalyzePaths(paths)
+
+  bulk_analyzer.Close()
+  paths_by_name = bulk_analyzer.Get()
+  encoded_keys, encoded_values = concurrent.EncodeDictOfLists(paths_by_name)
+  sys.stdout.write('%08x' % len(encoded_keys))
+  sys.stdout.write(encoded_keys)
+  sys.stdout.write(encoded_values)
+  logging.debug('nm bulk subprocess finished.')
+
+
+if __name__ == '__main__':
+  _SubMain(*sys.argv[1:])