| Index: tools/binary_size/libsupersize/nm.py
|
| diff --git a/tools/binary_size/libsupersize/nm.py b/tools/binary_size/libsupersize/nm.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..e0ce13e403b4516d906f992b135ff64b06928a61
|
| --- /dev/null
|
| +++ b/tools/binary_size/libsupersize/nm.py
|
| @@ -0,0 +1,175 @@
|
| +# Copyright 2017 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +"""Dealing with "nm" tool."""
|
| +
|
| +import collections
|
| +import logging
|
| +import os
|
| +import subprocess
|
| +import sys
|
| +
|
| +import concurrent
|
| +
|
| +
|
def CollectAliasesByAddress(elf_path, tool_prefix):
  """Runs nm on |elf_path| and groups code-symbol names by address.

  Returns:
    {address: [name, ...]} containing only addresses that map to more than
    one distinct name (i.e. aliased symbols).
  """
  # About 60mb of output, but piping takes ~30s, and loading it into RAM
  # directly takes 3s.
  cmd = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle',
         elf_path]
  nm_output = subprocess.check_output(cmd)

  symbol_names = collections.defaultdict(list)
  for nm_line in nm_output.splitlines():
    addr_text, sym_type, sym_name = nm_line.split(' ', 2)
    # Keep only code symbols (t/T) and skip mapping symbols such as "$t".
    if sym_type not in 'tT' or not sym_name or sym_name.startswith('$'):
      continue
    # Mask off the low address bit (presumably the ARM Thumb bit — confirm),
    # and ignore symbols at address 0.
    addr = int(addr_text, 16) & 0xfffffffffffffffe
    if not addr:
      continue
    # Constructors often show up twice.
    existing = symbol_names[addr]
    if sym_name not in existing:
      existing.append(sym_name)

  # Since this is run in a separate process, minimize data passing by
  # returning only aliased symbols.
  return {addr: names for addr, names in symbol_names.iteritems()
          if len(names) > 1}
|
| +
|
| +
|
def _CollectAliasesByAddressAsyncHelper(elf_path, tool_prefix):
  """Forked-process helper: collects aliases and encodes them for transfer.

  Addresses are ints, so keys are stringified before encoding.
  """
  return concurrent.EncodeDictOfLists(
      CollectAliasesByAddress(elf_path, tool_prefix), key_transform=str)
|
| +
|
| +
|
def CollectAliasesByAddressAsync(elf_path, tool_prefix):
  """Runs CollectAliasesByAddress() in a forked process.

  The encoded (keys, values) payload produced by the helper is decoded with
  int keys restored once the result is retrieved.
  """
  def _decode(payload):
    encoded_keys, encoded_values = payload
    return concurrent.DecodeDictOfLists(
        encoded_keys, encoded_values, key_transform=int)

  return concurrent.ForkAndCall(
      _CollectAliasesByAddressAsyncHelper, (elf_path, tool_prefix),
      decode_func=_decode)
|
| +
|
| +
|
| +def _ParseOneObjectFileOutput(lines):
|
| + ret = []
|
| + for line in lines:
|
| + if not line:
|
| + break
|
| + sep = line.find(' ') # Skip over address.
|
| + sep = line.find(' ', sep + 1) # Skip over symbol type.
|
| + name = line[sep + 1:]
|
| + # Skip lines like:
|
| + # 00000000 t $t
|
| + # 00000000 r $d
|
| + # 0000041b r .L.str.38
|
| + if name[0] not in '$.':
|
| + ret.append(name)
|
| + return ret
|
| +
|
| +
|
def _BatchCollectNames(target, tool_prefix, output_directory):
  """Runs nm on an archive or a batch of object files and collects names.

  Args:
    target: Either a single archive path (a string) or a list of object file
        paths, resolved relative to |output_directory|.
    tool_prefix: Path prefix for the nm binary.
    output_directory: Directory to run nm from.

  Returns:
    The ({path: [name, ...]}) result encoded via
    concurrent.EncodeDictOfLists(), or ('', '') when nm produced no output.
  """
  # A string target means an archive; a list means a batch of object files.
  is_archive = isinstance(target, basestring)
  # Ensure tool_prefix is absolute so that CWD does not affect it
  if os.path.sep in tool_prefix:
    # Use abspath() on the dirname to avoid it stripping a trailing /.
    dirname = os.path.dirname(tool_prefix)
    tool_prefix = os.path.abspath(dirname) + tool_prefix[len(dirname):]

  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle']
  if is_archive:
    args.append(target)
  else:
    args.extend(target)
  output = subprocess.check_output(args, cwd=output_directory)
  lines = output.splitlines()
  if not lines:
    return '', ''
  # With multiple input files (or an archive), nm emits a blank line, then a
  # "path:" header, before each file's symbols.
  is_multi_file = not lines[0]
  lines = iter(lines)
  if is_multi_file:
    next(lines)
    path = next(lines)[:-1]  # Path ends with a colon.
  else:
    # Single-file output has no header; the one path is the target itself.
    assert not is_archive
    path = target[0]

  ret = {}
  while True:
    if is_archive:
      # E.g. foo/bar.a(baz.o)
      path = '%s(%s)' % (target, path)
    # _ParseOneObjectFileOutput() consumes |lines| up to the next blank
    # separator; the following next() then reads the next "path:" header.
    # The multiprocess API uses pickle, which is ridiculously slow. More than 2x
    # faster to use join & split.
    ret[path] = _ParseOneObjectFileOutput(lines)
    # Default of ':' makes an exhausted iterator yield an empty path below.
    path = next(lines, ':')[:-1]
    if not path:
      return concurrent.EncodeDictOfLists(ret)
|
| +
|
| +
|
def BulkAnalyzeObjectFiles(paths, tool_prefix, output_directory):
  """Runs nm over all of |paths| in parallel and inverts the results.

  Returns:
    defaultdict of {symbol name: [path, ...]} listing every path that
    defines each name.
  """
  _BATCH_SIZE = 50  # Chosen arbitrarily.

  def _iter_jobs():
    # Archives (.a) each become their own job; plain object files are
    # grouped into batches.
    object_file_paths = []
    for p in paths:
      if p.endswith('.a'):
        yield p, tool_prefix, output_directory
      else:
        object_file_paths.append(p)

    for start in xrange(0, len(object_file_paths), _BATCH_SIZE):
      chunk = object_file_paths[start:start + _BATCH_SIZE]
      yield chunk, tool_prefix, output_directory

  paths_by_name = collections.defaultdict(list)
  for encoded in concurrent.IterForkAndCall(_BatchCollectNames, _iter_jobs()):
    names_by_path = concurrent.DecodeDictOfLists(*encoded)
    for obj_path, symbol_names in names_by_path.iteritems():
      for symbol_name in symbol_names:
        paths_by_name[symbol_name].append(obj_path)

  # TODO(agrieve): Combining the two calls to BulkAnalyzeObjectFiles() would
  # allow pruning entries from this dict with only one path, and make
  # marshalling it back much faster.
  return paths_by_name
|
| +
|
| +
|
def BulkAnalyzeObjectFilesAsync(paths, tool_prefix, output_directory):
  """Runs BulkAnalyzeObjectFiles() without blocking the caller.

  Spawns this very file as a subprocess (see main()) and decodes its stdout.
  Returns the object produced by concurrent.CallOnThread() — presumably a
  future-like handle whose result is the decoded {name: [path, ...]} dict.
  """
  def analyze_async_internal():
    # Forked processes cannot spawn further background processes, so use
    # a subprocess instead.
    log_level = str(logging.getLogger().getEffectiveLevel())
    args = [sys.executable, __file__, log_level, tool_prefix, output_directory]
    proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    # Paths are joined with \x01, matching the split in main().
    stdout = proc.communicate('\x01'.join(paths))[0]
    assert proc.returncode == 0
    logging.debug('Decoding nm results from forked process')

    # main() frames its output as: 8 hex digits giving the length of the
    # encoded-keys portion, then the keys, then the values.
    encoded_len = int(stdout[:8], 16)
    return concurrent.DecodeDictOfLists(stdout[8:8 + encoded_len],
                                        stdout[8 + encoded_len:])
  return concurrent.CallOnThread(analyze_async_internal)
|
| +
|
| +
|
def main():
  """Subprocess entry point used by BulkAnalyzeObjectFilesAsync().

  Reads \x01-separated paths from stdin and writes the encoded
  {name: [path, ...]} result to stdout, prefixed by 8 hex digits giving
  the byte length of the encoded-keys portion.
  """
  log_level, tool_prefix, output_directory = sys.argv[1:]
  logging.basicConfig(
      level=int(log_level),
      format='%(levelname).1s %(relativeCreated)6d %(message)s')

  input_paths = sys.stdin.read().split('\x01')
  paths_by_name = BulkAnalyzeObjectFiles(
      input_paths, tool_prefix, output_directory)
  encoded_keys, encoded_values = concurrent.EncodeDictOfLists(paths_by_name)

  # Length-prefix framing so the parent can split keys from values.
  sys.stdout.write('%08x' % len(encoded_keys))
  sys.stdout.write(encoded_keys)
  sys.stdout.write(encoded_values)
  logging.debug('nm bulk subprocess finished.')


if __name__ == '__main__':
  main()
|
|
|