| OLD | NEW |
| (Empty) | |
| 1 # Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
| 5 """Dealing with "nm" tool.""" |
| 6 |
| 7 import collections |
| 8 import logging |
| 9 import os |
| 10 import subprocess |
| 11 import sys |
| 12 |
| 13 import concurrent |
| 14 |
| 15 |
def CollectAliasesByAddress(elf_path, tool_prefix):
  """Runs nm on |elf_path| and groups text-section symbol names by address.

  Args:
    elf_path: Path to the ELF file to analyze.
    tool_prefix: Prefix prepended to 'nm' to locate the toolchain binary.

  Returns:
    A dict of address (int) -> list of demangled names, containing only
    addresses with more than one name (aliases).
  """
  aliases_by_address = collections.defaultdict(list)

  # About 60mb of output, but piping takes ~30s, and loading it into RAM
  # directly takes 3s.
  cmd = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle',
         elf_path]
  nm_output = subprocess.check_output(cmd)
  for line in nm_output.splitlines():
    address_str, section, name = line.split(' ', 2)
    # Keep only text-section symbols; names starting with '$' are skipped.
    if section in 'tT' and name and name[0] != '$':
      # Low bit is masked off before grouping (presumably the ARM thumb-mode
      # bit — addresses differing only in bit 0 alias the same code).
      address = int(address_str, 16) & 0xfffffffffffffffe
      if address:
        # Constructors often show up twice.
        known_names = aliases_by_address[address]
        if name not in known_names:
          known_names.append(name)

  # Since this is run in a separate process, minimize data passing by returning
  # only aliased symbols.
  return {addr: names for addr, names in aliases_by_address.iteritems()
          if len(names) > 1}
| 43 |
| 44 |
def _CollectAliasesByAddressAsyncHelper(elf_path, tool_prefix):
  """Forked-process helper: collects aliases and encodes the resulting dict."""
  # Integer address keys are stringified for the encoded representation.
  return concurrent.EncodeDictOfLists(
      CollectAliasesByAddress(elf_path, tool_prefix), key_transform=str)
| 48 |
| 49 |
def CollectAliasesByAddressAsync(elf_path, tool_prefix):
  """Runs CollectAliasesByAddress in a forked process.

  Returns the handle from concurrent.ForkAndCall(); decoding converts the
  stringified address keys back to ints.
  """
  def _decode(encoded):
    keys, values = encoded
    return concurrent.DecodeDictOfLists(keys, values, key_transform=int)

  return concurrent.ForkAndCall(
      _CollectAliasesByAddressAsyncHelper, (elf_path, tool_prefix),
      decode_func=_decode)
| 57 |
| 58 |
| 59 def _ParseOneObjectFileOutput(lines): |
| 60 ret = [] |
| 61 for line in lines: |
| 62 if not line: |
| 63 break |
| 64 sep = line.find(' ') # Skip over address. |
| 65 sep = line.find(' ', sep + 1) # Skip over symbol type. |
| 66 name = line[sep + 1:] |
| 67 # Skip lines like: |
| 68 # 00000000 t $t |
| 69 # 00000000 r $d |
| 70 # 0000041b r .L.str.38 |
| 71 if name[0] not in '$.': |
| 72 ret.append(name) |
| 73 return ret |
| 74 |
| 75 |
def _BatchCollectNames(target, tool_prefix, output_directory):
  """Runs nm over one archive or a batch of object files and parses the output.

  Args:
    target: Either a single archive path (a string) or a list of object file
        paths.
    tool_prefix: Prefix prepended to 'nm' to locate the toolchain binary.
    output_directory: Used as nm's cwd, so |target| paths are relative to it.

  Returns:
    ('', '') when nm produced no output; otherwise the encoded (keys, values)
    pair from concurrent.EncodeDictOfLists for a dict of
    path -> list of symbol names.
  """
  # A bare string denotes an archive (.a); a list denotes object files.
  is_archive = isinstance(target, basestring)
  # Ensure tool_prefix is absolute so that CWD does not affect it
  if os.path.sep in tool_prefix:
    # Use abspath() on the dirname to avoid it stripping a trailing /.
    dirname = os.path.dirname(tool_prefix)
    tool_prefix = os.path.abspath(dirname) + tool_prefix[len(dirname):]

  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle']
  if is_archive:
    args.append(target)
  else:
    args.extend(target)
  output = subprocess.check_output(args, cwd=output_directory)
  lines = output.splitlines()
  if not lines:
    return '', ''
  # The parsing below assumes multi-file output begins with a blank line,
  # and that each member's symbols are preceded by a "path:" header line.
  is_multi_file = not lines[0]
  lines = iter(lines)
  if is_multi_file:
    next(lines)
    path = next(lines)[:-1]  # Path ends with a colon.
  else:
    # Single-file output has no header, so the path must come from |target|
    # (archives always produce multi-file output).
    assert not is_archive
    path = target[0]

  ret = {}
  while True:
    if is_archive:
      # E.g. foo/bar.a(baz.o)
      path = '%s(%s)' % (target, path)
    # The multiprocess API uses pickle, which is ridiculously slow. More than 2x
    # faster to use join & split.
    ret[path] = _ParseOneObjectFileOutput(lines)
    # The ':' default makes |path| empty when the iterator is exhausted,
    # terminating the loop.
    path = next(lines, ':')[:-1]
    if not path:
      return concurrent.EncodeDictOfLists(ret)
| 113 |
| 114 |
def BulkAnalyzeObjectFiles(paths, tool_prefix, output_directory):
  """Runs nm over |paths| in parallel worker processes.

  Args:
    paths: Object file and archive (.a) paths, relative to |output_directory|.
    tool_prefix: Prefix prepended to 'nm' to locate the toolchain binary.
    output_directory: Directory that |paths| are relative to.

  Returns:
    A defaultdict of symbol name -> list of paths that define it.
  """
  BATCH_SIZE = 50  # Chosen arbitrarily.

  def job_params():
    # Archives are dispatched individually as they are encountered; plain
    # object files are accumulated and dispatched in fixed-size batches.
    leftover_objects = []
    for path in paths:
      if path.endswith('.a'):
        yield path, tool_prefix, output_directory
      else:
        leftover_objects.append(path)

    for start in xrange(0, len(leftover_objects), BATCH_SIZE):
      yield (leftover_objects[start:start + BATCH_SIZE], tool_prefix,
             output_directory)

  paths_by_name = collections.defaultdict(list)
  for encoded in concurrent.IterForkAndCall(_BatchCollectNames, job_params()):
    names_by_path = concurrent.DecodeDictOfLists(*encoded)
    for path, names in names_by_path.iteritems():
      for name in names:
        paths_by_name[name].append(path)

  # TODO(agrieve): Combining the two calls to BulkAnalyzeObjectFiles() would
  # allow pruning entries from this dict with only one path, and make
  # marshalling it back much faster.
  return paths_by_name
| 141 |
| 142 |
def BulkAnalyzeObjectFilesAsync(paths, tool_prefix, output_directory):
  """Runs BulkAnalyzeObjectFiles in a helper subprocess, driven from a thread.

  Returns:
    The handle from concurrent.CallOnThread() for the eventual
    name -> list of paths dict.
  """
  def analyze_async_internal():
    # Forked processes cannot spawn further background processes, so use
    # a subprocess instead.
    log_level = str(logging.getLogger().getEffectiveLevel())
    # Re-invokes this file; argv and the stdin/stdout protocol must match
    # main() below.
    args = [sys.executable, __file__, log_level, tool_prefix, output_directory]
    proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    # Paths are fed to the subprocess on stdin, joined by the 0x01 byte.
    stdout = proc.communicate('\x01'.join(paths))[0]
    assert proc.returncode == 0
    logging.debug('Decoding nm results from forked process')

    # Wire format: 8 hex digits giving len(encoded_keys), then the encoded
    # keys, then the encoded values.
    encoded_len = int(stdout[:8], 16)
    return concurrent.DecodeDictOfLists(stdout[8:8 + encoded_len],
                                        stdout[8 + encoded_len:])
  return concurrent.CallOnThread(analyze_async_internal)
| 159 |
| 160 |
def main():
  """Subprocess entry point used by BulkAnalyzeObjectFilesAsync().

  argv (after the script name): log_level, tool_prefix, output_directory.
  Reads 0x01-byte-separated paths from stdin, then writes to stdout: 8 hex
  digits giving len(encoded_keys), the encoded keys, the encoded values.
  """
  log_level, tool_prefix, output_directory = sys.argv[1:]
  logging.basicConfig(level=int(log_level),
                      format='%(levelname).1s %(relativeCreated)6d %(message)s')
  paths = sys.stdin.read().split('\x01')
  paths_by_name = BulkAnalyzeObjectFiles(paths, tool_prefix, output_directory)
  encoded_keys, encoded_values = concurrent.EncodeDictOfLists(paths_by_name)
  # Length prefix lets the parent split keys from values in a single read.
  sys.stdout.write('%08x' % len(encoded_keys))
  sys.stdout.write(encoded_keys)
  sys.stdout.write(encoded_values)
  logging.debug('nm bulk subprocess finished.')


if __name__ == '__main__':
  main()
| OLD | NEW |