# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Functions that rely on parsing output of "nm" tool."""

import collections
import logging
import os
import subprocess
import sys

import concurrent


def CollectAliasesByAddress(elf_path, tool_prefix):
  """Runs nm on |elf_path| and returns a dict of address->[names]."""
  names_by_address = collections.defaultdict(list)

  # About 60 MB of output: piping takes ~30s, while loading it into RAM
  # directly takes only 3s.
  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle',
          elf_path]
  output = subprocess.check_output(args)
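  # Each line is "<hex address> <section letter> <name>", e.g. (made up):
  #   002a1000 t FooBar()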
  for line in output.splitlines():
    address_str, section, name = line.split(' ', 2)
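    # Keep only code symbols (sections 't'/'T'); per the review thread below,
    # read-only data symbols are deliberately left out.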
    if section not in 'tT' or not name or name[0] == '$':
      continue

estevenson (2017/04/28 17:06:11): should readonly symbols be included in this?
agrieve (2017/04/28 19:26:59): Added a comment. Looks like no.

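    # A guess at the intent: the mask clears the low bit, which ARM sets on
    # the addresses of Thumb-mode functions. (Per the thread below, it turned
    # out not to be needed.)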
    address = int(address_str, 16) & 0xfffffffffffffffe
estevenson (2017/04/28 17:06:11): This needs a comment
agrieve (2017/04/28 19:26:59): Turns out it wasn't needed :P

    if not address:
      continue
    # Constructors often show up twice.
    name_list = names_by_address[address]
    if name not in name_list:
      name_list.append(name)

  # Since this is run in a separate process, minimize data passing by
  # returning only aliased symbols.
  names_by_address = {k: v for k, v in names_by_address.iteritems()
                      if len(v) > 1}

  return names_by_address
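
Only addresses that map to more than one name survive the filtering above. A
hypothetical return value, assuming identical code folding merged two
functions (address and names invented for illustration):

    {0x2a1000: ['Foo()', 'Bar()']}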


def _CollectAliasesByAddressAsyncHelper(elf_path, tool_prefix):
  result = CollectAliasesByAddress(elf_path, tool_prefix)
  return concurrent.EncodeDictOfLists(result, key_transform=str)


def CollectAliasesByAddressAsync(elf_path, tool_prefix):
  """Calls CollectAliasesByAddress in a helper process. Returns a Result."""
  def decode(encoded):
    return concurrent.DecodeDictOfLists(
        encoded[0], encoded[1], key_transform=int)
  return concurrent.ForkAndCall(
      _CollectAliasesByAddressAsyncHelper, (elf_path, tool_prefix),
      decode_func=decode)


def _ParseOneObjectFileOutput(lines):
  ret = []
  for line in lines:
    if not line:
      break
    sep = line.find(' ')  # Skip over address.
    sep = line.find(' ', sep + 1)  # Skip over symbol type.
    name = line[sep + 1:]
    # Skip lines like:
    #   00000000 t $t
    #   00000000 r $d
    #   0000041b r .L.str.38
    if name[0] not in '$.':
      ret.append(name)
  return ret


def _BatchCollectNames(target, tool_prefix, output_directory):
  is_archive = isinstance(target, basestring)
  # Ensure tool_prefix is absolute so that CWD does not affect it.
  if os.path.sep in tool_prefix:
    # Use abspath() on the dirname to avoid it stripping a trailing /.
    dirname = os.path.dirname(tool_prefix)
    tool_prefix = os.path.abspath(dirname) + tool_prefix[len(dirname):]

  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle']
  if is_archive:
    args.append(target)
  else:
    args.extend(target)
  output = subprocess.check_output(args, cwd=output_directory)
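  # With multiple inputs (or an archive), nm prints a blank line and then a
  # "path:" header before each file's symbols, e.g. (illustrative):
  #
  #   obj/foo.o:
  #   00000000 t Helper()
  #
  #   obj/bar.o:
  #   00000004 T Bar()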
  lines = output.splitlines()
  if not lines:
    return '', ''
  is_multi_file = not lines[0]
  lines = iter(lines)
  if is_multi_file:
    next(lines)
    path = next(lines)[:-1]  # Path ends with a colon.
  else:
    assert not is_archive
    path = target[0]

  ret = {}
  while True:
    if is_archive:
      # E.g. foo/bar.a(baz.o)
      path = '%s(%s)' % (target, path)
    ret[path] = _ParseOneObjectFileOutput(lines)
    path = next(lines, ':')[:-1]
    if not path:
      # The multiprocess API uses pickle, which is ridiculously slow. It is
      # more than 2x faster to encode with join and decode with split.
      return concurrent.EncodeDictOfLists(ret)


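EncodeDictOfLists and DecodeDictOfLists live in the project's concurrent
helper module, which is not part of this file. A minimal sketch of the
join & split idea the comment above alludes to, assuming two non-printing
separator bytes (the real helpers' separators and edge-case handling may
differ):

    def EncodeDictOfLists(d, key_transform=None):
      # Flatten to two strings: '\x01' between entries, '\x02' between items.
      keys = []
      values = []
      for k, v in d.iteritems():
        keys.append(key_transform(k) if key_transform else k)
        values.append('\x02'.join(v))
      return '\x01'.join(keys), '\x01'.join(values)

    def DecodeDictOfLists(encoded_keys, encoded_values, key_transform=None):
      # Inverse of the above; far cheaper than pickling the whole dict.
      ret = {}
      if encoded_keys:
        for k, v in zip(encoded_keys.split('\x01'),
                        encoded_values.split('\x01')):
          ret[key_transform(k) if key_transform else k] = v.split('\x02')
      return ret
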
class _BulkObjectFileAnalyzerWorker(object):
  """Runs nm on all given paths and returns a dict of name->[paths]."""

  def __init__(self, tool_prefix, output_directory):
    self._tool_prefix = tool_prefix
    self._output_directory = output_directory
    self._batches = []
    self._result = None

  def AnalyzePaths(self, paths):
    def iter_job_params():
      object_paths = []
      for path in paths:
        if path.endswith('.a'):
          yield path, self._tool_prefix, self._output_directory
        else:
          object_paths.append(path)

      BATCH_SIZE = 50  # Chosen arbitrarily.
      for i in xrange(0, len(object_paths), BATCH_SIZE):
        batch = object_paths[i:i + BATCH_SIZE]
        yield batch, self._tool_prefix, self._output_directory

    paths_by_name = collections.defaultdict(list)
    params = list(iter_job_params())
    for encoded_ret in concurrent.BulkForkAndCall(_BatchCollectNames, params):
      names_by_path = concurrent.DecodeDictOfLists(*encoded_ret)
      for path, names in names_by_path.iteritems():
        for name in names:
          paths_by_name[name].append(path)
    self._batches.append(paths_by_name)

  def Close(self):
    assert self._result is None
    assert self._batches
    paths_by_name = self._batches[0]
    for batch in self._batches[1:]:
      for name, path_list in batch.iteritems():
        paths_by_name.setdefault(name, []).extend(path_list)

    # Marshalling the values could be sped up by removing all entries that
    # have only one path. However, those entries are needed to give path
    # information to symbol aliases.
    self._result = paths_by_name

  def Get(self):
    assert self._result is not None
    return self._result


class _BulkObjectFileAnalyzerMaster(object):
  """Runs a _BulkObjectFileAnalyzerWorker in a subprocess."""

  def __init__(self, tool_prefix, output_directory):
    self._process = None
    self._tool_prefix = tool_prefix
    self._output_directory = output_directory

  def _Spawn(self):
    log_level = str(logging.getLogger().getEffectiveLevel())
    args = [sys.executable, __file__, log_level, self._tool_prefix,
            self._output_directory]
    self._process = subprocess.Popen(
        args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  def AnalyzePaths(self, paths):
    if self._process is None:
      self._Spawn()

    logging.debug('Sending batch of %d paths to subprocess', len(paths))
    payload = '\x01'.join(paths)
    self._process.stdin.write('{:08x}'.format(len(payload)))
    self._process.stdin.write(payload)

  def Close(self):
    assert not self._process.stdin.closed
    self._process.stdin.close()

  def Get(self):
    assert self._process.stdin.closed
    logging.debug('Decoding nm results from forked process')

    encoded_keys_len = int(self._process.stdout.read(8), 16)
    encoded_keys = self._process.stdout.read(encoded_keys_len)
    encoded_values = self._process.stdout.read()
    return concurrent.DecodeDictOfLists(encoded_keys, encoded_values)


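Every message on the pipe is framed the same way: eight ASCII hex digits
giving the payload length, then the payload itself. The file inlines this
logic on both ends; a pair of hypothetical helpers just to illustrate the
framing:

    def _WriteMessage(stream, payload):
      # A fixed-width header lets the reader fetch the length in one read(8).
      stream.write('%08x' % len(payload))
      stream.write(payload)

    def _ReadMessage(stream):
      header = stream.read(8)
      if not header:
        return None  # EOF: the writer closed its end of the pipe.
      return stream.read(int(header, 16))
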
BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerMaster
if concurrent.DISABLE_ASYNC:
  BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerWorker
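
Either class exposes the same AnalyzePaths/Close/Get interface, so callers
are unaffected by the switch. Hypothetical usage (paths invented):

    analyzer = BulkObjectFileAnalyzer(tool_prefix, output_directory)
    analyzer.AnalyzePaths(['obj/foo.o', 'lib/libbar.a'])  # Callable repeatedly.
    analyzer.Close()  # Signals that no more paths are coming.
    paths_by_name = analyzer.Get()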


def _SubMain(log_level, tool_prefix, output_directory):
  logging.basicConfig(level=int(log_level),
                      format='%(levelname).1s %(relativeCreated)6d %(message)s')
  bulk_analyzer = _BulkObjectFileAnalyzerWorker(tool_prefix, output_directory)
  while True:
    payload_len = int(sys.stdin.read(8) or '0', 16)
    if not payload_len:
      logging.debug('nm bulk subprocess received eof.')
      break
    paths = sys.stdin.read(payload_len).split('\x01')
    bulk_analyzer.AnalyzePaths(paths)

  bulk_analyzer.Close()
  paths_by_name = bulk_analyzer.Get()
  encoded_keys, encoded_values = concurrent.EncodeDictOfLists(paths_by_name)
  sys.stdout.write('%08x' % len(encoded_keys))
  sys.stdout.write(encoded_keys)
  sys.stdout.write(encoded_values)
  logging.debug('nm bulk subprocess finished.')


if __name__ == '__main__':
  _SubMain(*sys.argv[1:])