OLD | NEW |
(Empty) | |
| 1 # Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
| 5 """Functions that rely on parsing output of "nm" tool.""" |
| 6 |
| 7 import collections |
| 8 import logging |
| 9 import os |
| 10 import subprocess |
| 11 import sys |
| 12 |
| 13 import concurrent |
| 14 |
| 15 |
def CollectAliasesByAddress(elf_path, tool_prefix):
  """Runs nm on |elf_path| and returns a dict of address->[names]"""
  names_by_address = collections.defaultdict(list)

  # About 60mb of output, but piping takes ~30s, and loading it into RAM
  # directly takes 3s.
  cmd = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle',
         elf_path]
  nm_output = subprocess.check_output(cmd)
  for line in nm_output.splitlines():
    address_str, section, name = line.split(' ', 2)
    # To verify that rodata does not have aliases:
    #   nm --no-sort --defined-only libchrome.so > nm.out
    #   grep -v '\$' nm.out | grep ' r ' | sort | cut -d' ' -f1 > addrs
    #   wc -l < addrs; uniq < addrs | wc -l
    if section not in 'tT' or not name or name.startswith('$'):
      continue

    address = int(address_str, 16)
    if not address:
      continue
    # Constructors often show up twice, so dedupe within each address.
    aliases = names_by_address[address]
    if name not in aliases:
      aliases.append(name)

  # Since this is run in a separate process, minimize data passing by returning
  # only aliased symbols.
  return {addr: names for addr, names in names_by_address.iteritems()
          if len(names) > 1}
| 48 |
| 49 |
def _CollectAliasesByAddressAsyncHelper(elf_path, tool_prefix):
  """Forked-process helper: collects aliases and encodes them for IPC."""
  aliases_by_address = CollectAliasesByAddress(elf_path, tool_prefix)
  # Addresses (ints) must be stringified so the encoded form round-trips.
  return concurrent.EncodeDictOfLists(aliases_by_address, key_transform=str)
| 53 |
| 54 |
def CollectAliasesByAddressAsync(elf_path, tool_prefix):
  """Calls CollectAliasesByAddress in a helper process. Returns a Result."""
  def _Decode(encoded):
    # Inverse of the helper's encoding: keys go back from str to int.
    keys, values = encoded
    return concurrent.DecodeDictOfLists(keys, values, key_transform=int)

  return concurrent.ForkAndCall(
      _CollectAliasesByAddressAsyncHelper, (elf_path, tool_prefix),
      decode_func=_Decode)
| 63 |
| 64 |
| 65 def _ParseOneObjectFileOutput(lines): |
| 66 ret = [] |
| 67 for line in lines: |
| 68 if not line: |
| 69 break |
| 70 sep = line.find(' ') # Skip over address. |
| 71 sep = line.find(' ', sep + 1) # Skip over symbol type. |
| 72 name = line[sep + 1:] |
| 73 # Skip lines like: |
| 74 # 00000000 t $t |
| 75 # 00000000 r $d |
| 76 # 0000041b r .L.str.38 |
| 77 if name[0] not in '$.': |
| 78 ret.append(name) |
| 79 return ret |
| 80 |
| 81 |
def _BatchCollectNames(target, tool_prefix, output_directory):
  """Runs nm on an archive or a batch of object files.

  Args:
    target: Either a single archive path (string) or a list of object paths.
    tool_prefix: Prefix prepended to 'nm' to form the tool path.
    output_directory: Working directory for the nm invocation.

  Returns:
    concurrent.EncodeDictOfLists() of {path: [symbol names]}, or ('', '')
    when nm produced no output. For archives, paths take the form
    'foo/bar.a(baz.o)'.
  """
  # basestring (Python 2) distinguishes a lone archive from a list of objects.
  is_archive = isinstance(target, basestring)
  # Ensure tool_prefix is absolute so that CWD does not affect it
  if os.path.sep in tool_prefix:
    # Use abspath() on the dirname to avoid it stripping a trailing /.
    dirname = os.path.dirname(tool_prefix)
    tool_prefix = os.path.abspath(dirname) + tool_prefix[len(dirname):]

  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle']
  if is_archive:
    args.append(target)
  else:
    args.extend(target)
  output = subprocess.check_output(args, cwd=output_directory)
  lines = output.splitlines()
  if not lines:
    return '', ''
  # Multi-file nm output starts with a blank line, then "path:" headers
  # before each file's symbols.
  is_multi_file = not lines[0]
  lines = iter(lines)
  if is_multi_file:
    next(lines)
    path = next(lines)[:-1]  # Path ends with a colon.
  else:
    # A single-file run can only come from a one-element object list.
    assert not is_archive
    path = target[0]

  ret = {}
  while True:
    if is_archive:
      # E.g. foo/bar.a(baz.o)
      path = '%s(%s)' % (target, path)
    # The multiprocess API uses pickle, which is ridiculously slow. More than 2x
    # faster to use join & split.
    # _ParseOneObjectFileOutput() consumes |lines| up to the next blank line.
    ret[path] = _ParseOneObjectFileOutput(lines)
    # Default ':' makes the next slice empty, terminating on exhausted input.
    path = next(lines, ':')[:-1]
    if not path:
      return concurrent.EncodeDictOfLists(ret)
| 119 |
| 120 |
| 121 class _BulkObjectFileAnalyzerWorker(object): |
| 122 """Runs nm on all given paths and returns a dict of name->[paths]""" |
| 123 |
| 124 def __init__(self, tool_prefix, output_directory): |
| 125 self._tool_prefix = tool_prefix |
| 126 self._output_directory = output_directory |
| 127 self._batches = [] |
| 128 self._result = None |
| 129 |
| 130 def AnalyzePaths(self, paths): |
| 131 def iter_job_params(): |
| 132 object_paths = [] |
| 133 for path in paths: |
| 134 if path.endswith('.a'): |
| 135 yield path, self._tool_prefix, self._output_directory |
| 136 else: |
| 137 object_paths.append(path) |
| 138 |
| 139 BATCH_SIZE = 50 # Chosen arbitrarily. |
| 140 for i in xrange(0, len(object_paths), BATCH_SIZE): |
| 141 batch = object_paths[i:i + BATCH_SIZE] |
| 142 yield batch, self._tool_prefix, self._output_directory |
| 143 |
| 144 paths_by_name = collections.defaultdict(list) |
| 145 params = list(iter_job_params()) |
| 146 for encoded_ret in concurrent.BulkForkAndCall(_BatchCollectNames, params): |
| 147 names_by_path = concurrent.DecodeDictOfLists(*encoded_ret) |
| 148 for path, names in names_by_path.iteritems(): |
| 149 for name in names: |
| 150 paths_by_name[name].append(path) |
| 151 self._batches.append(paths_by_name) |
| 152 |
| 153 def Close(self): |
| 154 assert self._result is None |
| 155 assert self._batches |
| 156 paths_by_name = self._batches[0] |
| 157 for batch in self._batches[1:]: |
| 158 for name, path_list in batch.iteritems(): |
| 159 paths_by_name.setdefault(name, []).extend(path_list) |
| 160 |
| 161 # It would speed up mashalling of the values by removing all entries |
| 162 # that have only 1 path. However, these entries are needed to give |
| 163 # path information to symbol aliases. |
| 164 self._result = paths_by_name |
| 165 |
| 166 def Get(self): |
| 167 assert self._result is not None |
| 168 return self._result |
| 169 |
| 170 |
| 171 class _BulkObjectFileAnalyzerMaster(object): |
| 172 """Runs BulkObjectFileAnalyzer in a subprocess.""" |
| 173 |
| 174 def __init__(self, tool_prefix, output_directory): |
| 175 self._process = None |
| 176 self._tool_prefix = tool_prefix |
| 177 self._output_directory = output_directory |
| 178 |
| 179 def _Spawn(self): |
| 180 log_level = str(logging.getLogger().getEffectiveLevel()) |
| 181 args = [sys.executable, __file__, log_level, self._tool_prefix, |
| 182 self._output_directory] |
| 183 self._process = subprocess.Popen( |
| 184 args, stdin=subprocess.PIPE, stdout=subprocess.PIPE) |
| 185 |
| 186 def AnalyzePaths(self, paths): |
| 187 if self._process is None: |
| 188 self._Spawn() |
| 189 |
| 190 logging.debug('Sending batch of %d paths to subprocess', len(paths)) |
| 191 payload = '\x01'.join(paths) |
| 192 self._process.stdin.write('{:08x}'.format(len(payload))) |
| 193 self._process.stdin.write(payload) |
| 194 |
| 195 def Close(self): |
| 196 assert not self._process.stdin.closed |
| 197 self._process.stdin.close() |
| 198 |
| 199 def Get(self): |
| 200 assert self._process.stdin.closed |
| 201 logging.debug('Decoding nm results from forked process') |
| 202 |
| 203 encoded_keys_len = int(self._process.stdout.read(8), 16) |
| 204 encoded_keys = self._process.stdout.read(encoded_keys_len) |
| 205 encoded_values = self._process.stdout.read() |
| 206 return concurrent.DecodeDictOfLists(encoded_keys, encoded_values) |
| 207 |
| 208 |
# Default to the subprocess-backed master; fall back to the in-process worker
# when the concurrent module has async disabled.
BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerMaster
if concurrent.DISABLE_ASYNC:
  BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerWorker
| 212 |
| 213 |
def _SubMain(log_level, tool_prefix, output_directory):
  """Subprocess entry point: analyzes path batches streamed over stdin.

  Each request is an 8-hex-digit payload length followed by the payload
  (paths joined by a 0x01 separator); EOF or a zero length ends the loop.
  The merged result is written to stdout as an 8-hex-digit keys length,
  the encoded keys, then the encoded values.
  """
  logging.basicConfig(level=int(log_level),
                      format='%(levelname).1s %(relativeCreated)6d %(message)s')
  analyzer = _BulkObjectFileAnalyzerWorker(tool_prefix, output_directory)
  while True:
    header = sys.stdin.read(8)
    payload_len = int(header, 16) if header else 0
    if not payload_len:
      logging.debug('nm bulk subprocess received eof.')
      break
    analyzer.AnalyzePaths(sys.stdin.read(payload_len).split('\x01'))

  analyzer.Close()
  encoded_keys, encoded_values = concurrent.EncodeDictOfLists(analyzer.Get())
  sys.stdout.write('%08x' % len(encoded_keys))
  sys.stdout.write(encoded_keys)
  sys.stdout.write(encoded_values)
  logging.debug('nm bulk subprocess finished.')
| 233 |
| 234 |
if __name__ == '__main__':
  # Invoked by _BulkObjectFileAnalyzerMaster._Spawn() with argv =
  # [log_level, tool_prefix, output_directory].
  _SubMain(*sys.argv[1:])
OLD | NEW |