Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: tools/binary_size/libsupersize/nm.py

Issue 2851473003: supersize: Track symbol aliases and shared symbols (Closed)
Patch Set: tweak nm interface Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2017 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Functions that rely on parsing output of "nm" tool."""
6
7 import collections
8 import logging
9 import os
10 import subprocess
11 import sys
12
13 import concurrent
14
15
def CollectAliasesByAddress(elf_path, tool_prefix):
  """Runs nm on |elf_path| and returns a dict of address->[names].

  Only addresses that map to more than one name (aliases) are returned,
  since this runs in a separate process and returning less data is cheaper.

  Args:
    elf_path: Path of the ELF file to run nm on.
    tool_prefix: Toolchain prefix prepended to "nm".

  Returns:
    {address (int): [demangled symbol names]} for addresses with 2+ names.
  """
  names_by_address = collections.defaultdict(list)

  # About 60mb of output, but piping takes ~30s, and loading it into RAM
  # directly takes 3s.
  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle',
          elf_path]
  output = subprocess.check_output(args)
  for line in output.splitlines():
    address_str, section, name = line.split(' ', 2)
    # Only text sections (t/T) are considered; read-only symbols are
    # deliberately excluded. Names starting with '$' are mapping symbols
    # ($t/$d), not real code symbols.
    if section not in 'tT' or not name or name[0] == '$':
      continue

    # Clear the low bit of the address (presumably the ARM thumb bit, so
    # that thumb aliases at the same address compare equal -- confirm).
    address = int(address_str, 16) & 0xfffffffffffffffe
    if not address:
      continue
    # Constructors often show up twice, so de-dupe names per address.
    name_list = names_by_address[address]
    if name not in name_list:
      name_list.append(name)

  # Since this is run in a separate process, minimize data passing by
  # returning only aliased symbols. Use .items() (not .iteritems()) so the
  # module also runs under Python 3.
  names_by_address = {k: v for k, v in names_by_address.items()
                      if len(v) > 1}

  return names_by_address
44
45
def _CollectAliasesByAddressAsyncHelper(elf_path, tool_prefix):
  """Forked-process entry point: collects aliases and encodes the result.

  Integer address keys are stringified so the dict survives the
  encode/decode round-trip used for inter-process transport.
  """
  aliases = CollectAliasesByAddress(elf_path, tool_prefix)
  return concurrent.EncodeDictOfLists(aliases, key_transform=str)
49
50
def CollectAliasesByAddressAsync(elf_path, tool_prefix):
  """Calls CollectAliasesByAddress in a helper process. Returns a Result."""
  def decode_result(encoded):
    # Keys were transported as strings; convert them back to int addresses.
    encoded_keys, encoded_values = encoded
    return concurrent.DecodeDictOfLists(
        encoded_keys, encoded_values, key_transform=int)
  job_args = (elf_path, tool_prefix)
  return concurrent.ForkAndCall(
      _CollectAliasesByAddressAsyncHelper, job_args,
      decode_func=decode_result)
59
60
61 def _ParseOneObjectFileOutput(lines):
62 ret = []
63 for line in lines:
64 if not line:
65 break
66 sep = line.find(' ') # Skip over address.
67 sep = line.find(' ', sep + 1) # Skip over symbol type.
68 name = line[sep + 1:]
69 # Skip lines like:
70 # 00000000 t $t
71 # 00000000 r $d
72 # 0000041b r .L.str.38
73 if name[0] not in '$.':
74 ret.append(name)
75 return ret
76
77
def _BatchCollectNames(target, tool_prefix, output_directory):
  """Runs nm on one archive (.a) or a batch of object files.

  Args:
    target: Either a single archive path (a string) or a list of object file
        paths, all relative to |output_directory|.
    tool_prefix: Toolchain prefix prepended to "nm".
    output_directory: Directory to run nm from.

  Returns:
    The (encoded_keys, encoded_values) pair from concurrent.EncodeDictOfLists
    for a dict of path -> [symbol names]. Archive members are keyed as
    "foo/bar.a(baz.o)". Returns ('', '') when nm produced no output.
  """
  is_archive = isinstance(target, basestring)
  # Ensure tool_prefix is absolute so that CWD does not affect it
  if os.path.sep in tool_prefix:
    # Use abspath() on the dirname to avoid it stripping a trailing /.
    dirname = os.path.dirname(tool_prefix)
    tool_prefix = os.path.abspath(dirname) + tool_prefix[len(dirname):]

  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle']
  if is_archive:
    args.append(target)
  else:
    args.extend(target)
  output = subprocess.check_output(args, cwd=output_directory)
  lines = output.splitlines()
  if not lines:
    return '', ''
  # With multiple inputs (or an archive), nm emits a blank line followed by
  # "path:" before each file's symbols; a single object file has no header.
  is_multi_file = not lines[0]
  lines = iter(lines)
  if is_multi_file:
    next(lines)
    path = next(lines)[:-1]  # Path ends with a colon.
  else:
    assert not is_archive
    path = target[0]

  ret = {}
  while True:
    if is_archive:
      # E.g. foo/bar.a(baz.o)
      path = '%s(%s)' % (target, path)
    # The multiprocess API uses pickle, which is ridiculously slow. More than 2x
    # faster to use join & split.
    ret[path] = _ParseOneObjectFileOutput(lines)
    # ':' is the default sentinel so that |path| becomes '' at end of input.
    path = next(lines, ':')[:-1]
    if not path:
      return concurrent.EncodeDictOfLists(ret)
115
116
117 class _BulkObjectFileAnalyzerWorker(object):
118 """Runs nm on all given paths and returns a dict of name->[paths]"""
119
120 def __init__(self, tool_prefix, output_directory):
121 self._tool_prefix = tool_prefix
122 self._output_directory = output_directory
123 self._batches = []
124 self._result = None
125
126 def AnalyzePaths(self, paths):
127 def iter_job_params():
128 object_paths = []
129 for path in paths:
130 if path.endswith('.a'):
131 yield path, self._tool_prefix, self._output_directory
132 else:
133 object_paths.append(path)
134
135 BATCH_SIZE = 50 # Chosen arbitrarily.
136 for i in xrange(0, len(object_paths), BATCH_SIZE):
137 batch = object_paths[i:i + BATCH_SIZE]
138 yield batch, self._tool_prefix, self._output_directory
139
140 paths_by_name = collections.defaultdict(list)
141 params = list(iter_job_params())
142 for encoded_ret in concurrent.BulkForkAndCall(_BatchCollectNames, params):
143 names_by_path = concurrent.DecodeDictOfLists(*encoded_ret)
144 for path, names in names_by_path.iteritems():
145 for name in names:
146 paths_by_name[name].append(path)
147 self._batches.append(paths_by_name)
148
149 def Close(self):
150 assert self._result is None
151 assert self._batches
152 paths_by_name = self._batches[0]
153 for batch in self._batches[1:]:
154 for name, path_list in batch.iteritems():
155 paths_by_name.setdefault(name, []).extend(path_list)
156
157 # It would speed up mashalling of the values by removing all entries
158 # that have only 1 path. However, these entries are needed to give
159 # path information to symbol aliases.
160 self._result = paths_by_name
161
162 def Get(self):
163 assert self._result is not None
164 return self._result
165
166
class _BulkObjectFileAnalyzerMaster(object):
  """Runs BulkObjectFileAnalyzer in a subprocess."""

  def __init__(self, tool_prefix, output_directory):
    self._process = None
    self._tool_prefix = tool_prefix
    self._output_directory = output_directory

  def _Spawn(self):
    # Re-invoke this module as a script (see _SubMain), forwarding the
    # current log level.
    log_level = str(logging.getLogger().getEffectiveLevel())
    args = [sys.executable, __file__, log_level, self._tool_prefix,
            self._output_directory]
    self._process = subprocess.Popen(
        args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  def AnalyzePaths(self, paths):
    """Sends one length-prefixed, \\x01-delimited batch of paths."""
    if self._process is None:
      self._Spawn()

    logging.debug('Sending batch of %d paths to subprocess', len(paths))
    payload = '\x01'.join(paths)
    header = '{:08x}'.format(len(payload))
    self._process.stdin.write(header)
    self._process.stdin.write(payload)

  def Close(self):
    # Closing stdin signals EOF, which tells the child to finish up.
    assert not self._process.stdin.closed
    self._process.stdin.close()

  def Get(self):
    """Reads back the encoded name->[paths] dict. Requires Close() first."""
    assert self._process.stdin.closed
    logging.debug('Decoding nm results from forked process')

    stdout = self._process.stdout
    keys_len = int(stdout.read(8), 16)
    keys = stdout.read(keys_len)
    values = stdout.read()
    return concurrent.DecodeDictOfLists(keys, values)
203
204
# Use the in-process worker directly when async is disabled; otherwise run
# the analysis in a subprocess via the master wrapper.
if concurrent.DISABLE_ASYNC:
  BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerWorker
else:
  BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerMaster
208
209
def _SubMain(log_level, tool_prefix, output_directory):
  """Child-process entry point: analyzes path batches read from stdin.

  Protocol (mirrors _BulkObjectFileAnalyzerMaster): each request is an
  8-hex-digit payload length followed by a \\x01-delimited list of paths;
  EOF (empty read) ends input. The result is written to stdout as an
  8-hex-digit keys length, the encoded keys, then the encoded values.
  """
  logging.basicConfig(level=int(log_level),
                      format='%(levelname).1s %(relativeCreated)6d %(message)s')
  analyzer = _BulkObjectFileAnalyzerWorker(tool_prefix, output_directory)
  while True:
    header = sys.stdin.read(8)
    payload_len = int(header, 16) if header else 0
    if not payload_len:
      logging.debug('nm bulk subprocess received eof.')
      break
    analyzer.AnalyzePaths(sys.stdin.read(payload_len).split('\x01'))

  analyzer.Close()
  encoded_keys, encoded_values = concurrent.EncodeDictOfLists(analyzer.Get())
  sys.stdout.write('%08x' % len(encoded_keys))
  sys.stdout.write(encoded_keys)
  sys.stdout.write(encoded_values)
  logging.debug('nm bulk subprocess finished.')
229
230
if __name__ == '__main__':
  # Invoked by _BulkObjectFileAnalyzerMaster._Spawn() with argv:
  #   <log_level> <tool_prefix> <output_directory>
  _SubMain(*sys.argv[1:])
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698