Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: tools/binary_size/libsupersize/nm.py

Issue 2851473003: supersize: Track symbol aliases and shared symbols (Closed)
Patch Set: fix regression in calculate padding introduced in ps3 Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2017 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Functions that rely on parsing output of "nm" tool."""
6
7 import collections
8 import logging
9 import os
10 import subprocess
11 import sys
12
13 import concurrent
14
15
def CollectAliasesByAddress(elf_path, tool_prefix):
  """Runs nm on |elf_path| and returns a dict of address->[names].

  Only addresses that map to more than one name (i.e. aliases) are kept,
  to minimize the data marshalled back across the process boundary.
  """
  # About 60mb of output, but piping takes ~30s, and loading it into RAM
  # directly takes 3s.
  cmd = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle',
         elf_path]
  nm_output = subprocess.check_output(cmd)

  names_by_address = collections.defaultdict(list)
  for line in nm_output.splitlines():
    address_str, section, name = line.split(' ', 2)
    # To verify that rodata does not have aliases:
    #   nm --no-sort --defined-only libchrome.so > nm.out
    #   grep -v '\$' nm.out | grep ' r ' | sort | cut -d' ' -f1 > addrs
    #   wc -l < addrs; uniq < addrs | wc -l
    if not name or name.startswith('$') or section not in 'tT':
      continue
    address = int(address_str, 16)
    if not address:
      continue
    # Constructors often show up twice, so de-dup names per address.
    aliases = names_by_address[address]
    if name not in aliases:
      aliases.append(name)

  # Since this is run in a separate process, minimize data passing by
  # returning only aliased symbols.
  return {addr: names for addr, names in names_by_address.items()
          if len(names) > 1}
48
49
def _CollectAliasesByAddressAsyncHelper(elf_path, tool_prefix):
  """Worker-side helper: collects aliases, then encodes them for transport."""
  aliases_by_address = CollectAliasesByAddress(elf_path, tool_prefix)
  return concurrent.EncodeDictOfLists(aliases_by_address, key_transform=str)
53
54
def CollectAliasesByAddressAsync(elf_path, tool_prefix):
  """Calls CollectAliasesByAddress in a helper process. Returns a Result."""
  def decode(encoded):
    # Addresses were stringified for transport; restore them to ints.
    keys, values = encoded
    return concurrent.DecodeDictOfLists(keys, values, key_transform=int)
  return concurrent.ForkAndCall(
      _CollectAliasesByAddressAsyncHelper, (elf_path, tool_prefix),
      decode_func=decode)
63
64
65 def _ParseOneObjectFileOutput(lines):
66 ret = []
67 for line in lines:
68 if not line:
69 break
70 sep = line.find(' ') # Skip over address.
71 sep = line.find(' ', sep + 1) # Skip over symbol type.
72 name = line[sep + 1:]
73 # Skip lines like:
74 # 00000000 t $t
75 # 00000000 r $d
76 # 0000041b r .L.str.38
77 if name[0] not in '$.':
78 ret.append(name)
79 return ret
80
81
def _BatchCollectNames(target, tool_prefix, output_directory):
  """Runs nm over one archive or a batch of object files.

  Args:
    target: Either a single archive path (a string), or a list of object
        file paths, resolved relative to |output_directory|.
    tool_prefix: Prefix prepended to 'nm' to form the tool to run.
    output_directory: Directory to run nm from.

  Returns:
    The (keys, values) pair from concurrent.EncodeDictOfLists() mapping
    path -> [symbol names], or ('', '') when nm produced no output.
    For archives, keys take the form "foo/bar.a(baz.o)".
  """
  is_archive = isinstance(target, basestring)
  # Ensure tool_prefix is absolute so that CWD does not affect it
  if os.path.sep in tool_prefix:
    # Use abspath() on the dirname to avoid it stripping a trailing /.
    dirname = os.path.dirname(tool_prefix)
    tool_prefix = os.path.abspath(dirname) + tool_prefix[len(dirname):]

  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle']
  if is_archive:
    args.append(target)
  else:
    args.extend(target)
  output = subprocess.check_output(args, cwd=output_directory)
  lines = output.splitlines()
  if not lines:
    return '', ''
  # The parsing below assumes that with multiple input files, nm prefixes
  # each file's section with a blank line followed by "path:".
  is_multi_file = not lines[0]
  lines = iter(lines)
  if is_multi_file:
    next(lines)
    path = next(lines)[:-1]  # Path ends with a colon.
  else:
    assert not is_archive
    path = target[0]

  ret = {}
  while True:
    if is_archive:
      # E.g. foo/bar.a(baz.o)
      path = '%s(%s)' % (target, path)
    # The multiprocess API uses pickle, which is ridiculously slow. More than 2x
    # faster to use join & split.
    ret[path] = _ParseOneObjectFileOutput(lines)
    # Default ':' acts as a sentinel: it slices to '' at end of input.
    path = next(lines, ':')[:-1]
    if not path:
      return concurrent.EncodeDictOfLists(ret)
119
120
121 class _BulkObjectFileAnalyzerWorker(object):
122 """Runs nm on all given paths and returns a dict of name->[paths]"""
123
124 def __init__(self, tool_prefix, output_directory):
125 self._tool_prefix = tool_prefix
126 self._output_directory = output_directory
127 self._batches = []
128 self._result = None
129
130 def AnalyzePaths(self, paths):
131 def iter_job_params():
132 object_paths = []
133 for path in paths:
134 if path.endswith('.a'):
135 yield path, self._tool_prefix, self._output_directory
136 else:
137 object_paths.append(path)
138
139 BATCH_SIZE = 50 # Chosen arbitrarily.
140 for i in xrange(0, len(object_paths), BATCH_SIZE):
141 batch = object_paths[i:i + BATCH_SIZE]
142 yield batch, self._tool_prefix, self._output_directory
143
144 paths_by_name = collections.defaultdict(list)
145 params = list(iter_job_params())
146 for encoded_ret in concurrent.BulkForkAndCall(_BatchCollectNames, params):
147 names_by_path = concurrent.DecodeDictOfLists(*encoded_ret)
148 for path, names in names_by_path.iteritems():
149 for name in names:
150 paths_by_name[name].append(path)
151 self._batches.append(paths_by_name)
152
153 def Close(self):
154 assert self._result is None
155 assert self._batches
156 paths_by_name = self._batches[0]
157 for batch in self._batches[1:]:
158 for name, path_list in batch.iteritems():
159 paths_by_name.setdefault(name, []).extend(path_list)
160
161 # It would speed up mashalling of the values by removing all entries
162 # that have only 1 path. However, these entries are needed to give
163 # path information to symbol aliases.
164 self._result = paths_by_name
165
166 def Get(self):
167 assert self._result is not None
168 return self._result
169
170
171 class _BulkObjectFileAnalyzerMaster(object):
172 """Runs BulkObjectFileAnalyzer in a subprocess."""
173
174 def __init__(self, tool_prefix, output_directory):
175 self._process = None
176 self._tool_prefix = tool_prefix
177 self._output_directory = output_directory
178
179 def _Spawn(self):
180 log_level = str(logging.getLogger().getEffectiveLevel())
181 args = [sys.executable, __file__, log_level, self._tool_prefix,
182 self._output_directory]
183 self._process = subprocess.Popen(
184 args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
185
186 def AnalyzePaths(self, paths):
187 if self._process is None:
188 self._Spawn()
189
190 logging.debug('Sending batch of %d paths to subprocess', len(paths))
191 payload = '\x01'.join(paths)
192 self._process.stdin.write('{:08x}'.format(len(payload)))
193 self._process.stdin.write(payload)
194
195 def Close(self):
196 assert not self._process.stdin.closed
197 self._process.stdin.close()
198
199 def Get(self):
200 assert self._process.stdin.closed
201 logging.debug('Decoding nm results from forked process')
202
203 encoded_keys_len = int(self._process.stdout.read(8), 16)
204 encoded_keys = self._process.stdout.read(encoded_keys_len)
205 encoded_values = self._process.stdout.read()
206 return concurrent.DecodeDictOfLists(encoded_keys, encoded_values)
207
208
# Use the forking master normally; run in-process when async is disabled.
BulkObjectFileAnalyzer = (
    _BulkObjectFileAnalyzerWorker if concurrent.DISABLE_ASYNC
    else _BulkObjectFileAnalyzerMaster)
212
213
def _SubMain(log_level, tool_prefix, output_directory):
  """Subprocess entry point.

  Reads \x01-delimited path batches from stdin until EOF, then writes the
  encoded name->paths result to stdout (8 hex digits of key length, the
  keys, then the values).
  """
  logging.basicConfig(level=int(log_level),
                      format='%(levelname).1s %(relativeCreated)6d %(message)s')
  bulk_analyzer = _BulkObjectFileAnalyzerWorker(tool_prefix, output_directory)
  while True:
    # Each frame is length-prefixed with 8 hex digits; EOF reads as ''.
    header = sys.stdin.read(8)
    payload_len = int(header, 16) if header else 0
    if not payload_len:
      logging.debug('nm bulk subprocess received eof.')
      break
    payload = sys.stdin.read(payload_len)
    bulk_analyzer.AnalyzePaths(payload.split('\x01'))

  bulk_analyzer.Close()
  encoded_keys, encoded_values = concurrent.EncodeDictOfLists(
      bulk_analyzer.Get())
  sys.stdout.write('%08x' % len(encoded_keys))
  sys.stdout.write(encoded_keys)
  sys.stdout.write(encoded_values)
  logging.debug('nm bulk subprocess finished.')
233
234
if __name__ == '__main__':
  # Invoked by _BulkObjectFileAnalyzerMaster._Spawn() with argv:
  # log_level, tool_prefix, output_directory.
  _SubMain(*sys.argv[1:])
OLDNEW
« no previous file with comments | « tools/binary_size/libsupersize/ninja_parser.py ('k') | tools/binary_size/libsupersize/template/D3SymbolTreeMap.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698