Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(274)

Side by Side Diff: tools/binary_size/libsupersize/nm.py

Issue 2832253004: supersize: nm in progress (Closed)
Patch Set: supersize: Track symbol aliases and shared symbols Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2017 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Dealing with "nm" tool."""
6
7 import collections
8 import logging
9 import os
10 import subprocess
11 import sys
12
13 import concurrent
14
15
def CollectAliasesByAddress(elf_path, tool_prefix):
  """Runs nm on |elf_path| and groups text-section symbol names by address.

  Returns:
    {address (int): [name, ...]} containing only addresses that have more
    than one name (i.e. actual aliases).
  """
  # About 60mb of output, but piping takes ~30s, and loading it into RAM
  # directly takes 3s.
  output = subprocess.check_output(
      [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle',
       elf_path])

  names_by_address = collections.defaultdict(list)
  for line in output.splitlines():
    address_str, section, name = line.split(' ', 2)
    # Keep only text-section symbols; '$'-prefixed names are skipped.
    if section not in 'tT' or not name or name[0] == '$':
      continue
    # Mask off the low bit of the address; ignore address 0.
    address = int(address_str, 16) & 0xfffffffffffffffe
    if not address:
      continue
    # Constructors often show up twice, so de-dupe per address.
    existing_names = names_by_address[address]
    if name not in existing_names:
      existing_names.append(name)

  # Since this is run in a separate process, minimize data passing by
  # returning only aliased symbols.
  return {addr: names for addr, names in names_by_address.iteritems()
          if len(names) > 1}
43
44
def _CollectAliasesByAddressAsyncHelper(elf_path, tool_prefix):
  """Runs in a forked process; encodes the result for cheap transfer back.

  Integer address keys are stringified via key_transform for encoding.
  """
  return concurrent.EncodeDictOfLists(
      CollectAliasesByAddress(elf_path, tool_prefix), key_transform=str)
48
49
def CollectAliasesByAddressAsync(elf_path, tool_prefix):
  """Forks a helper process to run CollectAliasesByAddress().

  The decode step converts the stringified address keys back to ints.
  """
  decode = lambda encoded: concurrent.DecodeDictOfLists(
      encoded[0], encoded[1], key_transform=int)
  return concurrent.ForkAndCall(
      _CollectAliasesByAddressAsyncHelper, (elf_path, tool_prefix),
      decode_func=decode)
57
58
59 def _ParseOneObjectFileOutput(lines):
60 ret = []
61 for line in lines:
62 if not line:
63 break
64 sep = line.find(' ') # Skip over address.
65 sep = line.find(' ', sep + 1) # Skip over symbol type.
66 name = line[sep + 1:]
67 # Skip lines like:
68 # 00000000 t $t
69 # 00000000 r $d
70 # 0000041b r .L.str.38
71 if name[0] not in '$.':
72 ret.append(name)
73 return ret
74
75
def _BatchCollectNames(target, tool_prefix, output_directory):
  """Runs nm on |target| and returns an encoded {path: [name, ...]} dict.

  Args:
    target: Either a single archive path (str) or a list of object file
        paths (list) to pass to nm in one invocation.
    tool_prefix: Toolchain prefix prepended to 'nm'.
    output_directory: Working directory for the nm invocation (paths are
        relative to it).

  Returns:
    The (encoded_keys, encoded_values) tuple from
    concurrent.EncodeDictOfLists, or ('', '') if nm produced no output.
  """
  # A str target means one archive (.a); a list means a batch of .o files.
  is_archive = isinstance(target, basestring)
  # Ensure tool_prefix is absolute so that CWD does not affect it
  if os.path.sep in tool_prefix:
    # Use abspath() on the dirname to avoid it stripping a trailing /.
    dirname = os.path.dirname(tool_prefix)
    tool_prefix = os.path.abspath(dirname) + tool_prefix[len(dirname):]

  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle']
  if is_archive:
    args.append(target)
  else:
    args.extend(target)
  output = subprocess.check_output(args, cwd=output_directory)
  lines = output.splitlines()
  if not lines:
    return '', ''
  # With multiple input files, nm starts with a blank line, then emits a
  # "path:" header before each file's symbols.
  is_multi_file = not lines[0]
  lines = iter(lines)
  if is_multi_file:
    next(lines)
    path = next(lines)[:-1]  # Path ends with a colon.
  else:
    # Single-file output has no header; the path must come from |target|.
    assert not is_archive
    path = target[0]

  ret = {}
  while True:
    if is_archive:
      # E.g. foo/bar.a(baz.o)
      path = '%s(%s)' % (target, path)
    # The multiprocess API uses pickle, which is ridiculously slow. More than 2x
    # faster to use join & split.
    # _ParseOneObjectFileOutput consumes lines up to the next blank separator.
    ret[path] = _ParseOneObjectFileOutput(lines)
    # Next header line, or ':' sentinel (-> '' after slicing) at exhaustion.
    path = next(lines, ':')[:-1]
    if not path:
      return concurrent.EncodeDictOfLists(ret)
113
114
def BulkAnalyzeObjectFiles(paths, tool_prefix, output_directory):
  """Runs nm over all |paths| in parallel and inverts the results.

  Returns:
    {symbol name: [path, ...]} mapping each name to the object files /
    archive members that define it.
  """
  BATCH_SIZE = 50  # Chosen arbitrarily.

  def iter_job_params():
    # Archives get one job each; loose object files are batched together.
    loose_objects = []
    for path in paths:
      if path.endswith('.a'):
        yield path, tool_prefix, output_directory
      else:
        loose_objects.append(path)

    for start in xrange(0, len(loose_objects), BATCH_SIZE):
      yield (loose_objects[start:start + BATCH_SIZE], tool_prefix,
             output_directory)

  paths_by_name = collections.defaultdict(list)
  for encoded_ret in concurrent.IterForkAndCall(_BatchCollectNames,
                                                iter_job_params()):
    names_by_path = concurrent.DecodeDictOfLists(*encoded_ret)
    # Invert {path: names} into {name: paths}.
    for path, names in names_by_path.iteritems():
      for name in names:
        paths_by_name[name].append(path)

  # TODO(agrieve): Combining the two calls to BulkAnalyzeObjectFiles() would
  # allow pruning entries from this dict with only one path, and make
  # marshalling it back much faster.
  return paths_by_name
141
142
def BulkAnalyzeObjectFilesAsync(paths, tool_prefix, output_directory):
  """Runs BulkAnalyzeObjectFiles() in a subprocess, off the calling thread."""
  def run_in_subprocess():
    # Forked processes cannot spawn further background processes, so use
    # a subprocess instead.
    args = [sys.executable, __file__,
            str(logging.getLogger().getEffectiveLevel()),
            tool_prefix, output_directory]
    proc = subprocess.Popen(args, stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)

    stdout = proc.communicate('\x01'.join(paths))[0]
    assert proc.returncode == 0
    logging.debug('Decoding nm results from forked process')

    # The first 8 chars are the hex-encoded length of the encoded keys.
    encoded_len = int(stdout[:8], 16)
    return concurrent.DecodeDictOfLists(stdout[8:8 + encoded_len],
                                        stdout[8 + encoded_len:])
  return concurrent.CallOnThread(run_in_subprocess)
159
160
def main():
  """Subprocess entry point for BulkAnalyzeObjectFilesAsync().

  Reads '\\x01'-delimited paths from stdin; writes an 8-hex-char length
  prefix followed by the encoded keys and values to stdout.
  """
  log_level, tool_prefix, output_directory = sys.argv[1:]
  logging.basicConfig(level=int(log_level),
                      format='%(levelname).1s %(relativeCreated)6d %(message)s')
  paths = sys.stdin.read().split('\x01')
  paths_by_name = BulkAnalyzeObjectFiles(paths, tool_prefix, output_directory)
  encoded_keys, encoded_values = concurrent.EncodeDictOfLists(paths_by_name)
  # The length prefix lets the reader split keys from values in one pass.
  sys.stdout.write('%08x%s%s' % (len(encoded_keys), encoded_keys,
                                 encoded_values))
  logging.debug('nm bulk subprocess finished.')


if __name__ == '__main__':
  main()
OLDNEW
« no previous file with comments | « tools/binary_size/libsupersize/ninja_parser.py ('k') | tools/binary_size/libsupersize/template/D3SymbolTreeMap.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698