OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/env python |
2 # Copyright 2014 The Chromium Authors. All rights reserved. | 2 # Copyright 2014 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Generate a spatial analysis against an arbitrary library. | 6 """Generate a spatial analysis against an arbitrary library. |
7 | 7 |
8 To use, build the 'binary_size_tool' target. Then run this tool, passing | 8 To use, build the 'binary_size_tool' target. Then run this tool, passing |
9 in the location of the library to be analyzed along with any other options | 9 in the location of the library to be analyzed along with any other options |
10 you desire. | 10 you desire. |
11 """ | 11 """ |
12 | 12 |
13 import collections | 13 import collections |
14 import fileinput | |
15 import json | 14 import json |
| 15 import logging |
| 16 import multiprocessing |
16 import optparse | 17 import optparse |
17 import os | 18 import os |
18 import pprint | |
19 import re | 19 import re |
20 import shutil | 20 import shutil |
21 import subprocess | 21 import subprocess |
22 import sys | 22 import sys |
23 import tempfile | 23 import tempfile |
| 24 import time |
| 25 |
| 26 import binary_size_utils |
| 27 |
 | 28 # This path change is not beautiful. Temporary (I hope) measure until |
| 29 # the chromium project has figured out a proper way to organize the |
| 30 # library of python tools. http://crbug.com/375725 |
| 31 elf_symbolizer_path = os.path.abspath(os.path.join( |
| 32 os.path.dirname(__file__), |
| 33 '..', |
| 34 '..', |
| 35 'build', |
| 36 'android', |
| 37 'pylib')) |
| 38 sys.path.append(elf_symbolizer_path) |
| 39 import symbols.elf_symbolizer as elf_symbolizer |
24 | 40 |
25 | 41 |
26 # TODO(andrewhayden): Only used for legacy reports. Delete. | 42 # TODO(andrewhayden): Only used for legacy reports. Delete. |
27 def FormatBytes(bytes): | 43 def FormatBytes(byte_count): |
28 """Pretty-print a number of bytes.""" | 44 """Pretty-print a number of bytes.""" |
29 if bytes > 1e6: | 45 if byte_count > 1e6: |
30 bytes = bytes / 1.0e6 | 46 byte_count = byte_count / 1.0e6 |
31 return '%.1fm' % bytes | 47 return '%.1fm' % byte_count |
32 if bytes > 1e3: | 48 if byte_count > 1e3: |
33 bytes = bytes / 1.0e3 | 49 byte_count = byte_count / 1.0e3 |
34 return '%.1fk' % bytes | 50 return '%.1fk' % byte_count |
35 return str(bytes) | 51 return str(byte_count) |
36 | 52 |
37 | 53 |
38 # TODO(andrewhayden): Only used for legacy reports. Delete. | 54 # TODO(andrewhayden): Only used for legacy reports. Delete. |
39 def SymbolTypeToHuman(type): | 55 def SymbolTypeToHuman(symbol_type): |
40 """Convert a symbol type as printed by nm into a human-readable name.""" | 56 """Convert a symbol type as printed by nm into a human-readable name.""" |
41 return {'b': 'bss', | 57 return {'b': 'bss', |
42 'd': 'data', | 58 'd': 'data', |
43 'r': 'read-only data', | 59 'r': 'read-only data', |
44 't': 'code', | 60 't': 'code', |
45 'w': 'weak symbol', | 61 'w': 'weak symbol', |
46 'v': 'weak symbol'}[type] | 62 'v': 'weak symbol'}[symbol_type] |
47 | |
48 | |
49 def ParseNm(input): | |
50 """Parse nm output. | |
51 | |
52 Argument: an iterable over lines of nm output. | |
53 | |
54 Yields: (symbol name, symbol type, symbol size, source file path). | |
55 Path may be None if nm couldn't figure out the source file. | |
56 """ | |
57 | |
58 # Match lines with size, symbol, optional location, optional discriminator | |
59 sym_re = re.compile(r'^[0-9a-f]{8} ' # address (8 hex digits) | |
60 '([0-9a-f]{8}) ' # size (8 hex digits) | |
61 '(.) ' # symbol type, one character | |
62 '([^\t]+)' # symbol name, separated from next by tab | |
63 '(?:\t(.*):[\d\?]+)?.*$') # location | |
64 # Match lines with addr but no size. | |
65 addr_re = re.compile(r'^[0-9a-f]{8} (.) ([^\t]+)(?:\t.*)?$') | |
66 # Match lines that don't have an address at all -- typically external symbols. | |
67 noaddr_re = re.compile(r'^ {8} (.) (.*)$') | |
68 | |
69 for line in input: | |
70 line = line.rstrip() | |
71 match = sym_re.match(line) | |
72 if match: | |
73 size, type, sym = match.groups()[0:3] | |
74 size = int(size, 16) | |
75 if type.lower() == 'b': | |
76 continue # skip all BSS for now | |
77 path = match.group(4) | |
78 yield sym, type, size, path | |
79 continue | |
80 match = addr_re.match(line) | |
81 if match: | |
82 type, sym = match.groups()[0:2] | |
83 # No size == we don't care. | |
84 continue | |
85 match = noaddr_re.match(line) | |
86 if match: | |
87 type, sym = match.groups() | |
88 if type in ('U', 'w'): | |
89 # external or weak symbol | |
90 continue | |
91 | |
92 print >>sys.stderr, 'unparsed:', repr(line) | |
93 | 63 |
94 | 64 |
95 def _MkChild(node, name): | 65 def _MkChild(node, name): |
96 child = None | 66 child = node['children'].get(name) |
97 for test in node['children']: | 67 if child is None: |
98 if test['n'] == name: | 68 child = {'n': name, 'children': {}} |
99 child = test | 69 node['children'][name] = child |
100 break | |
101 if not child: | |
102 child = {'n': name, 'children': []} | |
103 node['children'].append(child) | |
104 return child | 70 return child |
105 | 71 |
106 | 72 |
| 73 def MakeChildrenDictsIntoLists(node): |
| 74 largest_list_len = 0 |
| 75 if 'children' in node: |
| 76 largest_list_len = len(node['children']) |
| 77 child_list = [] |
| 78 for child in node['children'].itervalues(): |
| 79 child_largest_list_len = MakeChildrenDictsIntoLists(child) |
| 80 if child_largest_list_len > largest_list_len: |
| 81 largest_list_len = child_largest_list_len |
| 82 child_list.append(child) |
| 83 node['children'] = child_list |
| 84 |
| 85 return largest_list_len |
| 86 |
| 87 |
107 def MakeCompactTree(symbols): | 88 def MakeCompactTree(symbols): |
108 result = {'n': '/', 'children': [], 'k': 'p', 'maxDepth': 0} | 89 result = {'n': '/', 'children': {}, 'k': 'p', 'maxDepth': 0} |
| 90 seen_symbol_with_path = False |
109 for symbol_name, symbol_type, symbol_size, file_path in symbols: | 91 for symbol_name, symbol_type, symbol_size, file_path in symbols: |
110 | 92 |
111 if 'vtable for ' in symbol_name: | 93 if 'vtable for ' in symbol_name: |
112 symbol_type = '@' # hack to categorize these separately | 94 symbol_type = '@' # hack to categorize these separately |
113 # Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz'] | 95 # Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz'] |
114 if file_path: | 96 if file_path: |
115 file_path = os.path.normpath(file_path) | 97 file_path = os.path.normpath(file_path) |
| 98 seen_symbol_with_path = True |
116 else: | 99 else: |
117 file_path = '(No Path)' | 100 file_path = '(No Path)' |
118 | 101 |
119 if file_path.startswith('/'): | 102 if file_path.startswith('/'): |
120 file_path = file_path[1:] | 103 file_path = file_path[1:] |
121 path_parts = file_path.split('/') | 104 path_parts = file_path.split('/') |
122 | 105 |
123 # Find pre-existing node in tree, or update if it already exists | 106 # Find pre-existing node in tree, or update if it already exists |
124 node = result | 107 node = result |
125 depth = 0 | 108 depth = 0 |
126 while len(path_parts) > 0: | 109 while len(path_parts) > 0: |
127 path_part = path_parts.pop(0) | 110 path_part = path_parts.pop(0) |
128 if len(path_part) == 0: | 111 if len(path_part) == 0: |
129 continue | 112 continue |
130 depth += 1 | 113 depth += 1 |
131 node = _MkChild(node, path_part); | 114 node = _MkChild(node, path_part) |
| 115 assert not 'k' in node or node['k'] == 'p' |
132 node['k'] = 'p' # p for path | 116 node['k'] = 'p' # p for path |
133 | 117 |
134 # 'node' is now the file node. Find the symbol-type bucket. | 118 # 'node' is now the file node. Find the symbol-type bucket. |
135 node['lastPathElement'] = True | 119 node['lastPathElement'] = True |
136 node = _MkChild(node, symbol_type) | 120 node = _MkChild(node, symbol_type) |
| 121 assert not 'k' in node or node['k'] == 'b' |
137 node['t'] = symbol_type | 122 node['t'] = symbol_type |
138 node['k'] = 'b' # b for bucket | 123 node['k'] = 'b' # b for bucket |
139 depth += 1 | 124 depth += 1 |
140 | 125 |
141 # 'node' is now the symbol-type bucket. Make the child entry. | 126 # 'node' is now the symbol-type bucket. Make the child entry. |
142 node = _MkChild(node, symbol_name) | 127 node = _MkChild(node, symbol_name) |
143 if 'children' in node: # Only possible if we're adding duplicate entries!!! | 128 if 'children' in node: |
| 129 if node['children']: |
| 130 logging.warning('A container node used as symbol for %s.' % symbol_name) |
| 131 # This is going to be used as a leaf so no use for child list. |
144 del node['children'] | 132 del node['children'] |
145 node['value'] = symbol_size | 133 node['value'] = symbol_size |
146 node['t'] = symbol_type | 134 node['t'] = symbol_type |
147 node['k'] = 's' # s for symbol | 135 node['k'] = 's' # s for symbol |
148 depth += 1 | 136 depth += 1 |
149 result['maxDepth'] = max(result['maxDepth'], depth); | 137 result['maxDepth'] = max(result['maxDepth'], depth) |
150 | 138 |
| 139 if not seen_symbol_with_path: |
| 140 logging.warning('Symbols lack paths. Data will not be structured.') |
| 141 |
| 142 largest_list_len = MakeChildrenDictsIntoLists(result) |
| 143 |
| 144 if largest_list_len > 1000: |
| 145 logging.warning('There are sections with %d nodes. ' |
| 146 'Results might be unusable.' % largest_list_len) |
151 return result | 147 return result |
152 | 148 |
153 | 149 |
154 # TODO(andrewhayden): Only used for legacy reports. Delete. | 150 # TODO(andrewhayden): Only used for legacy reports. Delete. |
155 def TreeifySymbols(symbols): | 151 def TreeifySymbols(symbols): |
156 """Convert symbols into a path-based tree, calculating size information | 152 """Convert symbols into a path-based tree, calculating size information |
157 along the way. | 153 along the way. |
158 | 154 |
159 The result is a dictionary that contains two kinds of nodes: | 155 The result is a dictionary that contains two kinds of nodes: |
160 1. Leaf nodes, representing source code locations (e.g., c++ files) | 156 1. Leaf nodes, representing source code locations (e.g., c++ files) |
161 These nodes have the following dictionary entries: | 157 These nodes have the following dictionary entries: |
162 sizes: a dictionary whose keys are categories (such as code, data, | 158 sizes: a dictionary whose keys are categories (such as code, data, |
163 vtable, etceteras) and whose values are the size, in bytes, of | 159 vtable, etceteras) and whose values are the size, in bytes, of |
164 those categories; | 160 those categories; |
165 size: the total size, in bytes, of all the entries in the sizes dict | 161 size: the total size, in bytes, of all the entries in the sizes dict |
166 2. Non-leaf nodes, representing directories | 162 2. Non-leaf nodes, representing directories |
167 These nodes have the following dictionary entries: | 163 These nodes have the following dictionary entries: |
168 children: a dictionary whose keys are names (path entries; either | 164 children: a dictionary whose keys are names (path entries; either |
169 directory or file names) and whose values are other nodes; | 165 directory or file names) and whose values are other nodes; |
170 size: the total size, in bytes, of all the leaf nodes that are | 166 size: the total size, in bytes, of all the leaf nodes that are |
171 contained within the children dict (recursively expanded) | 167 contained within the children dict (recursively expanded) |
172 | 168 |
173 The result object is itself a dictionary that represents the common ancestor | 169 The result object is itself a dictionary that represents the common ancestor |
174 of all child nodes, e.g. a path to which all other nodes beneath it are | 170 of all child nodes, e.g. a path to which all other nodes beneath it are |
175 relative. The 'size' attribute of this dict yields the sum of the size of all | 171 relative. The 'size' attribute of this dict yields the sum of the size of all |
176 leaf nodes within the data structure. | 172 leaf nodes within the data structure. |
177 """ | 173 """ |
178 dirs = {'children': {}, 'size': 0} | 174 dirs = {'children': {}, 'size': 0} |
179 for sym, type, size, path in symbols: | 175 for sym, symbol_type, size, path in symbols: |
180 dirs['size'] += size | 176 dirs['size'] += size |
181 if path: | 177 if path: |
182 path = os.path.normpath(path) | 178 path = os.path.normpath(path) |
183 if path.startswith('/'): | 179 if path.startswith('/'): |
184 path = path[1:] | 180 path = path[1:] |
185 | 181 |
186 parts = None | 182 parts = None |
187 if path: | 183 if path: |
188 parts = path.split('/') | 184 parts = path.split('/') |
189 | 185 |
(...skipping 12 matching lines...) Expand all Loading... |
202 | 198 |
203 # Get (creating if necessary) the node for the file | 199 # Get (creating if necessary) the node for the file |
204 # This node doesn't have a 'children' attribute | 200 # This node doesn't have a 'children' attribute |
205 if file_key not in tree['children']: | 201 if file_key not in tree['children']: |
206 tree['children'][file_key] = {'sizes': collections.defaultdict(int), | 202 tree['children'][file_key] = {'sizes': collections.defaultdict(int), |
207 'size': 0} | 203 'size': 0} |
208 tree = tree['children'][file_key] | 204 tree = tree['children'][file_key] |
209 tree['size'] += size | 205 tree['size'] += size |
210 | 206 |
211 # Accumulate size into a bucket within the file | 207 # Accumulate size into a bucket within the file |
212 type = type.lower() | 208 symbol_type = symbol_type.lower() |
213 if 'vtable for ' in sym: | 209 if 'vtable for ' in sym: |
214 tree['sizes']['[vtable]'] += size | 210 tree['sizes']['[vtable]'] += size |
215 elif 'r' == type: | 211 elif 'r' == symbol_type: |
216 tree['sizes']['[rodata]'] += size | 212 tree['sizes']['[rodata]'] += size |
217 elif 'd' == type: | 213 elif 'd' == symbol_type: |
218 tree['sizes']['[data]'] += size | 214 tree['sizes']['[data]'] += size |
219 elif 'b' == type: | 215 elif 'b' == symbol_type: |
220 tree['sizes']['[bss]'] += size | 216 tree['sizes']['[bss]'] += size |
221 elif 't' == type: | 217 elif 't' == symbol_type: |
222 # 'text' in binary parlance means 'code'. | 218 # 'text' in binary parlance means 'code'. |
223 tree['sizes']['[code]'] += size | 219 tree['sizes']['[code]'] += size |
224 elif 'w' == type: | 220 elif 'w' == symbol_type: |
225 tree['sizes']['[weak]'] += size | 221 tree['sizes']['[weak]'] += size |
226 else: | 222 else: |
227 tree['sizes']['[other]'] += size | 223 tree['sizes']['[other]'] += size |
228 except: | 224 except: |
229 print >>sys.stderr, sym, parts, key | 225 print >> sys.stderr, sym, parts, file_key |
230 raise | 226 raise |
231 else: | 227 else: |
232 key = 'symbols without paths' | 228 key = 'symbols without paths' |
233 if key not in dirs['children']: | 229 if key not in dirs['children']: |
234 dirs['children'][key] = {'sizes': collections.defaultdict(int), | 230 dirs['children'][key] = {'sizes': collections.defaultdict(int), |
235 'size': 0} | 231 'size': 0} |
236 tree = dirs['children'][key] | 232 tree = dirs['children'][key] |
237 subkey = 'misc' | 233 subkey = 'misc' |
238 if (sym.endswith('::__FUNCTION__') or | 234 if (sym.endswith('::__FUNCTION__') or |
239 sym.endswith('::__PRETTY_FUNCTION__')): | 235 sym.endswith('::__PRETTY_FUNCTION__')): |
(...skipping 25 matching lines...) Expand all Loading... |
265 if 'children' in tree: | 261 if 'children' in tree: |
266 # Non-leaf node. Recurse. | 262 # Non-leaf node. Recurse. |
267 for child_name, child in tree['children'].iteritems(): | 263 for child_name, child in tree['children'].iteritems(): |
268 children.append(JsonifyTree(child, child_name)) | 264 children.append(JsonifyTree(child, child_name)) |
269 else: | 265 else: |
270 # Leaf node; dump per-file stats as entries in the treemap | 266 # Leaf node; dump per-file stats as entries in the treemap |
271 for kind, size in tree['sizes'].iteritems(): | 267 for kind, size in tree['sizes'].iteritems(): |
272 child_json = {'name': kind + ' (' + FormatBytes(size) + ')', | 268 child_json = {'name': kind + ' (' + FormatBytes(size) + ')', |
273 'data': { '$area': size }} | 269 'data': { '$area': size }} |
274 css_class = css_class_map.get(kind) | 270 css_class = css_class_map.get(kind) |
275 if css_class is not None: child_json['data']['$symbol'] = css_class | 271 if css_class is not None: |
| 272 child_json['data']['$symbol'] = css_class |
276 children.append(child_json) | 273 children.append(child_json) |
277 # Sort children by size, largest to smallest. | 274 # Sort children by size, largest to smallest. |
278 children.sort(key=lambda child: -child['data']['$area']) | 275 children.sort(key=lambda child: -child['data']['$area']) |
279 | 276 |
280 # For leaf nodes, the 'size' attribute is the size of the leaf; | 277 # For leaf nodes, the 'size' attribute is the size of the leaf; |
281 # Non-leaf nodes don't really have a size, but their 'size' attribute is | 278 # Non-leaf nodes don't really have a size, but their 'size' attribute is |
282 # the sum of the sizes of all their children. | 279 # the sum of the sizes of all their children. |
283 return {'name': name + ' (' + FormatBytes(tree['size']) + ')', | 280 return {'name': name + ' (' + FormatBytes(tree['size']) + ')', |
284 'data': { '$area': tree['size'] }, | 281 'data': { '$area': tree['size'] }, |
285 'children': children } | 282 'children': children } |
286 | 283 |
287 def DumpCompactTree(symbols, outfile): | 284 def DumpCompactTree(symbols, outfile): |
288 out = open(outfile, 'w') | 285 tree_root = MakeCompactTree(symbols) |
289 try: | 286 with open(outfile, 'w') as out: |
290 out.write('var tree_data = ' + json.dumps(MakeCompactTree(symbols))) | 287 out.write('var tree_data = ') |
291 finally: | 288 json.dump(tree_root, out) |
292 out.flush() | 289 print('Writing %d bytes json' % os.path.getsize(outfile)) |
293 out.close() | |
294 | 290 |
295 | 291 |
296 # TODO(andrewhayden): Only used for legacy reports. Delete. | 292 # TODO(andrewhayden): Only used for legacy reports. Delete. |
297 def DumpTreemap(symbols, outfile): | 293 def DumpTreemap(symbols, outfile): |
298 dirs = TreeifySymbols(symbols) | 294 dirs = TreeifySymbols(symbols) |
299 out = open(outfile, 'w') | 295 out = open(outfile, 'w') |
300 try: | 296 try: |
301 out.write('var kTree = ' + json.dumps(JsonifyTree(dirs, '/'))) | 297 out.write('var kTree = ' + json.dumps(JsonifyTree(dirs, '/'))) |
302 finally: | 298 finally: |
303 out.flush() | 299 out.flush() |
304 out.close() | 300 out.close() |
305 | 301 |
306 | 302 |
307 # TODO(andrewhayden): Only used for legacy reports. Delete. | 303 # TODO(andrewhayden): Only used for legacy reports. Delete. |
308 def DumpLargestSymbols(symbols, outfile, n): | 304 def DumpLargestSymbols(symbols, outfile, n): |
309 # a list of (sym, type, size, path); sort by size. | 305 # a list of (sym, symbol_type, size, path); sort by size. |
310 symbols = sorted(symbols, key=lambda x: -x[2]) | 306 symbols = sorted(symbols, key=lambda x: -x[2]) |
311 dumped = 0 | 307 dumped = 0 |
312 out = open(outfile, 'w') | 308 out = open(outfile, 'w') |
313 try: | 309 try: |
314 out.write('var largestSymbols = [\n') | 310 out.write('var largestSymbols = [\n') |
315 for sym, type, size, path in symbols: | 311 for sym, symbol_type, size, path in symbols: |
316 if type in ('b', 'w'): | 312 if symbol_type in ('b', 'w'): |
317 continue # skip bss and weak symbols | 313 continue # skip bss and weak symbols |
318 if path is None: | 314 if path is None: |
319 path = '' | 315 path = '' |
320 entry = {'size': FormatBytes(size), | 316 entry = {'size': FormatBytes(size), |
321 'symbol': sym, | 317 'symbol': sym, |
322 'type': SymbolTypeToHuman(type), | 318 'type': SymbolTypeToHuman(symbol_type), |
323 'location': path } | 319 'location': path } |
324 out.write(json.dumps(entry)) | 320 out.write(json.dumps(entry)) |
325 out.write(',\n') | 321 out.write(',\n') |
326 dumped += 1 | 322 dumped += 1 |
327 if dumped >= n: | 323 if dumped >= n: |
328 return | 324 return |
329 finally: | 325 finally: |
330 out.write('];\n') | 326 out.write('];\n') |
331 out.flush() | 327 out.flush() |
332 out.close() | 328 out.close() |
333 | 329 |
334 | 330 |
335 def MakeSourceMap(symbols): | 331 def MakeSourceMap(symbols): |
336 sources = {} | 332 sources = {} |
337 for sym, type, size, path in symbols: | 333 for _sym, _symbol_type, size, path in symbols: |
338 key = None | 334 key = None |
339 if path: | 335 if path: |
340 key = os.path.normpath(path) | 336 key = os.path.normpath(path) |
341 else: | 337 else: |
342 key = '[no path]' | 338 key = '[no path]' |
343 if key not in sources: | 339 if key not in sources: |
344 sources[key] = {'path': path, 'symbol_count': 0, 'size': 0} | 340 sources[key] = {'path': path, 'symbol_count': 0, 'size': 0} |
345 record = sources[key] | 341 record = sources[key] |
346 record['size'] += size | 342 record['size'] += size |
347 record['symbol_count'] += 1 | 343 record['symbol_count'] += 1 |
348 return sources | 344 return sources |
349 | 345 |
350 | 346 |
351 # TODO(andrewhayden): Only used for legacy reports. Delete. | 347 # TODO(andrewhayden): Only used for legacy reports. Delete. |
352 def DumpLargestSources(symbols, outfile, n): | 348 def DumpLargestSources(symbols, outfile, n): |
353 map = MakeSourceMap(symbols) | 349 source_map = MakeSourceMap(symbols) |
354 sources = sorted(map.values(), key=lambda x: -x['size']) | 350 sources = sorted(source_map.values(), key=lambda x: -x['size']) |
355 dumped = 0 | 351 dumped = 0 |
356 out = open(outfile, 'w') | 352 out = open(outfile, 'w') |
357 try: | 353 try: |
358 out.write('var largestSources = [\n') | 354 out.write('var largestSources = [\n') |
359 for record in sources: | 355 for record in sources: |
360 entry = {'size': FormatBytes(record['size']), | 356 entry = {'size': FormatBytes(record['size']), |
361 'symbol_count': str(record['symbol_count']), | 357 'symbol_count': str(record['symbol_count']), |
362 'location': record['path']} | 358 'location': record['path']} |
363 out.write(json.dumps(entry)) | 359 out.write(json.dumps(entry)) |
364 out.write(',\n') | 360 out.write(',\n') |
365 dumped += 1 | 361 dumped += 1 |
366 if dumped >= n: | 362 if dumped >= n: |
367 return | 363 return |
368 finally: | 364 finally: |
369 out.write('];\n') | 365 out.write('];\n') |
370 out.flush() | 366 out.flush() |
371 out.close() | 367 out.close() |
372 | 368 |
373 | 369 |
374 # TODO(andrewhayden): Only used for legacy reports. Delete. | 370 # TODO(andrewhayden): Only used for legacy reports. Delete. |
375 def DumpLargestVTables(symbols, outfile, n): | 371 def DumpLargestVTables(symbols, outfile, n): |
376 vtables = [] | 372 vtables = [] |
377 for symbol, type, size, path in symbols: | 373 for symbol, _type, size, path in symbols: |
378 if 'vtable for ' in symbol: | 374 if 'vtable for ' in symbol: |
379 vtables.append({'symbol': symbol, 'path': path, 'size': size}) | 375 vtables.append({'symbol': symbol, 'path': path, 'size': size}) |
380 vtables = sorted(vtables, key=lambda x: -x['size']) | 376 vtables = sorted(vtables, key=lambda x: -x['size']) |
381 dumped = 0 | 377 dumped = 0 |
382 out = open(outfile, 'w') | 378 out = open(outfile, 'w') |
383 try: | 379 try: |
384 out.write('var largestVTables = [\n') | 380 out.write('var largestVTables = [\n') |
385 for record in vtables: | 381 for record in vtables: |
386 entry = {'size': FormatBytes(record['size']), | 382 entry = {'size': FormatBytes(record['size']), |
387 'symbol': record['symbol'], | 383 'symbol': record['symbol'], |
388 'location': record['path']} | 384 'location': record['path']} |
389 out.write(json.dumps(entry)) | 385 out.write(json.dumps(entry)) |
390 out.write(',\n') | 386 out.write(',\n') |
391 dumped += 1 | 387 dumped += 1 |
392 if dumped >= n: | 388 if dumped >= n: |
393 return | 389 return |
394 finally: | 390 finally: |
395 out.write('];\n') | 391 out.write('];\n') |
396 out.flush() | 392 out.flush() |
397 out.close() | 393 out.close() |
398 | 394 |
399 | 395 |
400 # TODO(andrewhayden): Switch to Primiano's python-based version. | 396 # Regex for parsing "nm" output. A sample line looks like this: |
401 def RunParallelAddress2Line(outfile, library, arch, jobs, verbose): | 397 # 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95 |
402 """Run a parallel addr2line processing engine to dump and resolve symbols.""" | 398 # |
403 out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out') | 399 # The fields are: address, size, type, name, source location |
404 build_type = os.getenv('BUILDTYPE', 'Release') | 400 # Regular expression explained ( see also: https://xkcd.com/208 ): |
405 classpath = os.path.join(out_dir, build_type, 'lib.java', | 401 # ([0-9a-f]{8,}+) The address |
406 'binary_size_java.jar') | 402 # [\s]+ Whitespace separator |
407 cmd = ['java', | 403 # ([0-9a-f]{8,}+) The size. From here on out it's all optional. |
408 '-classpath', classpath, | 404 # [\s]+ Whitespace separator |
409 'org.chromium.tools.binary_size.ParallelAddress2Line', | 405 # (\S?) The symbol type, which is any non-whitespace char |
410 '--disambiguate', | 406 # [\s*] Whitespace separator |
411 '--outfile', outfile, | 407 # ([^\t]*) Symbol name, any non-tab character (spaces ok!) |
412 '--library', library, | 408 # [\t]? Tab separator |
413 '--threads', jobs] | 409 # (.*) The location (filename[:linenum|?][ (discriminator n)] |
414 if verbose is True: | 410 sNmPattern = re.compile( |
415 cmd.append('--verbose') | 411 r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)') |
416 prefix = os.path.join('third_party', 'android_tools', 'ndk', 'toolchains') | |
417 if arch == 'android-arm': | |
418 prefix = os.path.join(prefix, 'arm-linux-androideabi-4.8', 'prebuilt', | |
419 'linux-x86_64', 'bin', 'arm-linux-androideabi-') | |
420 cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line']) | |
421 elif arch == 'android-mips': | |
422 prefix = os.path.join(prefix, 'mipsel-linux-android-4.8', 'prebuilt', | |
423 'linux-x86_64', 'bin', 'mipsel-linux-android-') | |
424 cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line']) | |
425 elif arch == 'android-x86': | |
426 prefix = os.path.join(prefix, 'x86-4.8', 'prebuilt', | |
427 'linux-x86_64', 'bin', 'i686-linux-android-') | |
428 cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line']) | |
429 # else, use whatever is in PATH (don't pass --nm or --addr2line) | |
430 | 412 |
431 if verbose: | 413 class Progress(): |
432 print cmd | 414 def __init__(self): |
433 | 415 self.count = 0 |
434 return_code = subprocess.call(cmd) | 416 self.skip_count = 0 |
435 if return_code: | 417 self.collisions = 0 |
436 raise RuntimeError('Failed to run ParallelAddress2Line: returned ' + | 418 self.time_last_output = time.time() |
437 str(return_code)) | 419 self.count_last_output = 0 |
438 | 420 |
439 | 421 |
440 def GetNmSymbols(infile, outfile, library, arch, jobs, verbose): | 422 def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs): |
441 if infile is None: | 423 nm_output = RunNm(library, nm_binary) |
| 424 nm_output_lines = nm_output.splitlines() |
| 425 nm_output_lines_len = len(nm_output_lines) |
| 426 address_symbol = {} |
| 427 progress = Progress() |
| 428 def map_address_symbol(symbol, addr): |
| 429 progress.count += 1 |
| 430 if addr in address_symbol: |
| 431 # 'Collision between %s and %s.' % (str(symbol.name), |
| 432 # str(address_symbol[addr].name)) |
| 433 progress.collisions += 1 |
| 434 else: |
| 435 address_symbol[addr] = symbol |
| 436 |
| 437 progress_chunk = 100 |
| 438 if progress.count % progress_chunk == 0: |
| 439 time_now = time.time() |
| 440 time_spent = time_now - progress.time_last_output |
| 441 if time_spent > 1.0: |
| 442 # Only output at most once per second. |
| 443 progress.time_last_output = time_now |
| 444 chunk_size = progress.count - progress.count_last_output |
| 445 progress.count_last_output = progress.count |
| 446 if time_spent > 0: |
| 447 speed = chunk_size / time_spent |
| 448 else: |
| 449 speed = 0 |
| 450 progress_percent = (100.0 * (progress.count + progress.skip_count) / |
| 451 nm_output_lines_len) |
| 452 print('%.1f%%: Looked up %d symbols (%d collisions) - %.1f lookups/s.' % |
| 453 (progress_percent, progress.count, progress.collisions, speed)) |
| 454 |
| 455 symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary, |
| 456 map_address_symbol, |
| 457 max_concurrent_jobs=jobs) |
| 458 for line in nm_output_lines: |
| 459 match = sNmPattern.match(line) |
| 460 if match: |
| 461 location = match.group(5) |
| 462 if not location: |
| 463 addr = int(match.group(1), 16) |
| 464 size = int(match.group(2), 16) |
| 465 if addr in address_symbol: # Already looked up, shortcut ELFSymbolizer. |
| 466 map_address_symbol(address_symbol[addr], addr) |
| 467 continue |
| 468 elif size == 0: |
| 469 # Save time by not looking up empty symbols (do they even exist?) |
| 470 print('Empty symbol: ' + line) |
| 471 else: |
| 472 symbolizer.SymbolizeAsync(addr, addr) |
| 473 continue |
| 474 |
| 475 progress.skip_count += 1 |
| 476 |
| 477 symbolizer.Join() |
| 478 |
| 479 with open(outfile, 'w') as out: |
| 480 for line in nm_output_lines: |
| 481 match = sNmPattern.match(line) |
| 482 if match: |
| 483 location = match.group(5) |
| 484 if not location: |
| 485 addr = int(match.group(1), 16) |
| 486 symbol = address_symbol[addr] |
| 487 path = '??' |
| 488 if symbol.source_path is not None: |
| 489 path = symbol.source_path |
| 490 line_number = 0 |
| 491 if symbol.source_line is not None: |
| 492 line_number = symbol.source_line |
| 493 out.write('%s\t%s:%d\n' % (line, path, line_number)) |
| 494 continue |
| 495 |
| 496 out.write('%s\n' % line) |
| 497 |
| 498 print('%d symbols in the results.' % len(address_symbol)) |
| 499 |
| 500 |
| 501 def RunNm(binary, nm_binary): |
| 502 print('Starting nm') |
| 503 cmd = [nm_binary, '-C', '--print-size', binary] |
| 504 nm_process = subprocess.Popen(cmd, |
| 505 stdout=subprocess.PIPE, |
| 506 stderr=subprocess.PIPE) |
| 507 (process_output, err_output) = nm_process.communicate() |
| 508 |
| 509 if nm_process.returncode != 0: |
| 510 if err_output: |
| 511 raise Exception, err_output |
| 512 else: |
| 513 raise Exception, process_output |
| 514 |
| 515 print('Finished nm') |
| 516 return process_output |
| 517 |
| 518 |
| 519 def GetNmSymbols(nm_infile, outfile, library, jobs, verbose, |
| 520 addr2line_binary, nm_binary): |
| 521 if nm_infile is None: |
442 if outfile is None: | 522 if outfile is None: |
443 infile = tempfile.NamedTemporaryFile(delete=False).name | 523 outfile = tempfile.NamedTemporaryFile(delete=False).name |
444 else: | |
445 infile = outfile | |
446 | 524 |
447 if verbose: | 525 if verbose: |
448 print 'Running parallel addr2line, dumping symbols to ' + infile; | 526 print 'Running parallel addr2line, dumping symbols to ' + outfile |
449 RunParallelAddress2Line(outfile=infile, library=library, arch=arch, | 527 RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs) |
450 jobs=jobs, verbose=verbose) | 528 |
| 529 nm_infile = outfile |
| 530 |
451 elif verbose: | 531 elif verbose: |
452 print 'Using nm input from ' + infile | 532 print 'Using nm input from ' + nm_infile |
453 with file(infile, 'r') as infile: | 533 with file(nm_infile, 'r') as infile: |
454 return list(ParseNm(infile)) | 534 return list(binary_size_utils.ParseNm(infile)) |
| 535 |
| 536 |
| 537 def _find_in_system_path(binary): |
| 538 """Locate the full path to binary in the system path or return None |
| 539 if not found.""" |
| 540 system_path = os.environ["PATH"].split(os.pathsep) |
| 541 for path in system_path: |
| 542 binary_path = os.path.join(path, binary) |
| 543 if os.path.isfile(binary_path): |
| 544 return binary_path |
| 545 return None |
455 | 546 |
456 | 547 |
457 def main(): | 548 def main(): |
458 usage="""%prog [options] | 549 usage = """%prog [options] |
459 | 550 |
460 Runs a spatial analysis on a given library, looking up the source locations | 551 Runs a spatial analysis on a given library, looking up the source locations |
461 of its symbols and calculating how much space each directory, source file, | 552 of its symbols and calculating how much space each directory, source file, |
462 and so on is taking. The result is a report that can be used to pinpoint | 553 and so on is taking. The result is a report that can be used to pinpoint |
463 sources of large portions of the binary, etceteras. | 554 sources of large portions of the binary, etceteras. |
464 | 555 |
465 Under normal circumstances, you only need to pass two arguments, thusly: | 556 Under normal circumstances, you only need to pass two arguments, thusly: |
466 | 557 |
467 %prog --library /path/to/library --destdir /path/to/output | 558 %prog --library /path/to/library --destdir /path/to/output |
468 | 559 |
(...skipping 10 matching lines...) Expand all Loading... |
479 'present in the file; i.e., no addr2line symbol lookups ' | 570 'present in the file; i.e., no addr2line symbol lookups ' |
480 'will be performed when this option is specified. ' | 571 'will be performed when this option is specified. ' |
481 'Mutually exclusive with --library.') | 572 'Mutually exclusive with --library.') |
482 parser.add_option('--destdir', metavar='PATH', | 573 parser.add_option('--destdir', metavar='PATH', |
483 help='write output to the specified directory. An HTML ' | 574 help='write output to the specified directory. An HTML ' |
484 'report is generated here along with supporting files; ' | 575 'report is generated here along with supporting files; ' |
485 'any existing report will be overwritten.') | 576 'any existing report will be overwritten.') |
486 parser.add_option('--library', metavar='PATH', | 577 parser.add_option('--library', metavar='PATH', |
487 help='if specified, process symbols in the library at ' | 578 help='if specified, process symbols in the library at ' |
488 'the specified path. Mutually exclusive with --nm-in.') | 579 'the specified path. Mutually exclusive with --nm-in.') |
489 parser.add_option('--arch', | 580 parser.add_option('--nm-binary', |
490 help='the architecture that the library is targeted to. ' | 581 help='use the specified nm binary to analyze library. ' |
491 'Determines which nm/addr2line binaries are used. When ' | 582 'This is to be used when the nm in the path is not for ' |
492 '\'host-native\' is chosen, the program will use whichever ' | 583 'the right architecture or of the right version.') |
493 'nm/addr2line binaries are on the PATH. This is ' | 584 parser.add_option('--addr2line-binary', |
494 'appropriate when you are analyzing a binary by and for ' | 585 help='use the specified addr2line binary to analyze ' |
495 'your computer. ' | 586 'library. This is to be used when the addr2line in ' |
496 'This argument is only valid when using --library. ' | 587 'the path is not for the right architecture or ' |
497 'Default is \'host-native\'.', | 588 'of the right version.') |
498 choices=['host-native', 'android-arm', | |
499 'android-mips', 'android-x86'],) | |
500 parser.add_option('--jobs', | 589 parser.add_option('--jobs', |
501 help='number of jobs to use for the parallel ' | 590 help='number of jobs to use for the parallel ' |
502 'addr2line processing pool; defaults to 1. More ' | 591 'addr2line processing pool; defaults to 1. More ' |
503 'jobs greatly improve throughput but eat RAM like ' | 592 'jobs greatly improve throughput but eat RAM like ' |
504 'popcorn, and take several gigabytes each. Start low ' | 593 'popcorn, and take several gigabytes each. Start low ' |
505 'and ramp this number up until your machine begins to ' | 594 'and ramp this number up until your machine begins to ' |
506 'struggle with RAM. ' | 595 'struggle with RAM. ' |
507 'This argument is only valid when using --library.') | 596 'This argument is only valid when using --library.') |
508 parser.add_option('-v', dest='verbose', action='store_true', | 597 parser.add_option('-v', dest='verbose', action='store_true', |
509 help='be verbose, printing lots of status information.') | 598 help='be verbose, printing lots of status information.') |
510 parser.add_option('--nm-out', metavar='PATH', | 599 parser.add_option('--nm-out', metavar='PATH', |
511 help='keep the nm output file, and store it at the ' | 600 help='keep the nm output file, and store it at the ' |
512 'specified path. This is useful if you want to see the ' | 601 'specified path. This is useful if you want to see the ' |
513 'fully processed nm output after the symbols have been ' | 602 'fully processed nm output after the symbols have been ' |
514 'mapped to source locations. By default, a tempfile is ' | 603 'mapped to source locations. By default, a tempfile is ' |
515 'used and is deleted when the program terminates.' | 604 'used and is deleted when the program terminates.' |
516 'This argument is only valid when using --library.') | 605 'This argument is only valid when using --library.') |
517 parser.add_option('--legacy', action='store_true', | 606 parser.add_option('--legacy', action='store_true', |
518 help='emit legacy binary size report instead of modern') | 607 help='emit legacy binary size report instead of modern') |
519 opts, args = parser.parse_args() | 608 opts, _args = parser.parse_args() |
520 | 609 |
521 if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in): | 610 if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in): |
522 parser.error('exactly one of --library or --nm-in is required') | 611 parser.error('exactly one of --library or --nm-in is required') |
523 if (opts.nm_in): | 612 if (opts.nm_in): |
524 if opts.jobs: | 613 if opts.jobs: |
525 print >> sys.stderr, ('WARNING: --jobs has no effect ' | 614 print >> sys.stderr, ('WARNING: --jobs has no effect ' |
526 'when used with --nm-in') | 615 'when used with --nm-in') |
527 if opts.arch: | |
528 print >> sys.stderr, ('WARNING: --arch has no effect ' | |
529 'when used with --nm-in') | |
530 if not opts.destdir: | 616 if not opts.destdir: |
531 parser.error('--destdir is required argument') | 617 parser.error('--destdir is required argument') |
532 if not opts.jobs: | 618 if not opts.jobs: |
533 opts.jobs = '1' | 619 # Use the number of processors but cap between 2 and 4 since raw |
534 if not opts.arch: | 620 # CPU power isn't the limiting factor. It's I/O limited, memory |
535 opts.arch = 'host-native' | 621 # bus limited and available-memory-limited. Too many processes and |
| 622 # the computer will run out of memory and it will be slow. |
| 623 opts.jobs = max(2, min(4, str(multiprocessing.cpu_count()))) |
536 | 624 |
537 symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, opts.arch, | 625 if opts.addr2line_binary: |
538 opts.jobs, opts.verbose is True) | 626 assert os.path.isfile(opts.addr2line_binary) |
| 627 addr2line_binary = opts.addr2line_binary |
| 628 else: |
| 629 addr2line_binary = _find_in_system_path('addr2line') |
| 630 assert addr2line_binary, 'Unable to find addr2line in the path. '\ |
| 631 'Use --addr2line-binary to specify location.' |
| 632 |
| 633 if opts.nm_binary: |
| 634 assert os.path.isfile(opts.nm_binary) |
| 635 nm_binary = opts.nm_binary |
| 636 else: |
| 637 nm_binary = _find_in_system_path('nm') |
| 638 assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\ |
| 639 'to specify location.' |
| 640 |
| 641 print('nm: %s' % nm_binary) |
| 642 print('addr2line: %s' % addr2line_binary) |
| 643 |
| 644 symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, |
| 645 opts.jobs, opts.verbose is True, |
| 646 addr2line_binary, nm_binary) |
539 if not os.path.exists(opts.destdir): | 647 if not os.path.exists(opts.destdir): |
540 os.makedirs(opts.destdir, 0755) | 648 os.makedirs(opts.destdir, 0755) |
541 | 649 |
542 | 650 |
543 if opts.legacy: # legacy report | 651 if opts.legacy: # legacy report |
544 DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js')) | 652 DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js')) |
545 DumpLargestSymbols(symbols, | 653 DumpLargestSymbols(symbols, |
546 os.path.join(opts.destdir, 'largest-symbols.js'), 100) | 654 os.path.join(opts.destdir, 'largest-symbols.js'), 100) |
547 DumpLargestSources(symbols, | 655 DumpLargestSources(symbols, |
548 os.path.join(opts.destdir, 'largest-sources.js'), 100) | 656 os.path.join(opts.destdir, 'largest-sources.js'), 100) |
549 DumpLargestVTables(symbols, | 657 DumpLargestVTables(symbols, |
550 os.path.join(opts.destdir, 'largest-vtables.js'), 100) | 658 os.path.join(opts.destdir, 'largest-vtables.js'), 100) |
551 treemap_out = os.path.join(opts.destdir, 'webtreemap') | 659 treemap_out = os.path.join(opts.destdir, 'webtreemap') |
552 if not os.path.exists(treemap_out): | 660 if not os.path.exists(treemap_out): |
553 os.makedirs(treemap_out, 0755) | 661 os.makedirs(treemap_out, 0755) |
554 treemap_src = os.path.join('third_party', 'webtreemap', 'src') | 662 treemap_src = os.path.join('third_party', 'webtreemap', 'src') |
555 shutil.copy(os.path.join(treemap_src, 'COPYING'), treemap_out) | 663 shutil.copy(os.path.join(treemap_src, 'COPYING'), treemap_out) |
556 shutil.copy(os.path.join(treemap_src, 'webtreemap.js'), treemap_out) | 664 shutil.copy(os.path.join(treemap_src, 'webtreemap.js'), treemap_out) |
557 shutil.copy(os.path.join(treemap_src, 'webtreemap.css'), treemap_out) | 665 shutil.copy(os.path.join(treemap_src, 'webtreemap.css'), treemap_out) |
558 shutil.copy(os.path.join('tools', 'binary_size', 'legacy_template', | 666 shutil.copy(os.path.join('tools', 'binary_size', 'legacy_template', |
559 'index.html'), opts.destdir) | 667 'index.html'), opts.destdir) |
560 else: # modern report | 668 else: # modern report |
561 DumpCompactTree(symbols, os.path.join(opts.destdir, 'data.js')) | 669 DumpCompactTree(symbols, os.path.join(opts.destdir, 'data.js')) |
562 d3_out = os.path.join(opts.destdir, 'd3') | 670 d3_out = os.path.join(opts.destdir, 'd3') |
563 if not os.path.exists(d3_out): | 671 if not os.path.exists(d3_out): |
564 os.makedirs(d3_out, 0755) | 672 os.makedirs(d3_out, 0755) |
565 d3_src = os.path.join('third_party', 'd3', 'src') | 673 d3_src = os.path.join(os.path.dirname(__file__), |
566 template_src = os.path.join('tools', 'binary_size', | 674 '..', |
| 675 '..', |
| 676 'third_party', 'd3', 'src') |
| 677 template_src = os.path.join(os.path.dirname(__file__), |
567 'template') | 678 'template') |
568 shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out) | 679 shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out) |
569 shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out) | 680 shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out) |
| 681 print('Copying index.html') |
570 shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir) | 682 shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir) |
571 shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir) | 683 shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir) |
572 | 684 |
573 if opts.verbose: | 685 if opts.verbose: |
574 print 'Report saved to ' + opts.destdir + '/index.html' | 686 print 'Report saved to ' + opts.destdir + '/index.html' |
575 | 687 |
576 | 688 |
# Standard script entry point: run the analysis and use main()'s result
# as the process exit status.
if __name__ == '__main__':
  sys.exit(main())
OLD | NEW |