OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/python | |
2 # Copyright 2014 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """Generate a spatial analysis against an arbitrary library. | |
7 | |
8 To use, build the 'binary_size_tool' target. Then run this tool, passing | |
9 in the location of the library to be analyzed along with any other options | |
10 you desire. | |
11 """ | |
12 | |
13 import collections | |
14 import fileinput | |
15 import json | |
16 import optparse | |
17 import os | |
18 import pprint | |
19 import re | |
20 import shutil | |
21 import subprocess | |
22 import sys | |
23 import tempfile | |
24 | |
25 | |
def FormatBytes(bytes):
  """Render a byte count as a short human-readable string.

  Values strictly greater than 1e6 are divided by 1e6 and suffixed with 'm';
  values strictly greater than 1e3 are divided by 1e3 and suffixed with 'k'.
  Both are shown with one decimal place. Anything else is returned via str().
  """
  # Largest unit first so that the first matching threshold wins.
  for threshold, suffix in ((1.0e6, 'm'), (1.0e3, 'k')):
    if bytes > threshold:
      return '%.1f%s' % (bytes / threshold, suffix)
  return str(bytes)
35 | |
36 | |
def SymbolTypeToHuman(type):
  """Convert a symbol type as printed by nm into a human-readable name.

  Args:
    type: the one-character symbol type code. The lookup is case-insensitive,
        so both the lowercase and uppercase forms of a code map to the same
        name.

  Returns:
    One of 'bss', 'data', 'read-only data', 'code' or 'weak symbol', or
    'other' for any code that is not in the table. Returning a fallback
    instead of raising KeyError keeps a single unusual symbol type from
    aborting a whole report.
  """
  names = {
      'b': 'bss',
      'd': 'data',
      'r': 'read-only data',
      't': 'code',
      'w': 'weak symbol',
      'v': 'weak symbol',
  }
  return names.get(type.lower(), 'other')
45 | |
46 | |
def ParseNm(input):
  """Parse nm output.

  Recognised line shapes:
    <address> <size> <type> <name>[<TAB><path>:<line>[ trailing text]]
    <address> <type> <name>...          (no size: skipped)
    <blank address> <type> <name>       (types 'U' and 'w': skipped)
  Addresses and sizes are lowercase hex words of either 8 or 16 digits, so
  both 32-bit and 64-bit nm output are accepted. Any line that fits none of
  these shapes is reported on stderr as "unparsed" and otherwise ignored.

  Args:
    input: an iterable over lines of nm output.

  Yields:
    (symbol name, symbol type, symbol size, source file path) tuples.
    The type is lowercased and 'v' is folded into 'w'. BSS symbols ('b') are
    not yielded. The size is an int. Path is None if the line carried no
    source location.
  """
  # One hex word: 8 digits (32-bit targets) or 16 digits (64-bit targets).
  hex_word = r'(?:[0-9a-f]{8}){1,2}'

  # Match lines with size, symbol, optional location, optional discriminator.
  sym_re = re.compile(r'^%(hex)s '              # address
                      r'(%(hex)s) '             # size
                      r'(.) '                   # symbol type, one character
                      r'([^\t]+)'               # symbol name, up to next tab
                      r'(?:\t(.*):[\d\?]+)?.*$'  # location
                      % {'hex': hex_word})
  # Match lines with addr but no size.
  addr_re = re.compile(r'^%s (.) ([^\t]+)(?:\t.*)?$' % hex_word)
  # Match lines that don't have an address at all -- typically external
  # symbols. The address column is blank-padded to the same width.
  noaddr_re = re.compile(r'^(?: {8}){1,2} (.) (.*)$')

  for line in input:
    line = line.rstrip()

    match = sym_re.match(line)
    if match:
      size, type, sym, path = match.groups()
      type = type.lower()
      if type == 'v':
        type = 'w'  # just call them all weak
      if type == 'b':
        continue  # skip all BSS for now
      yield sym, type, int(size, 16), path
      continue

    if addr_re.match(line):
      # No size == we don't care.
      continue

    match = noaddr_re.match(line)
    if match and match.group(1) in ('U', 'w'):
      # external or weak symbol
      continue

    sys.stderr.write('unparsed: ' + repr(line) + '\n')
94 | |
95 | |
def TreeifySymbols(symbols):
  """Convert symbols into a path-based tree, calculating size information
  along the way.

  The result is a dictionary that contains two kinds of nodes:
  1. Leaf nodes, representing source code locations (e.g., c++ files)
     These nodes have the following dictionary entries:
       sizes: a dictionary whose keys are categories (such as '[code]',
              '[data]', '[vtable]', etc.) and whose values are the size, in
              bytes, of those categories;
       size:  the total size, in bytes, of all the entries in the sizes dict
  2. Non-leaf nodes, representing directories
     These nodes have the following dictionary entries:
       children: a dictionary whose keys are names (path entries; either
                 directory or file names) and whose values are other nodes;
       size:     the total size, in bytes, of all the leaf nodes that are
                 contained within the children dict (recursively expanded)

  Symbols that have no path are collected in a single leaf node named
  'symbols without paths' directly under the root; its 'sizes' dict is keyed
  by a coarse grouping of the symbol name rather than by category.

  The result object is itself a dictionary that represents the common ancestor
  of all child nodes, e.g. a path to which all other nodes beneath it are
  relative. The 'size' attribute of this dict yields the sum of the size of all
  leaf nodes within the data structure.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.

  Returns:
    The root node of the tree, as described above.

  Raises:
    KeyError: if the same path is used both as a file and as a directory.
        The offending symbol, directory parts and file name are written to
        stderr before the exception is re-raised.
  """
  # nm type letter (lowercased) -> size bucket within a file node.
  bucket_by_type = {
      'r': '[rodata]',
      'd': '[data]',
      'b': '[bss]',
      't': '[code]',  # 'text' in binary parlance means 'code'.
      'w': '[weak]',
  }

  dirs = {'children': {}, 'size': 0}
  for sym, type, size, path in symbols:
    dirs['size'] += size
    if path:
      # normpath() may keep a leading '//', so strip every leading slash to
      # guarantee there are no empty path components below.
      path = os.path.normpath(path).lstrip('/')

    parts = path.split('/') if path else None

    if parts:
      file_key = parts.pop()
      tree = dirs
      try:
        # Traverse the tree to the parent of the file node, creating as needed
        for part in parts:
          if part not in tree['children']:
            tree['children'][part] = {'children': {}, 'size': 0}
          tree = tree['children'][part]
          tree['size'] += size

        # Get (creating if necessary) the node for the file
        # This node doesn't have a 'children' attribute
        if file_key not in tree['children']:
          tree['children'][file_key] = {'sizes': collections.defaultdict(int),
                                        'size': 0}
        tree = tree['children'][file_key]
        tree['size'] += size

        # Accumulate size into a bucket within the file
        if 'vtable for ' in sym:
          bucket = '[vtable]'
        else:
          bucket = bucket_by_type.get(type.lower(), '[other]')
        tree['sizes'][bucket] += size
      except Exception:
        # Say which symbol broke the tree, then let the error propagate.
        sys.stderr.write('%s %s %s\n' % (sym, parts, file_key))
        raise
    else:
      key = 'symbols without paths'
      if key not in dirs['children']:
        dirs['children'][key] = {'sizes': collections.defaultdict(int),
                                 'size': 0}
      tree = dirs['children'][key]
      subkey = 'misc'
      if (sym.endswith('::__FUNCTION__') or
          sym.endswith('::__PRETTY_FUNCTION__')):
        subkey = '__FUNCTION__'
      elif sym.startswith('CSWTCH.'):
        subkey = 'CSWTCH'
      elif '::' in sym:
        # Group by the leading scope, keeping the trailing '::'.
        subkey = sym[0:sym.find('::') + 2]
      tree['sizes'][subkey] += size
      tree['size'] += size
  return dirs
188 | |
189 | |
# Size category (as produced by TreeifySymbols) -> value stored under the
# '$symbol' key of a treemap entry. Categories not listed get no '$symbol'.
_CSS_CLASS_BY_KIND = {
    '[vtable]': 'vtable',
    '[rodata]': 'read-only_data',
    '[data]': 'data',
    '[bss]': 'bss',
    '[code]': 'code',
    '[weak]': 'weak_symbol',
}


def JsonifyTree(tree, name):
  """Convert TreeifySymbols output to a JSON treemap.

  The format is very similar, with the notable exceptions being
  lists of children instead of maps and some different attribute names.

  Args:
    tree: a node as produced by TreeifySymbols: either a directory node
        (has 'children') or a file node (has 'sizes').
    name: the display name of this node.

  Returns:
    A dict with 'name' (the given name followed by the formatted size in
    parentheses), 'data' ({'$area': size in bytes}) and 'children' (a list
    of entries of the same shape, sorted largest first). Entries generated
    from a file node's 'sizes' have no 'children' key and may carry a
    '$symbol' entry in their 'data'.
  """
  children = []
  if 'children' in tree:
    # Non-leaf node. Recurse.
    for child_name, child in tree['children'].items():
      children.append(JsonifyTree(child, child_name))
  else:
    # Leaf node; dump per-file stats as entries in the treemap
    for kind, size in tree['sizes'].items():
      child_json = {'name': kind + ' (' + FormatBytes(size) + ')',
                    'data': {'$area': size}}
      css_class = _CSS_CLASS_BY_KIND.get(kind)
      if css_class is not None:
        child_json['data']['$symbol'] = css_class
      children.append(child_json)
  # Sort children by size, largest to smallest.
  children.sort(key=lambda child: -child['data']['$area'])

  # For leaf nodes, the 'size' attribute is the size of the leaf;
  # Non-leaf nodes don't really have a size, but their 'size' attribute is
  # the sum of the sizes of all their children.
  return {'name': name + ' (' + FormatBytes(tree['size']) + ')',
          'data': {'$area': tree['size']},
          'children': children}
224 | |
225 | |
def DumpTreemap(symbols, outfile):
  """Write the symbol tree to outfile as a JavaScript 'var kTree = ...' line.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: path of the file to (over)write.
  """
  root = TreeifySymbols(symbols)
  # The context manager flushes and closes the file even if writing fails.
  with open(outfile, 'w') as out:
    out.write('var kTree = ' + json.dumps(JsonifyTree(root, '/')))
234 | |
235 | |
def DumpLargestSymbols(symbols, outfile, n):
  """Write the largest symbols to outfile as a JavaScript array literal.

  The output assigns a list of {size, symbol, type, location} objects to
  'largestSymbols'. BSS ('b') and weak ('w') symbols are left out. Writing
  stops once n entries have been emitted.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: path of the file to (over)write.
    n: stop after this many entries have been written.
  """
  # Index 2 of each tuple is the size; biggest first, ties keep input order.
  by_size = sorted(symbols, key=lambda item: item[2], reverse=True)
  with open(outfile, 'w') as out:
    try:
      out.write('var largestSymbols = [\n')
      written = 0
      for sym, type, size, path in by_size:
        if type in ('b', 'w'):
          continue  # skip bss and weak symbols
        entry = {'size': FormatBytes(size),
                 'symbol': sym,
                 'type': SymbolTypeToHuman(type),
                 'location': '' if path is None else path}
        out.write(json.dumps(entry) + ',\n')
        written += 1
        if written >= n:
          break
    finally:
      # Always terminate the array, even when bailing out on an error.
      out.write('];\n')
261 | |
262 | |
def MakeSourceMap(symbols):
  """Aggregate symbols by the source file they come from.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.

  Returns:
    A dict keyed by normalized source path ('[no path]' for symbols whose
    path is empty or None). Each value is a dict with:
      path: the path exactly as given by the first symbol seen for that key;
      symbol_count: how many symbols mapped to the key;
      size: the sum of their sizes.
  """
  by_source = {}
  for _, _, size, path in symbols:
    source_key = os.path.normpath(path) if path else '[no path]'
    entry = by_source.setdefault(
        source_key, {'path': path, 'symbol_count': 0, 'size': 0})
    entry['symbol_count'] += 1
    entry['size'] += size
  return by_source
277 | |
278 | |
def DumpLargestSources(symbols, outfile, n):
  """Write the largest source files to outfile as a JavaScript array literal.

  The output assigns a list of {size, symbol_count, location} objects to
  'largestSources', ordered by total symbol size, biggest first. Writing
  stops once n entries have been emitted.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: path of the file to (over)write.
    n: stop after this many entries have been written.
  """
  records = sorted(MakeSourceMap(symbols).values(),
                   key=lambda rec: rec['size'], reverse=True)
  with open(outfile, 'w') as out:
    try:
      out.write('var largestSources = [\n')
      written = 0
      for rec in records:
        entry = {'size': FormatBytes(rec['size']),
                 'symbol_count': str(rec['symbol_count']),
                 'location': rec['path']}
        out.write(json.dumps(entry) + ',\n')
        written += 1
        if written >= n:
          break
    finally:
      # Always terminate the array, even when bailing out on an error.
      out.write('];\n')
299 | |
300 | |
def DumpLargestVTables(symbols, outfile, n):
  """Write the largest vtables to outfile as a JavaScript array literal.

  A symbol counts as a vtable when its name contains 'vtable for '. The
  output assigns a list of {size, symbol, location} objects to
  'largestVTables', biggest first. Writing stops once n entries have been
  emitted.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: path of the file to (over)write.
    n: stop after this many entries have been written.
  """
  vtables = [(name, size, path)
             for name, _, size, path in symbols
             if 'vtable for ' in name]
  # Biggest first; ties keep their input order.
  vtables.sort(key=lambda item: item[1], reverse=True)
  with open(outfile, 'w') as out:
    try:
      out.write('var largestVTables = [\n')
      written = 0
      for name, size, path in vtables:
        entry = {'size': FormatBytes(size),
                 'symbol': name,
                 'location': path}
        out.write(json.dumps(entry) + ',\n')
        written += 1
        if written >= n:
          break
    finally:
      # Always terminate the array, even when bailing out on an error.
      out.write('];\n')
324 | |
325 | |
def RunParallelAddress2Line(outfile, library, arch, jobs, verbose):
  """Run a parallel addr2line processing engine to dump and resolve symbols.

  Launches the Java class org.chromium.tools.binary_size.ParallelAddress2Line
  from binary_size_java.jar, which is looked up under
  $CHROMIUM_OUT_DIR/$BUILDTYPE/lib.java (defaults: 'out' and 'Release').

  Args:
    outfile: passed to the tool as --outfile.
    library: passed to the tool as --library.
    arch: one of 'android-arm', 'android-mips' or 'android-x86' to use the
        matching NDK nm/addr2line binaries; any other value leaves the
        choice to whatever is on PATH.
    jobs: passed to the tool as --threads.
    verbose: if exactly True, pass --verbose to the tool; if truthy, also
        print the command line before running it.

  Raises:
    RuntimeError: if the tool exits with a non-zero status.
  """
  jar_path = os.path.join(os.getenv('CHROMIUM_OUT_DIR', 'out'),
                          os.getenv('BUILDTYPE', 'Release'),
                          'lib.java', 'binary_size_java.jar')
  cmd = ['java',
         '-classpath', jar_path,
         'org.chromium.tools.binary_size.ParallelAddress2Line',
         '--disambiguate',
         '--outfile', outfile,
         '--library', library,
         '--threads', jobs]
  if verbose is True:
    cmd.append('--verbose')

  # arch -> (NDK toolchain directory, prefix of the tool binaries in it).
  ndk_toolchains = {
      'android-arm': ('arm-linux-androideabi-4.7', 'arm-linux-androideabi-'),
      'android-mips': ('mipsel-linux-android-4.7', 'mipsel-linux-android-'),
      'android-x86': ('x86-4.7', 'i686-linux-android-'),
  }
  if arch in ndk_toolchains:
    toolchain_dir, tool_prefix = ndk_toolchains[arch]
    prefix = os.path.join('third_party', 'android_tools', 'ndk', 'toolchains',
                          toolchain_dir, 'prebuilt', 'linux-x86_64', 'bin',
                          tool_prefix)
    cmd += ['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line']
  # else, use whatever is in PATH (don't pass --nm or --addr2line)

  if verbose:
    print(cmd)

  status = subprocess.call(cmd)
  if status:
    raise RuntimeError('Failed to run ParallelAddress2Line: returned ' +
                       str(status))
363 | |
364 | |
def GetNmSymbols(infile, outfile, library, arch, jobs, verbose):
  """Return the parsed symbol list, generating the nm dump first if needed.

  Args:
    infile: path to an existing nm dump to parse, or None to generate one
        from the library with RunParallelAddress2Line.
    outfile: where to store a generated dump. If None, a temporary file is
        used and removed again once it has been parsed. Ignored when infile
        is given.
    library: path of the library to analyze (only used when generating).
    arch: target architecture name (only used when generating).
    jobs: number of addr2line jobs (only used when generating).
    verbose: if truthy, print progress information.

  Returns:
    A list of (symbol name, type, size, path) tuples as yielded by ParseNm.
  """
  generate = infile is None
  use_temp = generate and outfile is None
  if use_temp:
    # Only the name is needed; close our handle so the tool can write to it.
    handle = tempfile.NamedTemporaryFile(delete=False)
    handle.close()
    nm_path = handle.name
  elif generate:
    nm_path = outfile
  else:
    nm_path = infile

  try:
    if generate:
      if verbose:
        print('Running parallel addr2line, dumping symbols to ' + nm_path)
      RunParallelAddress2Line(outfile=nm_path, library=library, arch=arch,
                              jobs=jobs, verbose=verbose)
    elif verbose:
      print('Using nm input from ' + nm_path)
    with open(nm_path, 'r') as nm_file:
      return list(ParseNm(nm_file))
  finally:
    if use_temp:
      # The dump was never requested by the user; don't leave it behind.
      try:
        os.remove(nm_path)
      except OSError:
        pass
380 | |
381 | |
def main():
  """Parse the command line, analyze the symbols and write the HTML report.

  Exactly one of --library or --nm-in must be given, and --destdir is
  required; otherwise the option parser reports an error and exits.

  The report consists of four generated JavaScript data files plus a copy of
  the webtreemap sources and the index.html template. Those static files are
  read from paths relative to the current working directory
  ('third_party/webtreemap/...' and 'tools/binary_size/template').
  """
  usage = """%prog [options]

Runs a spatial analysis on a given library, looking up the source locations
of its symbols and calculating how much space each directory, source file,
and so on is taking. The result is a report that can be used to pinpoint
sources of large portions of the binary, etceteras.

Under normal circumstances, you only need to pass two arguments, thusly:

    %prog --library /path/to/library --destdir /path/to/output

In this mode, the program will dump the symbols from the specified library
and map those symbols back to source locations, producing a web-based
report in the specified output directory.

Other options are available via '--help'.
"""
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--arch',
                    help='the architecture that the library is targeted to. '
                    'Determines which nm/addr2line binaries are used. When '
                    '\'host-native\' is chosen, the program will use whichever '
                    'nm/addr2line binaries are on the PATH. This is '
                    'appropriate when you are analyzing a binary by and for '
                    'your computer. '
                    'This argument is only valid when using --library. '
                    'Default is \'host-native\'.',
                    choices=['host-native', 'android-arm',
                             'android-mips', 'android-x86'],)
  parser.add_option('--jobs',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates. '
                    'This argument is only valid when using --library.')
  opts, _ = parser.parse_args()

  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    # These options only steer symbol generation, which --nm-in bypasses.
    if opts.jobs:
      sys.stderr.write('WARNING: --jobs has no effect '
                       'when used with --nm-in\n')
    if opts.arch:
      sys.stderr.write('WARNING: --arch has no effect '
                       'when used with --nm-in\n')
    if opts.nm_out:
      sys.stderr.write('WARNING: --nm-out has no effect '
                       'when used with --nm-in\n')
  if not opts.destdir:
    parser.error('--destdir is required argument')
  # Defaults are applied here rather than in add_option() so that the
  # warnings above only fire for options the user actually passed.
  if not opts.jobs:
    opts.jobs = '1'
  if not opts.arch:
    opts.arch = 'host-native'

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, opts.arch,
                         opts.jobs, opts.verbose is True)
  if not os.path.exists(opts.destdir):
    os.makedirs(opts.destdir, 0o755)

  DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
  DumpLargestSymbols(symbols,
                     os.path.join(opts.destdir, 'largest-symbols.js'), 100)
  DumpLargestSources(symbols,
                     os.path.join(opts.destdir, 'largest-sources.js'), 100)
  DumpLargestVTables(symbols,
                     os.path.join(opts.destdir, 'largest-vtables.js'), 100)

  # TODO(andrewhayden): Switch to D3 for greater flexibility
  treemap_out = os.path.join(opts.destdir, 'webtreemap')
  if not os.path.exists(treemap_out):
    os.makedirs(treemap_out, 0o755)
  treemap_src = os.path.join('third_party', 'webtreemap', 'src',
                             'webtreemap-gh-pages')
  for filename in ('COPYING', 'webtreemap.js', 'webtreemap.css'):
    shutil.copy(os.path.join(treemap_src, filename), treemap_out)
  shutil.copy(os.path.join('tools', 'binary_size', 'template', 'index.html'),
              opts.destdir)
  if opts.verbose:
    print('Report saved to ' + opts.destdir + '/index.html')
487 | |
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
  raise SystemExit(main())
OLD | NEW |