Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(37)

Side by Side Diff: tools/binary_size/run_binary_size_analysis.py

Issue 119083006: Add tool to help analyze binary size (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Add chromium notice to index.html Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Generate a spatial analysis against an arbitrary library.
7
8 To use, build the 'binary_size_tool' target. Then run this tool, passing
9 in the location of the library to be analyzed along with any other options
10 you desire.
11 """
12
13 import fileinput
14 import json
15 import optparse
16 import os
17 import pprint
18 import re
19 import shutil
20 import subprocess
21 import sys
22 import tempfile
23
bulach 2014/01/10 11:23:24 nit: need another \n
Andrew Hayden (chromium.org) 2014/01/16 14:26:49 Done.
def FormatBytes(bytes):
  """Render a byte count as a short human-readable string.

  Args:
    bytes: the number of bytes to format.

  Returns:
    The value scaled to decimal megabytes ('m') or kilobytes ('k') with one
    decimal place when it is strictly greater than 1e6 or 1e3 respectively;
    otherwise str(bytes).
  """
  # Largest unit first so that the first threshold exceeded wins.
  for threshold, suffix in ((1.0e6, 'm'), (1.0e3, 'k')):
    if bytes > threshold:
      return '%.1f%s' % (bytes / threshold, suffix)
  return str(bytes)
33
34
def SymbolTypeToHuman(type):
  """Convert a symbol type as printed by nm into a human-readable name.

  Args:
    type: the one-character nm symbol type code, in either case.

  Returns:
    The descriptive name for the known codes (b, d, r, t, w, v). Any other
    code is returned unchanged rather than raising KeyError, so that a single
    unusual symbol cannot abort report generation.
  """
  names = {'b': 'bss',
           'd': 'data',
           'r': 'read-only data',
           't': 'code',
           'w': 'weak symbol',
           'v': 'weak symbol'}
  return names.get(type.lower(), type)
43
44
def ParseNm(input):
  """Parse nm output.

  Accepts both 32-bit (8 hex digit) and 64-bit (16 hex digit) address and
  size columns.

  Args:
    input: an iterable over lines of nm output.

  Yields:
    (symbol name, symbol type, symbol size, source file path) for every line
    that carries a size. The type is lower-cased, 'v' is reported as 'w', and
    BSS ('b') symbols are skipped. Path is None if the line carries no
    location.

  Lines that match none of the known formats are reported on stderr.
  """
  # Every fragment is a raw string so that \t, \d and \? reach the regex
  # engine untouched instead of relying on Python's handling of unknown
  # string escapes.
  # Match lines with size, symbol, optional location, optional discriminator
  sym_re = re.compile(
      r'^[0-9a-f]{8,16} '          # address (8 or 16 hex digits)
      r'([0-9a-f]{8,16}) '         # size (8 or 16 hex digits)
      r'(.) '                      # symbol type, one character
      r'([^\t]+)'                  # symbol name, separated from next by tab
      r'(?:\t(.*):[\d\?]+)?.*$')   # location
  # Match lines with addr but no size.
  addr_re = re.compile(r'^[0-9a-f]{8,16} (.) ([^\t]+)(?:\t.*)?$')
  # Match lines that don't have an address at all -- typically external
  # symbols.
  noaddr_re = re.compile(r'^ {8,16} (.) (.*)$')

  for line in input:
    line = line.rstrip()
    if not line:
      continue  # blank lines carry no information; don't report them

    match = sym_re.match(line)
    if match:
      size, sym_type, sym, path = match.groups()
      sym_type = sym_type.lower()
      if sym_type == 'v':
        sym_type = 'w'  # just call them all weak
      if sym_type == 'b':
        continue  # skip all BSS for now
      yield sym, sym_type, int(size, 16), path
      continue

    if addr_re.match(line):
      # No size == we don't care.
      continue

    match = noaddr_re.match(line)
    if match and match.group(1) in ('U', 'w'):
      # external or weak symbol
      continue

    sys.stderr.write('unparsed: %s\n' % repr(line))
92
93
def _SymbolBucket(sym, sym_type):
  """Return the per-file size category that a symbol is accounted under."""
  if 'vtable for ' in sym:
    return '[vtable]'
  buckets = {'r': '[rodata]',
             'd': '[data]',
             'b': '[bss]',
             't': '[code]',  # 'text' in binary parlance means 'code'.
             'w': '[weak]'}
  return buckets.get(sym_type.lower(), '[other]')


def TreeifySymbols(symbols):
  """Convert symbols into a path-based tree, calculating size information
  along the way.

  The result is a dictionary that contains two kinds of nodes:
  1. Leaf nodes, representing source code locations (e.g., c++ files)
     These nodes have the following dictionary entries:
       sizes: a dictionary whose keys are categories (such as code, data,
              vtable, etceteras) and whose values are the size, in bytes, of
              those categories;
       size:  the total size, in bytes, of all the entries in the sizes dict
  2. Non-leaf nodes, representing directories
     These nodes have the following dictionary entries:
       children: a dictionary whose keys are names (path entries; either
                 directory or file names) and whose values are other nodes;
       size:     the total size, in bytes, of all the leaf nodes that are
                 contained within the children dict (recursively expanded)

  The result object is itself a dictionary that represents the common ancestor
  of all child nodes, e.g. a path to which all other nodes beneath it are
  relative. The 'size' attribute of this dict yields the sum of the size of all
  leaf nodes within the data structure.

  Symbols that have no path are collected under a top-level leaf node named
  'symbols without paths', bucketed by namespace prefix.

  Args:
    symbols: an iterable of (symbol name, symbol type, size, path) tuples.

  Returns:
    The root node of the tree, as described above.

  Raises:
    KeyError: if the same path is used both as a file and as a directory.
        The offending symbol is written to stderr before re-raising.
  """
  dirs = {'children': {}, 'size': 0}
  for sym, sym_type, size, path in symbols:
    dirs['size'] += size
    if path:
      # normpath() preserves a leading '//', so strip every leading slash
      # rather than just one; otherwise an empty path component appears.
      path = os.path.normpath(path).lstrip('/')

    parts = path.split('/') if path else None

    if parts:
      file_key = parts.pop()
      try:
        # Traverse the tree to the parent of the file node, creating as needed
        node = dirs
        for part in parts:
          node = node['children'].setdefault(part,
                                             {'children': {}, 'size': 0})
          node['size'] += size

        # Get (creating if necessary) the node for the file
        # This node doesn't have a 'children' attribute
        node = node['children'].setdefault(file_key, {'sizes': {}, 'size': 0})
        node['size'] += size

        # Accumulate size into a bucket within the file
        bucket = _SymbolBucket(sym, sym_type)
        node['sizes'][bucket] = node['sizes'].get(bucket, 0) + size
      except Exception:
        # Say which symbol broke the tree, then let the error propagate.
        sys.stderr.write('%s %s %s\n' % (sym, parts, file_key))
        raise
    else:
      node = dirs['children'].setdefault('symbols without paths',
                                         {'sizes': {}, 'size': 0})
      subkey = 'misc'
      if sym.endswith(('::__FUNCTION__', '::__PRETTY_FUNCTION__')):
        subkey = '__FUNCTION__'
      elif sym.startswith('CSWTCH.'):
        subkey = 'CSWTCH'
      elif '::' in sym:
        # Group by the outermost namespace/class, keeping the trailing '::'.
        subkey = sym[0:sym.find('::') + 2]
      node['sizes'][subkey] = node['sizes'].get(subkey, 0) + size
      node['size'] += size
  return dirs
198
199
def JsonifyTree(tree, name):
  """Convert the output of TreeifySymbols to a JSON treemap structure.

  The format is very similar to the input, with the notable exceptions being
  lists of children instead of maps, and some different attribute names.

  Args:
    tree: a node produced by TreeifySymbols (directory or file node).
    name: the display name of this node.

  Returns:
    A dict with 'name', 'data' and 'children' entries; children are ordered
    from largest to smallest.
  """
  css_classes = {'[vtable]': 'vtable',
                 '[rodata]': 'read-only_data',
                 '[data]': 'data',
                 '[bss]': 'bss',
                 '[code]': 'code',
                 '[weak]': 'weak_symbol'}

  if 'children' in tree:
    # Directory node: convert every child recursively.
    children = [JsonifyTree(child, child_name)
                for child_name, child in tree['children'].items()]
  else:
    # File node: each size category becomes one treemap entry.
    children = []
    for kind, size in tree['sizes'].items():
      child_json = {'name': kind + ' (' + FormatBytes(size) + ')',
                    'data': { '$area': size }}
      css_class = css_classes.get(kind)
      if css_class is not None:
        child_json['data']['$symbol'] = css_class
      children.append(child_json)

  # Largest first.
  children.sort(key=lambda child: -child['data']['$area'])

  # A file node's 'size' is its own size; a directory node's 'size' is the
  # sum of the sizes of everything beneath it.
  return {'name': name + ' (' + FormatBytes(tree['size']) + ')',
          'data': { '$area': tree['size'] },
          'children': children }
232
233
def DumpTreemap(symbols, outfile):
  """Write the symbol size tree to outfile as a JavaScript assignment.

  Args:
    symbols: an iterable of (symbol name, symbol type, size, path) tuples.
    outfile: path of the file to write; it will contain 'var kTree = <json>'.
  """
  root = TreeifySymbols(symbols)
  with open(outfile, 'w') as out:
    payload = json.dumps(JsonifyTree(root, '/'))
    out.write('var kTree = ' + payload)
242
243
def DumpLargestSymbols(symbols, outfile, n):
  """Write the n largest symbols to outfile as a JavaScript array.

  BSS and weak symbols are left out of the listing.

  Args:
    symbols: a list of (sym, type, size, path) tuples.
    outfile: path of the file to write ('var largestSymbols = [...]').
    n: stop after this many entries have been written.
  """
  ranked = sorted(symbols, key=lambda record: -record[2])
  with open(outfile, 'w') as out:
    try:
      out.write('var largestSymbols = [\n')
      written = 0
      for sym, sym_type, size, path in ranked:
        if sym_type == 'b' or sym_type == 'w':
          continue  # skip bss and weak symbols
        entry = {'size': FormatBytes(size),
                 'symbol': sym,
                 'type': SymbolTypeToHuman(sym_type),
                 'location': '' if path is None else path }
        out.write(json.dumps(entry) + ',\n')
        written += 1
        if written >= n:
          break
    finally:
      # Always terminate the array, even when an error cut the listing short.
      out.write('];\n')
269
270
def MakeSourceMap(symbols):
  """Aggregate symbol sizes and counts per source file.

  Args:
    symbols: an iterable of (sym, type, size, path) tuples.

  Returns:
    A dict keyed by normalized path ('[no path]' for symbols without one).
    Each value is a dict with 'path' (the path as first seen for that key),
    'symbol_count' and 'size'.
  """
  sources = {}
  for _, _, size, path in symbols:
    key = os.path.normpath(path) if path else '[no path]'
    record = sources.setdefault(
        key, {'path': path, 'symbol_count': 0, 'size': 0})
    record['symbol_count'] += 1
    record['size'] += size
  return sources
285
286
def DumpLargestSources(symbols, outfile, n):
  """Write the n largest source files to outfile as a JavaScript array.

  Args:
    symbols: an iterable of (sym, type, size, path) tuples.
    outfile: path of the file to write ('var largestSources = [...]').
    n: stop after this many entries have been written.
  """
  by_source = MakeSourceMap(symbols)
  ranked = sorted(by_source.values(), key=lambda source: -source['size'])
  with open(outfile, 'w') as out:
    try:
      out.write('var largestSources = [\n')
      for written, record in enumerate(ranked, 1):
        entry = {'size': FormatBytes(record['size']),
                 'symbol_count': str(record['symbol_count']),
                 'location': record['path']}
        out.write(json.dumps(entry) + ',\n')
        if written >= n:
          break
    finally:
      # Always terminate the array, even when an error cut the listing short.
      out.write('];\n')
307
308
def DumpLargestVTables(symbols, outfile, n):
  """Write the n largest vtables to outfile as a JavaScript array.

  A symbol counts as a vtable when its name contains 'vtable for '.

  Args:
    symbols: an iterable of (sym, type, size, path) tuples.
    outfile: path of the file to write ('var largestVTables = [...]').
    n: stop after this many entries have been written.
  """
  vtables = [{'symbol': symbol, 'path': path, 'size': size}
             for symbol, _, size, path in symbols
             if 'vtable for ' in symbol]
  vtables.sort(key=lambda vtable: -vtable['size'])
  with open(outfile, 'w') as out:
    try:
      out.write('var largestVTables = [\n')
      for written, record in enumerate(vtables, 1):
        entry = {'size': FormatBytes(record['size']),
                 'symbol': record['symbol'],
                 'location': record['path']}
        out.write(json.dumps(entry) + ',\n')
        if written >= n:
          break
    finally:
      # Always terminate the array, even when an error cut the listing short.
      out.write('];\n')
332
333
def RunPA2L(outfile, library, arch, threads, verbose):
  """Run a parallel addr2line processing engine to dump and resolve symbols.

  Launches the ParallelAddress2Line Java tool from the build output directory
  (CHROMIUM_OUT_DIR/BUILDTYPE, defaulting to out/Release). All paths are
  relative to the current working directory.

  Args:
    outfile: path that the tool writes the resolved nm output to.
    library: path of the library to analyze.
    arch: one of 'android-arm', 'android-mips' or 'android-x86' to use the
        matching NDK toolchain; any other value uses the nm/addr2line found
        on PATH.
    threads: number of worker threads; may be an int or a string.
    verbose: if true, pass --verbose to the tool and print the command line.

  Raises:
    RuntimeError: if the tool exits with a non-zero status.
  """
  out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out')
  build_type = os.getenv('BUILDTYPE', 'Release')
  classpath = os.path.join(out_dir, build_type, 'lib.java',
                           'binary_size_java.jar')
  cmd = ['java',
         '-classpath', classpath,
         'org.chromium.tools.binary_size.ParallelAddress2Line',
         '--disambiguate',
         '--outfile', outfile,
         '--library', library,
         # argv entries must be strings; callers may hand us an int.
         '--threads', str(threads)]
  if verbose:
    cmd.append('--verbose')

  # arch -> (NDK toolchain directory, binutils executable name prefix)
  toolchains = {
      'android-arm': ('arm-linux-androideabi-4.7', 'arm-linux-androideabi-'),
      'android-mips': ('mipsel-linux-android-4.7', 'mipsel-linux-android-'),
      'android-x86': ('x86-4.7', 'i686-linux-android-'),
  }
  if arch in toolchains:
    toolchain_dir, tool_prefix = toolchains[arch]
    prefix = os.path.join('third_party', 'android_tools', 'ndk', 'toolchains',
                          toolchain_dir, 'prebuilt', 'linux-x86_64', 'bin',
                          tool_prefix)
    cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line'])
  # else, use whatever is in PATH (don't pass --nm or --addr2line)

  if verbose:
    print(cmd)

  return_code = subprocess.call(cmd)
  if return_code:
    raise RuntimeError('Failed to run ParallelAddress2Line: returned ' +
                       str(return_code))
371
372
def GetNmSymbols(infile, outfile, library, arch, threads, verbose):
  """Obtain the parsed symbol list, generating nm output first if needed.

  Args:
    infile: path to existing nm output, or None to generate it from library.
    outfile: when generating, the path to keep the nm output at; if None a
        temporary file is used and removed once it has been parsed.
    library: path of the library to analyze (used only when generating).
    arch: target architecture, forwarded to RunPA2L.
    threads: addr2line worker thread count, forwarded to RunPA2L.
    verbose: if true, print progress information.

  Returns:
    A list of (symbol name, symbol type, size, path) tuples.
  """
  temp_path = None
  try:
    if infile is None:
      if outfile is None:
        # mkstemp() hands back an open descriptor; close it straight away
        # since only the path is needed.
        handle, temp_path = tempfile.mkstemp()
        os.close(handle)
        infile = temp_path
      else:
        infile = outfile

      if verbose:
        print('Running parallel addr2line, dumping symbols to ' + infile)
      RunPA2L(outfile=infile, library=library, arch=arch,
              threads=threads, verbose=verbose)
    elif verbose:
      print('Using nm input from ' + infile)

    with open(infile, 'r') as nm_file:
      return list(ParseNm(nm_file))
  finally:
    # Only a temp file we created ourselves is removed; --nm-in and --nm-out
    # files belong to the user.
    if temp_path is not None and os.path.exists(temp_path):
      os.remove(temp_path)
388
389
def main():
  """Parse the command line, analyze the library and write the HTML report.

  Writes the treemap and "largest" listings as .js files into --destdir,
  downloads the webtreemap assets with wget/unzip if they are not already
  there, and copies the report template alongside them. The template is
  looked up relative to the current working directory.

  Raises:
    RuntimeError: if downloading or unpacking webtreemap fails, or if the
        symbol dump tool fails.
  """
  usage="""%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

      %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  # optparse derives dest from the long option name (--nm-in -> nm_in).
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  # --arch and --pa2l-threads deliberately have no optparse default: a value
  # of None is how we tell "not given" apart when warning about --nm-in.
  parser.add_option('--arch',
                    help='the architecture that the library is targeted to. '
                    'Currently supports the following: '
                    'host-native, android-arm, android-mips, android-x86. '
                    'The default is host-native. This determines '
                    'what nm/addr2line binaries are used. When host-native '
                    'is chosen (the default), the program will use whichever '
                    'nm/addr2line binaries are on the PATH. This is '
                    'appropriate when you are analyzing a binary by and for '
                    'your computer. '
                    'This argument is only valid when using --library.')
  parser.add_option('--pa2l-threads', dest='threads',
                    help='number of threads to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'threads greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates. '
                    'This argument is only valid when using --library.')
  opts, _ = parser.parse_args()

  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    if opts.threads:
      sys.stderr.write('WARNING: --pa2l-threads has no effect '
                       'when used with --nm-in\n')
    if opts.arch:
      sys.stderr.write('WARNING: --arch has no effect '
                       'when used with --nm-in\n')
  if not opts.destdir:
    parser.error('--destdir is required argument')
  if not opts.threads:
    # Kept as a string, like a user-supplied value, because it ends up in the
    # argv of a subprocess.
    opts.threads = '1'
  if not opts.arch:
    opts.arch = 'host-native'

  if opts.arch not in ['host-native', 'android-arm',
                       'android-mips', 'android-x86']:
    parser.error('arch must be one of '
                 '[host-native,android-arm,android-mips,android-x86]')

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, opts.arch,
                         opts.threads, bool(opts.verbose))
  if not os.path.exists(opts.destdir):
    os.makedirs(opts.destdir, 0o755)
  DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
  DumpLargestSymbols(symbols,
                     os.path.join(opts.destdir, 'largest-symbols.js'), 100)
  DumpLargestSources(symbols,
                     os.path.join(opts.destdir, 'largest-sources.js'), 100)
  DumpLargestVTables(symbols,
                     os.path.join(opts.destdir, 'largest-vtables.js'), 100)

  # TODO(andrewhayden): Switch to d3 and/or mirror webtreemap project
  if not os.path.exists(os.path.join(opts.destdir, 'webtreemap.js')):
    url = 'https://github.com/martine/webtreemap/archive/gh-pages.zip'
    tmpdir = tempfile.mkdtemp('binary_size')
    zip_path = os.path.join(tmpdir, 'webtreemap.zip')
    try:
      cmd = ['wget', '-O', zip_path, url]
      return_code = subprocess.call(cmd)
      if return_code:
        raise RuntimeError('Failed to download: returned ' + str(return_code))
      cmd = ['unzip', '-o', zip_path, '-d', tmpdir]
      return_code = subprocess.call(cmd)
      if return_code:
        raise RuntimeError('Failed to unzip: returned ' + str(return_code))
      unpacked = os.path.join(tmpdir, 'webtreemap-gh-pages')
      for asset in ('COPYING', 'webtreemap.js', 'webtreemap.css'):
        shutil.move(os.path.join(unpacked, asset), opts.destdir)
    finally:
      shutil.rmtree(tmpdir, ignore_errors=True)
  shutil.copy(os.path.join('tools', 'binary_size', 'template', 'index.html'),
              opts.destdir)
  if opts.verbose:
    print('Report saved to ' + opts.destdir + '/index.html')
511
512
# Run the tool only when executed as a script, and hand main()'s result to the
# shell as the exit status.
if __name__ == '__main__':
  exit_status = main()
  sys.exit(exit_status)
OLDNEW
« no previous file with comments | « tools/binary_size/java/src/org/chromium/tools/binary_size/Record.java ('k') | tools/binary_size/template/.gitignore » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698