Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Side by Side Diff: tools/binary_size/run_binary_size_analysis.py

Issue 119083006: Add tool to help analyze binary size (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Remove unnecessary threadsafety from Record.java Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/python
bulach 2014/01/08 15:04:00 make sure this file has a chmod +x :) I got bitten
Andrew Hayden (chromium.org) 2014/01/08 21:04:10 Thank you for the reminder. I just ran: git update
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Generate a spatial analysis against an arbitrary library.
7
8 To use, build the 'binary_size_java' target. Then run this tool, passing
9 in the location of the library to be analyzed along with any other options
10 you desire.
11 """
12
13 import fileinput
14 import optparse
15 import os
16 import pprint
17 import re
18 import shutil
19 import subprocess
20 import sys
21 import tempfile
22 import json
bulach 2014/01/08 15:04:00 nit: sort order
Andrew Hayden (chromium.org) 2014/01/08 21:04:10 Done.
23
def format_bytes(bytes):
    """Pretty-print a number of bytes.

    Args:
        bytes: a byte count (int or float).

    Returns:
        A string: the count divided by 1e6 with an 'm' suffix, or divided
        by 1e3 with a 'k' suffix (both with one decimal place), or the plain
        str() of the value when it is below 1000.
    """
    # NOTE: the parameter name shadows the builtin `bytes`; it is kept so
    # that any caller passing it by keyword keeps working.
    # The thresholds are inclusive so that exactly 1000 renders as '1.0k'
    # (not '1000') and exactly 1000000 renders as '1.0m' (not '1000.0k').
    if bytes >= 1e6:
        return '%.1fm' % (bytes / 1.0e6)
    if bytes >= 1e3:
        return '%.1fk' % (bytes / 1.0e3)
    return str(bytes)
33
34
def symbol_type_to_human(type):
    """Translate a one-letter nm symbol type into a descriptive name.

    Only the lower-case letters b, d, r, t, w and v are known; any other
    value raises KeyError.
    """
    weak = 'weak symbol'
    names = dict(
        b='bss',
        d='data',
        r='read-only data',
        t='code',
        w=weak,
        v=weak,
    )
    return names[type]
45
46
def parse_nm(input):
    """Parse nm output.

    Args:
        input: an iterable over lines of nm output.

    Yields:
        (symbol name, symbol type, symbol size, source file path) for every
        line that carries a size. The type is lower-cased, 'v' is reported
        as 'w', and BSS ('b') symbols are skipped. Path is None when the
        line has no tab-separated location.

    Lines matching none of the known shapes are reported on stderr and
    otherwise ignored.
    """
    # Match lines with size, symbol, optional location, optional discriminator
    sym_re = re.compile(
        r'^[0-9a-f]{8} ([0-9a-f]{8}) (.) ([^\t]+)(?:\t(.*):[\d\?]+)?.*$')
    # Match lines with addr but no size.
    addr_re = re.compile(r'^[0-9a-f]{8} (.) ([^\t]+)(?:\t.*)?$')
    # Match lines that don't have an address at all -- typically external
    # symbols.
    noaddr_re = re.compile(r'^ {8} (.) (.*)$')

    for line in input:
        line = line.rstrip()

        match = sym_re.match(line)
        if match:
            size, sym_type, sym, path = match.groups()
            sym_type = sym_type.lower()
            if sym_type == 'b':
                continue  # skip all BSS for now
            if sym_type == 'v':
                sym_type = 'w'  # just call them all weak
            yield sym, sym_type, int(size, 16), path
            continue

        if addr_re.match(line):
            # No size == we don't care.
            continue

        match = noaddr_re.match(line)
        if match and match.group(1) in ('U', 'w'):
            # external or weak symbol
            continue

        # sys.stderr.write produces the same text as the old print-chevron
        # statement but is valid on both Python 2 and Python 3.
        sys.stderr.write('unparsed: %s\n' % repr(line))
90
91
def treeify_syms(symbols):
    """Fold symbols into a nested dict of sizes keyed by path component.

    Args:
        symbols: iterable of (sym, type, size, path) tuples.

    Returns:
        A dict in which every directory component of a path maps to a
        sub-dict and the final component maps to the summed size of the
        symbols located there. Symbols with no usable path are summed under
        the top-level key 'symbols without paths', bucketed as
        '__FUNCTION__', 'CSWTCH', '<prefix>::' or 'misc'.

    Raises:
        Whatever the tree update raises (for example when one path is used
        both as a file and as a directory); the offending symbol is written
        to stderr first.
    """
    dirs = {}
    for sym, _type, size, path in symbols:
        if path:
            # normpath may keep a leading '//', so strip every leading
            # slash rather than just the first one; otherwise split() would
            # produce an empty path component.
            path = os.path.normpath(path).lstrip('/')

        # TODO(andrewhayden): make segmenting by namespace work.
        if path:
            # split() returns [path] when there is no '/', so no separate
            # single-component case is needed.
            parts = path.split('/')
            key = parts.pop()
            tree = dirs
            try:
                for part in parts:
                    tree = tree.setdefault(part, {})
                tree[key] = tree.get(key, 0) + size
            except Exception:
                sys.stderr.write('%s %s %s\n' % (sym, parts, key))
                raise
            continue

        bucket = dirs.setdefault('symbols without paths', {})
        if sym.endswith(('::__FUNCTION__', '::__PRETTY_FUNCTION__')):
            subkey = '__FUNCTION__'
        elif sym.startswith('CSWTCH.'):
            subkey = 'CSWTCH'
        elif '::' in sym:
            # Keep everything up to and including the first '::'.
            subkey = sym[:sym.find('::') + 2]
        else:
            subkey = 'misc'
        bucket[subkey] = bucket.get(subkey, 0) + size
    return dirs
146
147
def jsonify_tree(tree, name):
    """Convert a nested size dict into a node with name, area and children.

    Args:
        tree: dict whose values are either sub-dicts (directories) or
            numeric sizes (leaves).
        name: label for the node representing `tree` itself.

    Returns:
        {'name': '<name> <formatted total>', 'data': {'$area': total},
        'children': [...]} where children are built the same way and are
        ordered by descending '$area'.
    """
    children = []
    total = 0

    # items() (rather than iteritems()) works on both Python 2 and 3.
    for key, val in tree.items():
        if isinstance(val, dict):
            child = jsonify_tree(val, key)
            total += child['data']['$area']
        else:
            child = {
                'name': key + ' ' + format_bytes(val),
                'data': {'$area': val},
            }
            total += val
        children.append(child)

    children.sort(key=lambda child: -child['data']['$area'])

    return {
        'name': name + ' ' + format_bytes(total),
        'data': {
            '$area': total,
        },
        'children': children,
    }
174
175
def dump_nm(symbols, outfile):
    """Write the symbol size tree as a JavaScript `kTree` assignment.

    Args:
        symbols: iterable of (sym, type, size, path) tuples.
        outfile: path of the file to write, or None to write to stdout.
    """
    # Build the whole payload before touching the output so that a failure
    # while treeifying or serialising cannot leave an open, truncated file.
    tree = jsonify_tree(treeify_syms(symbols), '/')
    payload = 'var kTree = ' + json.dumps(tree, indent=2)

    if outfile is None:
        sys.stdout.write(payload)
        sys.stdout.flush()
        return
    # `with` closes the file even if the write itself fails.
    with open(outfile, 'w') as out:
        out.write(payload)
185
bulach 2014/01/08 15:04:00 nit: need another \n here.
Andrew Hayden (chromium.org) 2014/01/08 21:04:10 Done.
def dump_largest_symbols(symbols, outfile, n):
    """Write the n largest symbols as a JavaScript `largestSymbols` array.

    Args:
        symbols: iterable of (sym, type, size, path) tuples.
        outfile: path of the file to write, or None to write to stdout.
        n: maximum number of entries to emit; bss ('b') and weak ('w')
            symbols are skipped and do not count towards it.

    Each entry is an object with 'size', 'symbol', 'type' and 'location'
    string fields. Any exception raised by symbol_type_to_human for a type
    it does not know propagates to the caller.
    """
    # Largest first.
    ranked = sorted(symbols, key=lambda x: -x[2])
    out = sys.stdout if outfile is None else open(outfile, 'w')
    try:
        try:
            out.write('var largestSymbols = [\n')
            dumped = 0
            for sym, sym_type, size, path in ranked:
                if dumped >= n:
                    break
                if sym_type in ('b', 'w'):
                    continue  # skip bss and weak symbols
                entry = {
                    'size': format_bytes(size),
                    'symbol': sym,
                    'type': symbol_type_to_human(sym_type),
                    'location': path or '',
                }
                # json.dumps escapes quotes and backslashes in symbol names
                # and paths, and its output is a valid JS object literal.
                out.write(' ' + json.dumps(entry, sort_keys=True) + ',\n')
                dumped += 1
        finally:
            # Always terminate the array so that a partially written file
            # is still syntactically complete.
            out.write('];\n')
            out.flush()
    finally:
        # Only close what this function opened; never close sys.stdout.
        if outfile is not None:
            out.close()
212
bulach 2014/01/08 15:04:00 nit: another \n here (two between top levels), so
Andrew Hayden (chromium.org) 2014/01/08 21:04:10 Done.
def make_source_map(symbols):
    """Aggregate symbols per source file.

    Args:
        symbols: iterable of (sym, type, size, path) tuples.

    Returns:
        A dict keyed by the normalised path (or '[no path]' when the path
        is empty or None). Each value holds 'path' (the path exactly as
        given by the first symbol seen for that key), 'symbol_count' and
        the summed 'size'.
    """
    by_source = {}
    for _sym, _type, size, path in symbols:
        source_key = os.path.normpath(path) if path else '[no path]'
        entry = by_source.setdefault(
            source_key, {'path': path, 'symbol_count': 0, 'size': 0})
        entry['size'] += size
        entry['symbol_count'] += 1
    return by_source
227
def dump_largest_sources(symbols, outfile, n):
    """Write the n largest source files as a JS `largestSources` array.

    Args:
        symbols: iterable of (sym, type, size, path) tuples.
        outfile: path of the file to write, or None to write to stdout.
        n: maximum number of entries to emit.

    Each entry is an object with 'size', 'symbol_count' and 'location'
    string fields.
    """
    source_map = make_source_map(symbols)
    # Largest first.
    ranked = sorted(source_map.values(), key=lambda x: -x['size'])
    out = sys.stdout if outfile is None else open(outfile, 'w')
    try:
        try:
            out.write('var largestSources = [\n')
            for record in ranked[:max(n, 0)]:
                entry = {
                    'size': format_bytes(record['size']),
                    'symbol_count': str(record['symbol_count']),
                    # The '[no path]' bucket stores a None path; emit an
                    # empty location instead of failing on it.
                    'location': record['path'] or '',
                }
                # json.dumps escapes quotes and backslashes in paths, and
                # its output is a valid JS object literal.
                out.write(' ' + json.dumps(entry, sort_keys=True) + ',\n')
        finally:
            # Always terminate the array so that a partially written file
            # is still syntactically complete.
            out.write('];\n')
            out.flush()
    finally:
        # Only close what this function opened; never close sys.stdout.
        if outfile is not None:
            out.close()
249
250
def run_pa2l(outfile, library, arch, threads, verbose=False):
    """Run a parallel addr2line processing engine to dump and resolve symbols.

    Launches the Java class
    org.chromium.tools.binary_size.ParallelAddress2Line from
    <CHROMIUM_OUT_DIR>/<BUILDTYPE>/lib.java/binary_size_java.jar (the
    environment variables default to 'out' and 'Release').

    Args:
        outfile: value passed to the tool as --outfile.
        library: value passed to the tool as --library.
        arch: 'android-arm', 'android-mips' or 'android-x86' select the
            matching NDK 4.7 nm/addr2line binaries under third_party; any
            other value passes neither --nm nor --addr2line, so whatever
            is on PATH is used.
        threads: value passed as --threads; converted with str() because
            subprocess only accepts string arguments.
        verbose: when true, pass --verbose and print the command line.

    Raises:
        RuntimeError: if the java process exits with a non-zero status.
    """
    out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out')
    buildtype = os.getenv('BUILDTYPE', 'Release')
    classpath = os.path.join(out_dir, buildtype, 'lib.java',
                             'binary_size_java.jar')
    cmd = ['java',
           '-classpath', classpath,
           'org.chromium.tools.binary_size.ParallelAddress2Line',
           '--disambiguate',
           '--outfile', outfile,
           '--library', library,
           '--threads', str(threads)]
    if verbose:
        cmd.append('--verbose')

    # arch -> (NDK toolchain directory, prefix of the binutils executables)
    toolchains = {
        'android-arm': ('arm-linux-androideabi-4.7',
                        'arm-linux-androideabi-'),
        'android-mips': ('mipsel-linux-android-4.7',
                         'mipsel-linux-android-'),
        'android-x86': ('x86-4.7', 'i686-linux-android-'),
    }
    if arch in toolchains:
        toolchain_dir, tool_prefix = toolchains[arch]
        bin_dir = os.path.join('third_party', 'android_tools', 'ndk',
                               'toolchains', toolchain_dir, 'prebuilt',
                               'linux-x86_64', 'bin')
        # Building every pair from the same table keeps the flag and its
        # path as separate list items for all architectures.
        cmd.extend([
            '--nm', os.path.join(bin_dir, tool_prefix + 'nm'),
            '--addr2line', os.path.join(bin_dir, tool_prefix + 'addr2line'),
        ])
    # else, use whatever is in PATH (don't pass --nm or --addr2line)

    if verbose:
        print(cmd)

    return_code = subprocess.call(cmd)
    if return_code:
        raise RuntimeError('Failed to run ParallelAddress2Line: returned ' +
                           str(return_code))
288
def _build_option_parser():
    """Create the optparse parser describing every command-line flag."""
    usage = """%prog [options]

Runs a spatial analysis on a given library, looking up the source locations of
its symbols and calculating how much space each directory, source file, and so
on is taking. The result is a report that can be used to pinpoint sources of
large portions of the binary, etceteras.

Under normal circumstances, you only need to pass two arguments, thusly:

    %prog --library /path/to/library --destdir /path/to/output

In this mode, the program will dump the symbols from the specified library and
map those symbols back to source locations, producing a web-based report in the
specified output directory.

Other options are available via '--help'.
"""
    parser = optparse.OptionParser(usage=usage)
    parser.add_option(
        '--nm-in', dest='nm_in', metavar='PATH',
        help='if specified, use nm input from <path> instead of '
             'generating it. Note that source locations should be present '
             'in the file; i.e., no addr2line symbol lookups will be '
             'performed when this option is specified. Mutually exclusive '
             'with --library.')
    parser.add_option(
        '--destdir', metavar='PATH',
        help='write output to the specified directory. An HTML '
             'report is generated here along with supporting files; any '
             'existing report will be overwritten.')
    parser.add_option(
        '--library', metavar='PATH',
        help='if specified, process symbols in the library at the '
             'specified path. Mutually exclusive with --nm-in.')
    # The adjacent literals below are concatenated, so each sentence must
    # end with a space to avoid "x86.the" / "RAM.This" in the --help text.
    parser.add_option(
        '--arch',
        help='the architecture that the library is targeted to. '
             'Currently supports the following: '
             'host-native, android-arm, android-mips, android-x86. '
             'the default is host-native. This determines '
             'what nm/addr2line binaries are used. When host-native is '
             'chosen (the default), the program will use whichever '
             'nm/addr2line binaries are on the PATH. This is appropriate '
             'when you are analyzing a binary by and for your computer. '
             'This argument is only valid when using --library.')
    parser.add_option(
        '--pa2l-threads', dest='threads',
        help='number of threads to use for the parallel addr2line '
             'processing pool; defaults to 1. More threads greatly '
             'improve throughput but eat RAM like popcorn, and take '
             'several gigabytes each. Start low and ramp this number up '
             'until your machine begins to struggle with RAM. '
             'This argument is only valid when using --library.')
    parser.add_option(
        '-v', dest='verbose', action='store_true',
        help='be verbose, printing lots of status information.')
    parser.add_option(
        '--nm-out', dest='nm_out',
        help='keep the nm output file, and store it at the specified '
             'path. This is useful if you want to see the fully processed '
             'nm output after the symbols have been mapped to source '
             'locations. By default, a tempfile is used and is deleted '
             'when the program terminates. '
             'This argument is only valid when using --library.')
    return parser


def _check_options(parser, opts):
    """Validate option combinations and fill in defaults.

    Calls parser.error() (which exits) on invalid input.
    """
    if bool(opts.library) == bool(opts.nm_in):
        parser.error('exactly one of --library or --nm-in is required')
    if opts.nm_in:
        if opts.threads:
            sys.stderr.write('WARNING: --pa2l-threads has no effect '
                             'when used with --nm-in\n')
        if opts.arch:
            sys.stderr.write('WARNING: --arch has no effect '
                             'when used with --nm-in\n')
    if not opts.destdir:
        parser.error('--destdir is required argument')
    if not opts.threads:
        # Kept as a string: it ends up on a subprocess command line.
        opts.threads = '1'
    if not opts.arch:
        opts.arch = 'host-native'

    if opts.arch not in ['host-native', 'android-arm',
                         'android-mips', 'android-x86']:
        parser.error('arch must be one of '
                     '[host-native,android-arm,android-mips,android-x86]')


def _load_symbols(opts):
    """Return the parsed symbol list, resolving the library first if needed."""
    nm_path = opts.nm_in
    temp_path = None
    try:
        if nm_path is None:
            if opts.nm_out is None:
                # Only the name is needed; close our handle immediately and
                # remove the file once the symbols have been read.
                handle = tempfile.NamedTemporaryFile(
                    prefix='binary_size_nm', delete=False)
                handle.close()
                temp_path = handle.name
                nm_path = temp_path
            else:
                nm_path = opts.nm_out

            if opts.verbose:
                print('Running parallel addr2line, dumping symbols to ' +
                      nm_path)
            run_pa2l(outfile=nm_path,
                     library=opts.library,
                     arch=opts.arch,
                     threads=opts.threads,
                     verbose=bool(opts.verbose))
        elif opts.verbose:
            print('Using nm input from ' + nm_path)

        with open(nm_path, 'r') as nm_file:
            return list(parse_nm(nm_file))
    finally:
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)


def _fetch_webtreemap(destdir):
    """Download webtreemap and place its JS, CSS and COPYING in destdir.

    Raises:
        RuntimeError: if wget or unzip exits with a non-zero status.
    """
    # TODO: fetching third-party code at run time needs third-party review
    # approval; consider obtaining webtreemap some other way.
    url = 'https://github.com/martine/webtreemap/archive/gh-pages.zip'
    tmpdir = tempfile.mkdtemp('binary_size')
    try:
        archive = os.path.join(tmpdir, 'webtreemap.zip')
        return_code = subprocess.call(['wget', '-O', archive, url])
        if return_code:
            raise RuntimeError('Failed to download: returned ' +
                               str(return_code))
        return_code = subprocess.call(['unzip', '-o', archive, '-d', tmpdir])
        if return_code:
            raise RuntimeError('Failed to unzip: returned ' +
                               str(return_code))

        unpacked = os.path.join(tmpdir, 'webtreemap-gh-pages')
        for name in ('COPYING', 'webtreemap.js', 'webtreemap.css'):
            # An explicit destination file name lets a stale copy in
            # destdir be replaced instead of making shutil.move fail.
            shutil.move(os.path.join(unpacked, name),
                        os.path.join(destdir, name))
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)


def main():
    """Parse the command line and generate the binary size report.

    Returns:
        0 on success; invalid options exit through parser.error().
    """
    parser = _build_option_parser()
    opts, _args = parser.parse_args()
    _check_options(parser, opts)

    symbols = _load_symbols(opts)

    if not os.path.exists(opts.destdir):
        os.makedirs(opts.destdir, 0o755)

    dump_nm(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
    dump_largest_symbols(
        symbols, os.path.join(opts.destdir, 'largest-symbols.js'), 100)
    dump_largest_sources(
        symbols, os.path.join(opts.destdir, 'largest-sources.js'), 100)

    if not os.path.exists(os.path.join(opts.destdir, 'webtreemap.js')):
        _fetch_webtreemap(opts.destdir)

    # NOTE(review): assumed to run on every invocation (not only after a
    # download), matching the --destdir help text which says an existing
    # report is overwritten -- confirm. The template path is relative to
    # the current working directory.
    shutil.copy(
        os.path.join('tools', 'binary_size', 'template', 'index.html'),
        opts.destdir)
    if opts.verbose:
        print('Report saved to ' + os.path.join(opts.destdir, 'index.html'))
    return 0


if __name__ == '__main__':
    sys.exit(main())
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698