Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/python | |
|
bulach
2014/01/08 15:04:00
make sure this file has a chmod +x :)
I got bitten
Andrew Hayden (chromium.org)
2014/01/08 21:04:10
Thank you for the reminder. I just ran:
git update
| |
| 2 # Copyright 2014 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Generate a spatial analysis against an arbitrary library. | |
| 7 | |
| 8 To use, build the 'binary_size_java' target. Then run this tool, passing | |
| 9 in the location of the library to be analyzed along with any other options | |
| 10 you desire. | |
| 11 """ | |
| 12 | |
| 13 import fileinput | |
| 14 import optparse | |
| 15 import os | |
| 16 import pprint | |
| 17 import re | |
| 18 import shutil | |
| 19 import subprocess | |
| 20 import sys | |
| 21 import tempfile | |
| 22 import json | |
|
bulach
2014/01/08 15:04:00
nit: sort order
Andrew Hayden (chromium.org)
2014/01/08 21:04:10
Done.
| |
| 23 | |
def format_bytes(bytes):
  """Pretty-print a number of bytes.

  Args:
    bytes: the size to format, as a number. The parameter name shadows the
        builtin; it is kept so that keyword callers keep working.

  Returns:
    A string: the value divided by 1e6 with one decimal and an 'm' suffix
    when it is at least 1e6, divided by 1e3 with a 'k' suffix when it is at
    least 1e3, and str(bytes) otherwise.
  """
  # Thresholds are inclusive so exactly 1e6 renders as '1.0m', not '1000.0k'.
  if bytes >= 1e6:
    return '%.1fm' % (bytes / 1.0e6)
  if bytes >= 1e3:
    return '%.1fk' % (bytes / 1.0e3)
  return str(bytes)
| 33 | |
| 34 | |
def symbol_type_to_human(type):
  """Convert a symbol type as printed by nm into a human-readable name.

  Args:
    type: the single-letter nm symbol type. It is matched case-insensitively.

  Returns:
    The descriptive name for the type, or the letter itself, unchanged, when
    the table below has no entry for it.
  """
  names = {
      'b': 'bss',
      'd': 'data',
      'r': 'read-only data',
      't': 'code',
      'w': 'weak symbol',
      'v': 'weak symbol',
  }
  # Fall back to the raw letter instead of raising KeyError, so one
  # unexpected symbol type cannot abort a whole report.
  return names.get(type.lower(), type)
| 45 | |
| 46 | |
def parse_nm(input):
  """Parse nm output.

  Argument: an iterable over lines of nm output.

  Yields: (symbol name, symbol type, symbol size, source file path).
  The type is lower-cased and 'v' is reported as 'w'. BSS symbols are
  skipped. Path may be None if nm couldn't figure out the source file.

  Lines that have an address but no size, and address-less lines of type
  'U' or 'w', are ignored silently. Anything else is reported on stderr as
  'unparsed'.
  """
  # Address and size fields are 8 hex digits in 32-bit nm output and 16 in
  # 64-bit output; accept both widths.

  # Match lines with size, symbol, optional location, optional discriminator.
  sym_re = re.compile(
      r'^[0-9a-f]{8,16} ([0-9a-f]{8,16}) (.) ([^\t]+)'
      r'(?:\t(.*):[\d\? ]+)?.*$')
  # Match lines with addr but no size.
  addr_re = re.compile(r'^[0-9a-f]{8,16} (.) ([^\t]+)(?:\t.*)?$')
  # Match lines that don't have an address at all -- typically external
  # symbols. The address column is blank: 8 spaces, or 16 for 64-bit output.
  noaddr_re = re.compile(r'^ {8}(?: {8})? (.) (.*)$')

  for line in input:
    line = line.rstrip()

    match = sym_re.match(line)
    if match:
      size, sym_type, sym, path = match.groups()
      sym_type = sym_type.lower()
      if sym_type == 'b':
        continue  # skip all BSS for now
      if sym_type == 'v':
        sym_type = 'w'  # just call them all weak
      yield sym, sym_type, int(size, 16), path
      continue

    if addr_re.match(line):
      # No size == we don't care.
      continue

    match = noaddr_re.match(line)
    if match and match.group(1) in ('U', 'w'):
      # external or weak symbol
      continue

    sys.stderr.write('unparsed: %r\n' % (line,))
| 90 | |
| 91 | |
def treeify_syms(symbols):
  """Bucket symbol sizes into a nested dict that mirrors the source tree.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples. path may
        be None or empty.

  Returns:
    A dict in which each directory component of a path maps to a nested
    dict and each file name maps to the summed size of its symbols. Symbols
    with no usable path are summed under the top-level key
    'symbols without paths', in one of these sub-buckets: '__FUNCTION__',
    'CSWTCH', the symbol's leading 'namespace::' prefix, or 'misc'.
  """
  dirs = {}
  for sym, _, size, path in symbols:
    if path:
      # Normalise and drop every leading slash, so absolute and relative
      # spellings of a path share one subtree and no component is empty.
      path = os.path.normpath(path).lstrip('/')

    # TODO(andrewhayden): make segmenting by namespace work.
    if path:
      parts = path.split('/')
      key = parts.pop()
      tree = dirs
      try:
        for part in parts:
          tree = tree.setdefault(part, {})
        tree[key] = tree.get(key, 0) + size
      except Exception:
        # Happens when one name is used both as a file and as a directory;
        # report the offending symbol before propagating the error.
        sys.stderr.write('%s %s %s\n' % (sym, parts, key))
        raise
      continue

    bucket = dirs.setdefault('symbols without paths', {})
    if sym.endswith(('::__FUNCTION__', '::__PRETTY_FUNCTION__')):
      subkey = '__FUNCTION__'
    elif sym.startswith('CSWTCH.'):
      subkey = 'CSWTCH'
    elif '::' in sym:
      subkey = sym[:sym.find('::') + 2]
    else:
      subkey = 'misc'
    bucket[subkey] = bucket.get(subkey, 0) + size
  return dirs
| 146 | |
| 147 | |
def jsonify_tree(tree, name):
  """Convert a nested size dict into name/data/children node dicts.

  Args:
    tree: a dict whose values are either nested dicts (subtrees) or numeric
        sizes (leaves).
    name: the label for the node that represents this tree.

  Returns:
    A dict with 'name' (the label followed by the formatted total size),
    'data' ({'$area': total size}) and 'children' (the child nodes, largest
    area first). Leaf children carry only 'name' and 'data'.
  """
  children = []
  total = 0

  # items() rather than iteritems() so this runs on Python 2 and 3 alike.
  for key, val in tree.items():
    if isinstance(val, dict):
      node = jsonify_tree(val, key)
      total += node['data']['$area']
    else:
      node = {
          'name': key + ' ' + format_bytes(val),
          'data': {'$area': val},
      }
      total += val
    children.append(node)

  children.sort(key=lambda child: -child['data']['$area'])

  return {
      'name': name + ' ' + format_bytes(total),
      'data': {
          '$area': total,
      },
      'children': children,
  }
| 174 | |
| 175 | |
def dump_nm(symbols, outfile):
  """Write the symbol size tree as a JavaScript 'var kTree = ...' statement.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: the path of the file to write, or None to write to stdout.
  """
  # Build the whole payload before touching the output file, so a failure
  # while serialising cannot leave a truncated or unclosed file behind.
  tree = jsonify_tree(treeify_syms(symbols), '/')
  payload = 'var kTree = ' + json.dumps(tree, indent=2)

  if outfile is None:
    sys.stdout.write(payload)
    sys.stdout.flush()
    return
  with open(outfile, 'w') as out:
    out.write(payload)
| 185 | |
|
bulach
2014/01/08 15:04:00
nit: need another \n here.
Andrew Hayden (chromium.org)
2014/01/08 21:04:10
Done.
| |
def dump_largest_symbols(symbols, outfile, n):
  """Write the largest symbols as a JavaScript 'largestSymbols' array.

  Each entry is an object with 'size', 'symbol', 'type' and 'location'
  string fields. Entries are serialised with json.dumps, so symbol names
  and paths containing quotes or backslashes cannot corrupt the output.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: the path of the file to write, or None to write to stdout.
    n: stop after this many entries have been written.
  """
  # Largest first.
  ranked = sorted(symbols, key=lambda x: -x[2])
  out = sys.stdout if outfile is None else open(outfile, 'w')
  try:
    out.write('var largestSymbols = [\n')
    dumped = 0
    for sym, sym_type, size, path in ranked:
      if sym_type in ('b', 'w'):
        continue  # skip bss and weak symbols
      entry = {
          'size': format_bytes(size),
          'symbol': sym,
          'type': symbol_type_to_human(sym_type),
          'location': path or '',
      }
      out.write(' ' + json.dumps(entry, sort_keys=True) + ',\n')
      dumped += 1
      if dumped >= n:
        break
  finally:
    # Always terminate the array. Close only a file this function opened,
    # never sys.stdout.
    out.write('];\n')
    out.flush()
    if outfile is not None:
      out.close()
| 212 | |
|
bulach
2014/01/08 15:04:00
nit: another \n here (two between top levels), so
Andrew Hayden (chromium.org)
2014/01/08 21:04:10
Done.
| |
def make_source_map(symbols):
  """Aggregate symbol sizes and counts per source file.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.

  Returns:
    A dict keyed by the normalised source path, or by '[no path]' for
    symbols without one. Each value is a record with 'path' (the path
    exactly as given by the first symbol seen for that key, which may be
    None), 'symbol_count' and 'size' (the summed symbol sizes).
  """
  sources = {}
  for _sym, _type, size, path in symbols:
    key = os.path.normpath(path) if path else '[no path]'
    record = sources.setdefault(
        key, {'path': path, 'symbol_count': 0, 'size': 0})
    record['symbol_count'] += 1
    record['size'] += size
  return sources
| 227 | |
def dump_largest_sources(symbols, outfile, n):
  """Write the largest source files as a JavaScript 'largestSources' array.

  Each entry is an object with 'size', 'symbol_count' and 'location' string
  fields. Entries are serialised with json.dumps, so paths containing
  quotes or backslashes cannot corrupt the output.

  Args:
    symbols: an iterable of (symbol name, type, size, path) tuples.
    outfile: the path of the file to write, or None to write to stdout.
    n: stop after this many entries have been written.
  """
  source_map = make_source_map(symbols)
  # Largest first.
  ranked = sorted(source_map.values(), key=lambda x: -x['size'])
  out = sys.stdout if outfile is None else open(outfile, 'w')
  try:
    out.write('var largestSources = [\n')
    dumped = 0
    for record in ranked:
      entry = {
          'size': format_bytes(record['size']),
          'symbol_count': str(record['symbol_count']),
          # The record for symbols without a source file has no path (None);
          # emit an empty location instead of failing on it.
          'location': record['path'] or '',
      }
      out.write(' ' + json.dumps(entry, sort_keys=True) + ',\n')
      dumped += 1
      if dumped >= n:
        break
  finally:
    # Always terminate the array. Close only a file this function opened,
    # never sys.stdout.
    out.write('];\n')
    out.flush()
    if outfile is not None:
      out.close()
| 249 | |
| 250 | |
def run_pa2l(outfile, library, arch, threads, verbose=False):
  """Run a parallel addr2line processing engine to dump and resolve symbols.

  Launches the ParallelAddress2Line Java tool from binary_size_java.jar,
  which is looked up under $CHROMIUM_OUT_DIR/$BUILDTYPE/lib.java (the
  variables default to 'out' and 'Release').

  Args:
    outfile: path passed to the tool as --outfile.
    library: path passed to the tool as --library.
    arch: one of 'android-arm', 'android-mips' or 'android-x86' to use the
        matching NDK nm/addr2line binaries. For any other value neither
        --nm nor --addr2line is passed and the tool uses whatever is in PATH.
    threads: the value for --threads; converted with str().
    verbose: when true, pass --verbose to the tool and print the command.

  Raises:
    RuntimeError: if the tool exits with a non-zero return code.
  """
  out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out')
  buildtype = os.getenv('BUILDTYPE', 'Release')
  classpath = os.path.join(out_dir, buildtype, 'lib.java',
                           'binary_size_java.jar')
  cmd = ['java',
         '-classpath', classpath,
         'org.chromium.tools.binary_size.ParallelAddress2Line',
         '--disambiguate',
         '--outfile', outfile,
         '--library', library,
         # Every argv entry must be a string; callers may pass an int here.
         '--threads', str(threads)]
  if verbose:
    cmd.append('--verbose')

  # arch -> (NDK toolchain directory, binary name prefix).
  ndk_toolchains = {
      'android-arm': ('arm-linux-androideabi-4.7', 'arm-linux-androideabi'),
      'android-mips': ('mipsel-linux-android-4.7', 'mipsel-linux-android'),
      'android-x86': ('x86-4.7', 'i686-linux-android'),
  }
  if arch in ndk_toolchains:
    toolchain, prefix = ndk_toolchains[arch]
    bin_dir = os.path.join('third_party', 'android_tools', 'ndk',
                           'toolchains', toolchain, 'prebuilt',
                           'linux-x86_64', 'bin')
    cmd.extend([
        '--nm', os.path.join(bin_dir, prefix + '-nm'),
        '--addr2line', os.path.join(bin_dir, prefix + '-addr2line'),
    ])
  # else, use whatever is in PATH (don't pass --nm or --addr2line)

  if verbose:
    print(cmd)

  return_code = subprocess.call(cmd)
  if return_code:
    raise RuntimeError('Failed to run ParallelAddress2Line: returned ' +
                       str(return_code))
| 288 | |
def main():
  """Parse the command line, resolve symbols and write the HTML report.

  Either reads pre-generated nm output (--nm-in) or runs the parallel
  addr2line tool on --library. It then writes treemap-dump.js,
  largest-symbols.js and largest-sources.js into --destdir, fetches the
  webtreemap assets if webtreemap.js is not already there, and copies the
  index.html template in.

  Returns:
    0 on success. Invalid arguments exit through parser.error(); failures
    of external commands raise RuntimeError.
  """
  usage = """%prog [options]

Runs a spatial analysis on a given library, looking up the source locations of
its symbols and calculating how much space each directory, source file, and so
on is taking. The result is a report that can be used to pinpoint sources of
large portions of the binary, etceteras.

Under normal circumstances, you only need to pass two arguments, thusly:

    %prog --library /path/to/library --destdir /path/to/output

In this mode, the program will dump the symbols from the specified library and
map those symbols back to source locations, producing a web-based report in the
specified output directory.

Other options are available via '--help'.
"""
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', dest='nm_in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--arch',
                    help='the architecture that the library is targeted to. '
                    'Currently supports the following: '
                    'host-native, android-arm, android-mips, android-x86. '
                    'the default is host-native. This determines '
                    'what nm/addr2line binaries are used. When host-native '
                    'is chosen (the default), the program will use whichever '
                    'nm/addr2line binaries are on the PATH. This is '
                    'appropriate when you are analyzing a binary by and for '
                    'your computer. '
                    'This argument is only valid when using --library.')
  parser.add_option('--pa2l-threads', dest='threads',
                    help='number of threads to use for the parallel '
                    'addr2line processing pool; defaults to 1. More threads '
                    'greatly improve throughput but eat RAM like popcorn, '
                    'and take several gigabytes each. Start low and ramp '
                    'this number up until your machine begins to struggle '
                    'with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', dest='nm_out',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates. '
                    'This argument is only valid when using --library.')
  opts, _ = parser.parse_args()

  if bool(opts.library) == bool(opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    if opts.threads:
      sys.stderr.write('WARNING: --pa2l-threads has no effect '
                       'when used with --nm-in\n')
    if opts.arch:
      sys.stderr.write('WARNING: --arch has no effect '
                       'when used with --nm-in\n')
  if not opts.destdir:
    parser.error('--destdir is required argument')
  if not opts.threads:
    # Kept as a string, like a value given on the command line, because it
    # ends up in a subprocess argument list.
    opts.threads = '1'
  if not opts.arch:
    opts.arch = 'host-native'

  if opts.arch not in ['host-native', 'android-arm',
                       'android-mips', 'android-x86']:
    parser.error('arch must be one of '
                 '[host-native,android-arm,android-mips,android-x86]')

  nm_in = opts.nm_in
  temp_path = None
  try:
    if nm_in is None:
      if opts.nm_out is None:
        temp_file = tempfile.NamedTemporaryFile(prefix='binary_size_nm',
                                                delete=False)
        # Only the name is needed; the external tool writes the content.
        temp_file.close()
        temp_path = temp_file.name
        nm_in = temp_path
      else:
        nm_in = opts.nm_out

      if opts.verbose:
        print('Running parallel addr2line, dumping symbols to ' + nm_in)
      run_pa2l(outfile=nm_in,
               library=opts.library,
               arch=opts.arch,
               threads=opts.threads,
               verbose=(opts.verbose is True))
    elif opts.verbose:
      print('Using nm input from ' + nm_in)

    if not os.path.exists(opts.destdir):
      os.makedirs(opts.destdir, 0o755)

    with open(nm_in, 'r') as nm_file:
      symbols = list(parse_nm(nm_file))
  finally:
    # Honour the documented contract: the temporary nm dump is removed.
    if temp_path is not None and os.path.exists(temp_path):
      os.remove(temp_path)

  dump_nm(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
  dump_largest_symbols(symbols,
                       os.path.join(opts.destdir, 'largest-symbols.js'), 100)
  dump_largest_sources(symbols,
                       os.path.join(opts.destdir, 'largest-sources.js'), 100)

  if not os.path.exists(os.path.join(opts.destdir, 'webtreemap.js')):
    # TODO(andrewhayden): stop downloading third-party code at run time.
    url = 'https://github.com/martine/webtreemap/archive/gh-pages.zip'
    tmpdir = tempfile.mkdtemp('binary_size')
    try:
      archive = os.path.join(tmpdir, 'webtreemap.zip')
      return_code = subprocess.call(['wget', '-O', archive, url])
      if return_code:
        raise RuntimeError('Failed to download: returned ' +
                           str(return_code))
      return_code = subprocess.call(['unzip', '-o', archive, '-d', tmpdir])
      if return_code:
        raise RuntimeError('Failed to unzip: returned ' + str(return_code))

      unpacked = os.path.join(tmpdir, 'webtreemap-gh-pages')
      for asset in ('COPYING', 'webtreemap.js', 'webtreemap.css'):
        shutil.move(os.path.join(unpacked, asset), opts.destdir)
    finally:
      shutil.rmtree(tmpdir, ignore_errors=True)

  shutil.copy('tools/binary_size/template/index.html', opts.destdir)
  if opts.verbose:
    print('Report saved to ' + os.path.join(opts.destdir, 'index.html'))
  return 0


if __name__ == '__main__':
  sys.exit(main())
| OLD | NEW |