Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(298)

Side by Side Diff: tools/binary_size/run_binary_size_analysis.py

Issue 119083006: Add tool to help analyze binary size (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Marcus' comments Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Generate a spatial analysis against an arbitrary library.
7
8 To use, build the 'binary_size_tool' target. Then run this tool, passing
9 in the location of the library to be analyzed along with any other options
10 you desire.
11 """
12
13 import collections
14 import fileinput
15 import json
16 import optparse
17 import os
18 import pprint
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import tempfile
24
25
def FormatBytes(bytes):
  """Pretty-print a number of bytes.

  Values above 1e6 are shown in megabytes ('1.2m'), values above 1e3 in
  kilobytes ('3.4k'), and anything else as a plain decimal string.
  """
  for threshold, divisor, suffix in ((1e6, 1.0e6, 'm'), (1e3, 1.0e3, 'k')):
    if bytes > threshold:
      return '%.1f%s' % (bytes / divisor, suffix)
  return str(bytes)
35
36
def SymbolTypeToHuman(type):
  """Convert a symbol type as printed by nm into a human-readable name.

  Raises KeyError for any type not in the known set.
  """
  human_names = {
      'b': 'bss',
      'd': 'data',
      'r': 'read-only data',
      't': 'code',
      'w': 'weak symbol',
      'v': 'weak symbol',
  }
  return human_names[type]
45
46
def ParseNm(input):
  """Parse nm output.

  Argument: an iterable over lines of nm output.

  Yields: (symbol name, symbol type, symbol size, source file path).
  Path may be None if nm couldn't figure out the source file.
  """

  # Match lines with size, symbol, optional location, optional discriminator.
  # Addresses and sizes are 8 hex digits on 32-bit binaries and 16 on 64-bit,
  # so accept either width.
  sym_re = re.compile(r'^[0-9a-f]{8,16} '   # address
                      r'([0-9a-f]{8,16}) '  # size
                      r'(.) '               # symbol type, one character
                      r'([^\t]+)'           # symbol name, separated from next by tab
                      r'(?:\t(.*):[\d\?]+)?.*$')  # location
  # Match lines with addr but no size.
  addr_re = re.compile(r'^[0-9a-f]{8,16} (.) ([^\t]+)(?:\t.*)?$')
  # Match lines that don't have an address at all -- typically external symbols.
  noaddr_re = re.compile(r'^ {8,16} (.) (.*)$')

  for line in input:
    line = line.rstrip()
    match = sym_re.match(line)
    if match:
      size, type, sym = match.groups()[0:3]
      size = int(size, 16)
      type = type.lower()
      if type == 'v':
        type = 'w'  # just call them all weak
      if type == 'b':
        continue  # skip all BSS for now
      path = match.group(4)
      yield sym, type, size, path
      continue
    match = addr_re.match(line)
    if match:
      # No size == we don't care.
      continue
    match = noaddr_re.match(line)
    if match:
      type, sym = match.groups()
      if type in ('U', 'w'):
        # external or weak symbol
        continue

    # Anything that falls through is unexpected; report it for debugging.
    # sys.stderr.write works identically on Python 2 and 3 (the original
    # used the Python 2-only 'print >>' statement).
    sys.stderr.write('unparsed: %r\n' % line)
94
95
def TreeifySymbols(symbols):
  """Convert symbols into a path-based tree, calculating size information
  along the way.

  The result is a dictionary that contains two kinds of nodes:
  1. Leaf nodes, representing source code locations (e.g., c++ files)
     These nodes have the following dictionary entries:
       sizes: a dictionary whose keys are categories (such as code, data,
              vtable, etceteras) and whose values are the size, in bytes, of
              those categories;
       size:  the total size, in bytes, of all the entries in the sizes dict
  2. Non-leaf nodes, representing directories
     These nodes have the following dictionary entries:
       children: a dictionary whose keys are names (path entries; either
                 directory or file names) and whose values are other nodes;
       size:     the total size, in bytes, of all the leaf nodes that are
                 contained within the children dict (recursively expanded)

  The result object is itself a dictionary that represents the common ancestor
  of all child nodes, e.g. a path to which all other nodes beneath it are
  relative. The 'size' attribute of this dict yields the sum of the size of all
  leaf nodes within the data structure.
  """
  dirs = {'children': {}, 'size': 0}
  for sym, type, size, path in symbols:
    dirs['size'] += size
    if path:
      path = os.path.normpath(path)
      # Make the path relative so it splits into clean components below.
      if path.startswith('/'):
        path = path[1:]

    parts = None
    if path:
      parts = path.split('/')

    if parts:
      assert path
      file_key = parts.pop()
      tree = dirs
      try:
        # Traverse the tree to the parent of the file node, creating as needed
        for part in parts:
          assert part != ''
          if part not in tree['children']:
            tree['children'][part] = {'children': {}, 'size': 0}
          tree = tree['children'][part]
          tree['size'] += size

        # Get (creating if necessary) the node for the file
        # This node doesn't have a 'children' attribute
        if file_key not in tree['children']:
          tree['children'][file_key] = {'sizes': collections.defaultdict(int),
                                        'size': 0}
        tree = tree['children'][file_key]
        tree['size'] += size

        # Accumulate size into a bucket within the file
        type = type.lower()
        if 'vtable for ' in sym:
          tree['sizes']['[vtable]'] += size
        elif 'r' == type:
          tree['sizes']['[rodata]'] += size
        elif 'd' == type:
          tree['sizes']['[data]'] += size
        elif 'b' == type:
          tree['sizes']['[bss]'] += size
        elif 't' == type:
          # 'text' in binary parlance means 'code'.
          tree['sizes']['[code]'] += size
        elif 'w' == type:
          tree['sizes']['[weak]'] += size
        else:
          tree['sizes']['[other]'] += size
      except Exception:
        # BUG FIX: the original referenced an undefined name 'key' here,
        # which raised NameError and masked the real failure. Print the
        # variables that actually exist in this branch, then re-raise.
        sys.stderr.write('%s %s %s\n' % (sym, parts, file_key))
        raise
    else:
      # Symbols with no source location are lumped together under one node,
      # bucketed by a best-effort guess at what kind of symbol they are.
      key = 'symbols without paths'
      if key not in dirs['children']:
        dirs['children'][key] = {'sizes': collections.defaultdict(int),
                                 'size': 0}
      tree = dirs['children'][key]
      subkey = 'misc'
      if (sym.endswith('::__FUNCTION__') or
          sym.endswith('::__PRETTY_FUNCTION__')):
        subkey = '__FUNCTION__'
      elif sym.startswith('CSWTCH.'):
        subkey = 'CSWTCH'
      elif '::' in sym:
        # Group by the outermost namespace/class, e.g. 'base::'.
        subkey = sym[0:sym.find('::') + 2]
      tree['sizes'][subkey] = tree['sizes'].get(subkey, 0) + size
      tree['size'] += size
  return dirs
189
190
def JsonifyTree(tree, name):
  """Convert TreeifySymbols output to a JSON treemap.

  The format is very similar, with the notable exceptions being
  lists of children instead of maps and some different attribute names.

  Args:
    tree: a node dict as produced by TreeifySymbols.
    name: display name for this node.
  Returns:
    A dict in webtreemap-compatible JSON form.
  """
  children = []
  # Maps size-bucket names to CSS classes used by the treemap for coloring.
  css_class_map = {
      '[vtable]': 'vtable',
      '[rodata]': 'read-only_data',
      '[data]': 'data',
      '[bss]': 'bss',
      '[code]': 'code',
      '[weak]': 'weak_symbol',
  }
  if 'children' in tree:
    # Non-leaf node. Recurse.
    # .items() (not the Python 2-only .iteritems()) keeps this portable.
    for child_name, child in tree['children'].items():
      children.append(JsonifyTree(child, child_name))
  else:
    # Leaf node; dump per-file stats as entries in the treemap.
    for kind, size in tree['sizes'].items():
      child_json = {'name': kind + ' (' + FormatBytes(size) + ')',
                    'data': {'$area': size}}
      css_class = css_class_map.get(kind)
      if css_class is not None:
        child_json['data']['$symbol'] = css_class
      children.append(child_json)
  # Sort children by size, largest to smallest.
  children.sort(key=lambda child: -child['data']['$area'])

  # For leaf nodes, the 'size' attribute is the size of the leaf;
  # Non-leaf nodes don't really have a size, but their 'size' attribute is
  # the sum of the sizes of all their children.
  return {'name': name + ' (' + FormatBytes(tree['size']) + ')',
          'data': {'$area': tree['size']},
          'children': children}
226
227
def DumpTreemap(symbols, outfile):
  """Write a webtreemap JS data file ('var kTree = ...') for the symbols.

  Args:
    symbols: iterable of (symbol, type, size, path) tuples.
    outfile: path of the JavaScript file to write (overwritten).
  """
  dirs = TreeifySymbols(symbols)
  # 'with' guarantees the file is flushed and closed, replacing the original
  # manual try/finally flush/close sequence.
  with open(outfile, 'w') as out:
    out.write('var kTree = ' + json.dumps(JsonifyTree(dirs, '/')))
236
237
def DumpLargestSymbols(symbols, outfile, n):
  """Write a JS file listing up to n largest symbols, largest first.

  BSS and weak symbols are skipped. Output is 'var largestSymbols = [...];'.
  """
  # a list of (sym, type, size, path); sort by size, descending.
  symbols = sorted(symbols, key=lambda x: -x[2])
  dumped = 0
  # 'with' replaces the original manual try/finally flush/close; 'break'
  # replaces a 'return' inside the try so the closing '];' is still written.
  with open(outfile, 'w') as out:
    out.write('var largestSymbols = [\n')
    for sym, type, size, path in symbols:
      if type in ('b', 'w'):
        continue  # skip bss and weak symbols
      if path is None:
        path = ''
      entry = {'size': FormatBytes(size),
               'symbol': sym,
               'type': SymbolTypeToHuman(type),
               'location': path}
      out.write(json.dumps(entry))
      out.write(',\n')
      dumped += 1
      if dumped >= n:
        break
    out.write('];\n')
263
264
def MakeSourceMap(symbols):
  """Aggregate symbols by source path.

  Returns a dict keyed by normalized path (or '[no path]' for symbols with
  no source location); each value records the original 'path', the summed
  'size' in bytes, and the 'symbol_count' of symbols at that path.
  """
  sources = {}
  for sym, type, size, path in symbols:
    key = os.path.normpath(path) if path else '[no path]'
    record = sources.setdefault(
        key, {'path': path, 'symbol_count': 0, 'size': 0})
    record['size'] += size
    record['symbol_count'] += 1
  return sources
279
280
def DumpLargestSources(symbols, outfile, n):
  """Write a JS file listing up to n largest source files by total size.

  Output is 'var largestSources = [...];'.
  """
  # Renamed from 'map' to avoid shadowing the builtin.
  source_map = MakeSourceMap(symbols)
  sources = sorted(source_map.values(), key=lambda x: -x['size'])
  dumped = 0
  # 'with' replaces the original manual try/finally flush/close; 'break'
  # replaces a 'return' inside the try so the closing '];' is still written.
  with open(outfile, 'w') as out:
    out.write('var largestSources = [\n')
    for record in sources:
      entry = {'size': FormatBytes(record['size']),
               'symbol_count': str(record['symbol_count']),
               'location': record['path']}
      out.write(json.dumps(entry))
      out.write(',\n')
      dumped += 1
      if dumped >= n:
        break
    out.write('];\n')
301
302
def DumpLargestVTables(symbols, outfile, n):
  """Write a JS file listing up to n largest vtables, largest first.

  Output is 'var largestVTables = [...];'.
  """
  vtables = [{'symbol': symbol, 'path': path, 'size': size}
             for symbol, type, size, path in symbols
             if 'vtable for ' in symbol]
  vtables.sort(key=lambda x: -x['size'])
  dumped = 0
  # 'with' replaces the original manual try/finally flush/close; 'break'
  # replaces a 'return' inside the try so the closing '];' is still written.
  with open(outfile, 'w') as out:
    out.write('var largestVTables = [\n')
    for record in vtables:
      entry = {'size': FormatBytes(record['size']),
               'symbol': record['symbol'],
               'location': record['path']}
      out.write(json.dumps(entry))
      out.write(',\n')
      dumped += 1
      if dumped >= n:
        break
    out.write('];\n')
326
327
def RunParallelAddress2Line(outfile, library, arch, jobs, verbose):
  """Run a parallel addr2line processing engine to dump and resolve symbols.

  Args:
    outfile: path where the resolved nm output is written.
    library: path of the library to analyze.
    arch: target architecture; selects which nm/addr2line binaries to use.
    jobs: number of worker threads (string, passed through to the tool).
    verbose: if True, pass --verbose and echo the command line.
  Raises:
    RuntimeError: if the Java tool exits with a nonzero status.
  """
  out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out')
  build_type = os.getenv('BUILDTYPE', 'Release')
  classpath = os.path.join(out_dir, build_type, 'lib.java',
                           'binary_size_java.jar')
  cmd = ['java',
         '-classpath', classpath,
         'org.chromium.tools.binary_size.ParallelAddress2Line',
         '--disambiguate',
         '--outfile', outfile,
         '--library', library,
         '--threads', jobs]
  if verbose is True:
    cmd.append('--verbose')

  # Per-arch NDK toolchain directory and tool-name prefix, collapsing the
  # original copy-pasted if/elif branches into one table. Any other arch
  # (e.g. 'host-native') uses whatever nm/addr2line are on the PATH
  # (no --nm or --addr2line is passed).
  toolchains = {
      'android-arm': ('arm-linux-androideabi-4.7', 'arm-linux-androideabi-'),
      'android-mips': ('mipsel-linux-android-4.7', 'mipsel-linux-android-'),
      'android-x86': ('x86-4.7', 'i686-linux-android-'),
  }
  if arch in toolchains:
    toolchain_dir, tool_prefix = toolchains[arch]
    prefix = os.path.join('third_party', 'android_tools', 'ndk', 'toolchains',
                          toolchain_dir, 'prebuilt', 'linux-x86_64', 'bin',
                          tool_prefix)
    cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line'])

  if verbose:
    print(cmd)  # parenthesized form behaves identically on Python 2 and 3

  return_code = subprocess.call(cmd)
  if return_code:
    raise RuntimeError('Failed to run ParallelAddress2Line: returned ' +
                       str(return_code))
365
366
def GetNmSymbols(infile, outfile, library, arch, jobs, verbose):
  """Return the parsed symbol list, generating nm output first if needed.

  If infile is None, symbols are produced by running the parallel addr2line
  tool over 'library' (dumping to outfile, or a temp file if outfile is
  None). Otherwise infile is parsed directly and library/arch/jobs are
  unused.

  Returns: a list of (symbol name, type, size, path) tuples from ParseNm.
  """
  if infile is None:
    if outfile is None:
      # Keep the temp file around (delete=False) so it can be reopened below.
      infile = tempfile.NamedTemporaryFile(delete=False).name
    else:
      infile = outfile

    if verbose:
      print('Running parallel addr2line, dumping symbols to ' + infile)
    RunParallelAddress2Line(outfile=infile, library=library, arch=arch,
                            jobs=jobs, verbose=verbose)
  elif verbose:
    print('Using nm input from ' + infile)

  # open() instead of the Python 2-only file() builtin; 'with' closes the
  # handle, and a distinct name avoids shadowing the 'infile' path.
  with open(infile, 'r') as nm_input:
    return list(ParseNm(nm_input))
382
383
def main():
  """Parse options, run the analysis, and emit the HTML report."""
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

    %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--arch',
                    help='the architecture that the library is targeted to. '
                    'Determines which nm/addr2line binaries are used. When '
                    '\'host-native\' is chosen, the program will use whichever '
                    'nm/addr2line binaries are on the PATH. This is '
                    'appropriate when you are analyzing a binary by and for '
                    'your computer. '
                    'This argument is only valid when using --library. '
                    'Default is \'host-native\'.',
                    choices=['host-native', 'android-arm',
                             'android-mips', 'android-x86'])
  parser.add_option('--jobs',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates.'
                    'This argument is only valid when using --library.')
  opts, args = parser.parse_args()

  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    # --jobs and --arch only affect symbol generation, which --nm-in skips.
    # sys.stderr.write works on both Python 2 and 3 (the original used the
    # Python 2-only 'print >>' statement).
    if opts.jobs:
      sys.stderr.write('WARNING: --jobs has no effect '
                       'when used with --nm-in\n')
    if opts.arch:
      sys.stderr.write('WARNING: --arch has no effect '
                       'when used with --nm-in\n')
  if not opts.destdir:
    parser.error('--destdir is required argument')
  if not opts.jobs:
    opts.jobs = '1'
  if not opts.arch:
    opts.arch = 'host-native'

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, opts.arch,
                         opts.jobs, opts.verbose is True)
  if not os.path.exists(opts.destdir):
    # 0o755 (valid in Python 2.6+ and 3) replaces the old-style 0755 literal,
    # which is a syntax error under Python 3; same permission bits.
    os.makedirs(opts.destdir, 0o755)

  DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
  DumpLargestSymbols(symbols,
                     os.path.join(opts.destdir, 'largest-symbols.js'), 100)
  DumpLargestSources(symbols,
                     os.path.join(opts.destdir, 'largest-sources.js'), 100)
  DumpLargestVTables(symbols,
                     os.path.join(opts.destdir, 'largest-vtables.js'), 100)

  # TODO(andrewhayden): Switch to D3 for greater flexibility
  treemap_out = os.path.join(opts.destdir, 'webtreemap')
  if not os.path.exists(treemap_out):
    os.makedirs(treemap_out, 0o755)
  treemap_src = os.path.join('third_party', 'webtreemap', 'src',
                             'webtreemap-gh-pages')
  shutil.copy(os.path.join(treemap_src, 'COPYING'), treemap_out)
  shutil.copy(os.path.join(treemap_src, 'webtreemap.js'), treemap_out)
  shutil.copy(os.path.join(treemap_src, 'webtreemap.css'), treemap_out)
  shutil.copy(os.path.join('tools', 'binary_size', 'template', 'index.html'),
              opts.destdir)
  if opts.verbose:
    print('Report saved to ' + opts.destdir + '/index.html')


if __name__ == '__main__':
  sys.exit(main())
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698