| Index: bloat/bloat.py
|
| diff --git a/bloat/bloat.py b/bloat/bloat.py
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..6abf55c6379ca6a4163f22f51fee42cae1a515bd
|
| --- /dev/null
|
| +++ b/bloat/bloat.py
|
| @@ -0,0 +1,433 @@
|
| +#!/usr/bin/python
|
| +#
|
| +# Copyright 2013 Google Inc. All Rights Reserved.
|
| +#
|
| +# Licensed under the Apache License, Version 2.0 (the "License");
|
| +# you may not use this file except in compliance with the License.
|
| +# You may obtain a copy of the License at
|
| +#
|
| +# http://www.apache.org/licenses/LICENSE-2.0
|
| +#
|
| +# Unless required by applicable law or agreed to in writing, software
|
| +# distributed under the License is distributed on an "AS IS" BASIS,
|
| +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| +# See the License for the specific language governing permissions and
|
| +# limitations under the License.
|
| +
|
| +import fileinput
|
| +import operator
|
| +import optparse
|
| +import os
|
| +import pprint
|
| +import re
|
| +import subprocess
|
| +import sys
|
| +import json
|
| +
|
def format_bytes(bytes):
  """Pretty-print a byte count, scaling to 'k'/'m' units past 1e3/1e6."""
  # Try the largest unit first; fall through to a plain count.
  for threshold, divisor, suffix in ((1e6, 1.0e6, 'm'), (1e3, 1.0e3, 'k')):
    if bytes > threshold:
      return '%.1f%s' % (bytes / divisor, suffix)
  return str(bytes)
|
| +
|
| +
|
def symbol_type_to_human(type):
  """Convert a symbol type as printed by nm into a human-readable name."""
  # 'u' (unique global), 'w', and 'v' are all reported as weak.
  if type in ('u', 'w', 'v'):
    return 'weak symbol'
  return {
    'b': 'bss',
    'd': 'data',
    'r': 'read-only data',
    't': 'code',
  }[type]
|
| +
|
| +
|
def parse_nm(input):
  """Parse nm output.

  Argument: an iterable over lines of nm output.

  Yields: (symbol name, symbol type, symbol size, source file path).
  Path may be None if nm couldn't figure out the source file.
  """

  # Match lines with size + symbol + optional filename.
  sym_re = re.compile(r'^[0-9a-f]+ ([0-9a-f]+) (.) ([^\t]+)(?:\t(.*):\d+)?$')

  # Match lines with addr but no size.
  addr_re = re.compile(r'^[0-9a-f]+ (.) ([^\t]+)(?:\t.*)?$')
  # Match lines that don't have an address at all -- typically external symbols.
  noaddr_re = re.compile(r'^ + (.) (.*)$')

  for line in input:
    line = line.rstrip()
    match = sym_re.match(line)
    if match:
      size, type, sym = match.groups()[0:3]
      size = int(size, 16)  # nm prints sizes in hex.
      type = type.lower()
      if type in ['u', 'v']:
        type = 'w'  # just call them all weak
      if type == 'b':
        continue  # skip all BSS for now
      path = match.group(4)
      yield sym, type, size, path
      continue
    match = addr_re.match(line)
    if match:
      # No size == we don't care.
      continue
    match = noaddr_re.match(line)
    if match:
      type, sym = match.groups()
      if type in ('U', 'w'):
        # external or weak symbol
        continue

    # Anything still unmatched is reported for debugging.  Written via
    # sys.stderr.write rather than the Python-2-only "print >>" statement
    # so the function works under both Python 2 and 3.
    sys.stderr.write('unparsed: %r\n' % line)
|
| +
|
def demangle(ident, cppfilt):
  """Demangle ident via c++filt when possible; mangled names start with _Z."""
  if not cppfilt or not ident.startswith('_Z'):
    return ident
  return subprocess.check_output([cppfilt, ident]).strip()
|
| +
|
| +
|
class Suffix:
  """One compiler-generated name suffix, with the tag to append after it."""

  def __init__(self, suffix, replacement):
    # Anchor the suffix pattern so group(1)/group(3) capture the rest of
    # the identifier around it.
    self.pattern = ''.join(['^(.*)', suffix, '(.*)$'])
    self.re = re.compile(self.pattern)
    self.replacement = replacement
|
| +
|
class SuffixCleanup:
  """Pre-compile suffix regular expressions.

  Strips compiler-generated suffixes (.part.N, .constprop.N, .isra.N) that
  prevent c++filt from demangling, then re-attaches them as bracketed tags.
  """
  def __init__(self):
    # Raw strings: '\.' is an invalid escape sequence in a plain string
    # literal on modern Pythons (the original literals relied on it).
    self.suffixes = [
      Suffix(r'\.part\.([0-9]+)', 'part'),
      Suffix(r'\.constprop\.([0-9]+)', 'constprop'),
      Suffix(r'\.isra\.([0-9]+)', 'isra'),
    ]
  def cleanup(self, ident, cppfilt):
    """Cleanup identifiers that have suffixes preventing demangling,
    and demangle if possible."""
    to_append = []
    for s in self.suffixes:
      found = s.re.match(ident)
      if not found:
        continue
      # Remember the suffix as a bracketed tag, e.g. ' [part.3]'.
      to_append += [' [' + s.replacement + '.' + found.group(2) + ']']
      ident = found.group(1) + found.group(3)
    if len(to_append) > 0:
      # Only try to demangle if there were suffixes.
      ident = demangle(ident, cppfilt)
      for s in to_append:
        ident += s
    return ident

suffix_cleanup = SuffixCleanup()
|
| +
|
def parse_cpp_name(name, cppfilt):
  """Split a demangled C++ symbol name into namespace-like parts.

  Args:
    name: symbol name as printed by nm (possibly still mangled).
    cppfilt: optional path to c++filt for demangling; may be None.

  Returns:
    A non-empty list of name components, outermost first (namespaces,
    classes, templates), ending with the function/identifier itself.
  """
  name = suffix_cleanup.cleanup(name, cppfilt)

  # Turn prefixes into suffixes so namespacing works.
  prefixes = [
    ['bool ', ''],
    ['construction vtable for ', ' [construction vtable]'],
    ['global constructors keyed to ', ' [global constructors]'],
    ['guard variable for ', ' [guard variable]'],
    ['int ', ''],
    ['non-virtual thunk to ', ' [non-virtual thunk]'],
    ['typeinfo for ', ' [typeinfo]'],
    ['typeinfo name for ', ' [typeinfo name]'],
    ['virtual thunk to ', ' [virtual thunk]'],
    ['void ', ''],
    ['vtable for ', ' [vtable]'],
    ['VTT for ', ' [VTT]'],
  ]
  for prefix, replacement in prefixes:
    if name.startswith(prefix):
      name = name[len(prefix):] + replacement
  # Simplify parenthesis parsing.
  replacements = [
    ['(anonymous namespace)', '[anonymous namespace]'],
  ]
  for value, replacement in replacements:
    name = name.replace(value, replacement)

  def parse_one(val):
    """Returns (leftmost-part, remaining)."""
    if (val.startswith('operator') and
        (len(val) == 8 or not (val[8].isalnum() or val[8] == '_'))):
      # Operator overload function, terminate.
      # The len() guard fixes an IndexError when the name is exactly
      # "operator" (the original unconditionally read val[8]).
      return (val, '')
    co = val.find('::')
    lt = val.find('<')
    pa = val.find('(')
    # Treat "not found" as "past the end" so min-style comparison works.
    co = len(val) if co == -1 else co
    lt = len(val) if lt == -1 else lt
    pa = len(val) if pa == -1 else pa
    if co < lt and co < pa:
      # Namespace or type name.
      return (val[:co], val[co+2:])
    if lt < pa:
      # Template. Make sure we capture nested templates too.
      open_tmpl = 1
      gt = lt
      while val[gt] != '>' or open_tmpl != 0:
        gt = gt + 1
        if val[gt] == '<':
          open_tmpl = open_tmpl + 1
        if val[gt] == '>':
          open_tmpl = open_tmpl - 1
      ret = val[gt+1:]
      if ret.startswith('::'):
        ret = ret[2:]
      if ret.startswith('('):
        # Template function, terminate.
        return (val, '')
      return (val[:gt+1], ret)
    # Terminate with any function name, identifier, or unmangled name.
    return (val, '')

  parts = []
  while len(name) > 0:
    (part, name) = parse_one(name)
    assert len(part) > 0
    parts.append(part)
  return parts
|
| +
|
| +
|
def treeify_syms(symbols, strip_prefix=None, cppfilt=None):
  """Convert parsed symbols into a nested tree of dicts.

  Args:
    symbols: iterable of (sym, type, size, path) tuples as produced by
        parse_nm.
    strip_prefix: optional path prefix to drop from source file paths.
    cppfilt: optional path to c++filt for demangling; may be None.

  Returns:
    A dict-of-dicts tree.  Interior nodes are dicts keyed by name part,
    each also holding a '$bloat_symbols' dict of per-type symbol counts;
    leaves are (accumulated size, {type: count}) tuples.
  """
  dirs = {}
  for sym, type, size, path in symbols:
    if path:
      path = os.path.normpath(path)
      if strip_prefix and path.startswith(strip_prefix):
        path = path[len(strip_prefix):]
      elif path.startswith('/'):
        path = path[1:]
      # Root all file paths under a synthetic '[path]' node.
      path = ['[path]'] + path.split('/')

    parts = parse_cpp_name(sym, cppfilt)
    if len(parts) == 1:
      if path:
        # No namespaces, group with path.
        parts = path + parts
      else:
        # Un-namespaced symbol with no source path: group compiler-generated
        # locals (.L.str etc.) under labelled buckets, the rest under
        # '[ungrouped]'.
        new_prefix = ['[ungrouped]']
        regroups = [
          ['.L.str', '[str]'],
          ['.L__PRETTY_FUNCTION__.', '[__PRETTY_FUNCTION__]'],
          ['.L__func__.', '[__func__]'],
          ['.Lswitch.table', '[switch table]'],
        ]
        for prefix, group in regroups:
          if parts[0].startswith(prefix):
            parts[0] = parts[0][len(prefix):]
            parts[0] = demangle(parts[0], cppfilt)
            new_prefix += [group]
            break
        parts = new_prefix + parts

    # Last part is the leaf name; walk/create interior nodes for the rest.
    key = parts.pop()
    tree = dirs
    try:
      depth = 0
      for part in parts:
        depth = depth + 1
        assert part != '', path
        if part not in tree:
          tree[part] = {'$bloat_symbols':{}}
        if type not in tree[part]['$bloat_symbols']:
          tree[part]['$bloat_symbols'][type] = 0
        tree[part]['$bloat_symbols'][type] += 1
        tree = tree[part]
      # Accumulate size and per-type counts into the leaf tuple.
      old_size, old_symbols = tree.get(key, (0, {}))
      if type not in old_symbols:
        old_symbols[type] = 0
      old_symbols[type] += 1
      tree[key] = (old_size + size, old_symbols)
    except:
      # Deliberate log-and-re-raise: add context, keep the original error.
      print >>sys.stderr, 'sym `%s`\tparts `%s`\tkey `%s`' % (sym, parts, key)
      raise
  return dirs
|
| +
|
| +
|
def jsonify_tree(tree, name):
  """Recursively convert a treeify_syms() tree into a treemap JSON node.

  Args:
    tree: dict as produced by treeify_syms.
    name: display name for this node.

  Returns:
    A dict with 'name' (label + pretty size), 'data' ($area = total bytes,
    $dominant_symbol = most frequent symbol type below this node), and
    'children' (sorted largest-first).
  """
  children = []
  total = 0
  files = 0  # NOTE(review): never updated or read; appears to be dead.

  for key, val in tree.iteritems():
    if key == '$bloat_symbols':
      continue
    if isinstance(val, dict):
      # Interior node: recurse and fold its area into our total.
      subtree = jsonify_tree(val, key)
      total += subtree['data']['$area']
      children.append(subtree)
    else:
      (size, symbols) = val
      total += size
      # NOTE(review): the comma makes the second comparison the assert
      # *message*, not a second assertion -- likely intended as two
      # separate asserts.
      assert len(symbols) == 1, symbols.values()[0] == 1
      symbol = symbol_type_to_human(symbols.keys()[0])
      children.append({
        'name': key + ' ' + format_bytes(size),
        'data': {
          '$area': size,
          '$symbol': symbol,
        }
      })

  # Largest areas first.
  children.sort(key=lambda child: -child['data']['$area'])
  dominant_symbol = ''
  if '$bloat_symbols' in tree:
    # Pick the symbol type with the highest count under this node.
    dominant_symbol = symbol_type_to_human(
        max(tree['$bloat_symbols'].iteritems(),
            key=operator.itemgetter(1))[0])
  return {
    'name': name + ' ' + format_bytes(total),
    'data': {
      '$area': total,
      '$dominant_symbol': dominant_symbol,
    },
    'children': children,
  }
|
| +
|
| +
|
def dump_nm(nmfile, strip_prefix, cppfilt):
  """Emit the symbol treemap for an nm dump as a 'var kTree = ...' JS blob."""
  tree = treeify_syms(parse_nm(nmfile), strip_prefix, cppfilt)
  blob = json.dumps(jsonify_tree(tree, '[everything]'), indent=2)
  print ('var kTree = ' + blob)
|
| +
|
| +
|
def parse_objdump(input):
  """Parse objdump -h output.

  Argument: an iterable over lines of objdump -h output.

  Returns:
    (sections, debug_sections): two lists of (name, size-in-bytes) pairs.
    Sections named .debug_* go in the second list; leading dots and the
    'debug_' prefix are stripped from names.  Sizes are parsed as hex.
  """
  # Section rows look like "IDX NAME SIZE ...".  Raw string fixes the
  # invalid '\d'/'\S' escape sequences of the original plain literal.
  sec_re = re.compile(r'^\d+ (\S+) +([0-9a-z]+)')
  sections = []
  debug_sections = []

  for line in input:
    line = line.strip()
    match = sec_re.match(line)
    if not match:
      continue  # Header/continuation lines carry no section row.
    name, size = match.groups()
    if name.startswith('.'):
      name = name[1:]
    if name.startswith('debug_'):
      debug_sections.append((name[len('debug_'):], int(size, 16)))
    else:
      sections.append((name, int(size, 16)))
  return sections, debug_sections
|
| +
|
| +
|
def jsonify_sections(name, sections):
  """Convert a list of (section, size) pairs into a treemap JSON node."""
  children = [{
      'name': section + ' ' + format_bytes(size),
      'data': { '$area': size }
  } for section, size in sections]
  total = sum(size for _, size in sections)

  # Largest areas first (stable, like the original negated-key sort).
  children.sort(key=lambda child: child['data']['$area'], reverse=True)

  return {
    'name': name + ' ' + format_bytes(total),
    'data': { '$area': total },
    'children': children
  }
|
| +
|
| +
|
def dump_sections(objdump):
  """Emit the sections treemap for an objdump -h dump as a JS blob."""
  sections, debug_sections = parse_objdump(objdump)
  sections = jsonify_sections('sections', sections)
  debug_sections = jsonify_sections('debug', debug_sections)
  size = sections['data']['$area'] + debug_sections['data']['$area']
  # Single parenthesized argument: identical output under Python 2, valid
  # syntax under Python 3, and consistent with dump_nm's print style.
  # (The original bare "print 'x' + ..." statement was Python-2-only.)
  print ('var kTree = ' + json.dumps({
    'name': 'top ' + format_bytes(size),
    'data': { '$area': size },
    'children': [ debug_sections, sections ]}))
|
| +
|
| +
|
# Command-line interface: three modes, each reading pre-captured tool output.
usage="""%prog [options] MODE

Modes are:
  syms: output symbols json suitable for a treemap
  dump: print symbols sorted by size (pipe to head for best output)
  sections: output binary sections json suitable for a treemap

nm output passed to --nm-output should be from running a command
like the following (note, can take a long time -- 30 minutes):
  nm -C -S -l /path/to/binary > nm.out

objdump output passed to --objdump-output should be from a command
like:
  objdump -h /path/to/binary > objdump.out"""
parser = optparse.OptionParser(usage=usage)
parser.add_option('--nm-output', action='store', dest='nmpath',
                  metavar='PATH', default='nm.out',
                  help='path to nm output [default=nm.out]')
parser.add_option('--objdump-output', action='store', dest='objdumppath',
                  metavar='PATH', default='objdump.out',
                  help='path to objdump output [default=objdump.out]')
parser.add_option('--strip-prefix', metavar='PATH', action='store',
                  help='strip PATH prefix from paths; e.g. /path/to/src/root')
parser.add_option('--filter', action='store',
                  help='include only symbols/files matching FILTER')
parser.add_option('--c++filt', action='store', metavar='PATH', dest='cppfilt',
                  default='c++filt', help="Path to c++filt, used to demangle "
                  "symbols that weren't handled by nm. Set to an invalid path "
                  "to disable.")
opts, args = parser.parse_args()
|
| +
|
| +if len(args) != 1:
|
| + parser.print_usage()
|
| + sys.exit(1)
|
| +
|
| +mode = args[0]
|
| +if mode == 'syms':
|
| + nmfile = open(opts.nmpath, 'r')
|
| + try:
|
| + res = subprocess.check_output([opts.cppfilt, 'main'])
|
| + if res.strip() != 'main':
|
| + print >>sys.stderr, ("%s failed demangling, "
|
| + "output won't be demangled." % opt.cppfilt)
|
| + opts.cppfilt = None
|
| + except:
|
| + print >>sys.stderr, ("Could not find c++filt at %s, "
|
| + "output won't be demangled." % opt.cppfilt)
|
| + opts.cppfilt = None
|
| + dump_nm(nmfile, strip_prefix=opts.strip_prefix, cppfilt=opts.cppfilt)
|
| +elif mode == 'sections':
|
| + objdumpfile = open(opts.objdumppath, 'r')
|
| + dump_sections(objdumpfile)
|
| +elif mode == 'dump':
|
| + nmfile = open(opts.nmpath, 'r')
|
| + syms = list(parse_nm(nmfile))
|
| + # a list of (sym, type, size, path); sort by size.
|
| + syms.sort(key=lambda x: -x[2])
|
| + total = 0
|
| + for sym, type, size, path in syms:
|
| + if type in ('b', 'w'):
|
| + continue # skip bss and weak symbols
|
| + if path is None:
|
| + path = ''
|
| + if opts.filter and not (opts.filter in sym or opts.filter in path):
|
| + continue
|
| + print '%6s %s (%s) %s' % (format_bytes(size), sym,
|
| + symbol_type_to_human(type), path)
|
| + total += size
|
| + print '%6s %s' % (format_bytes(total), 'total'),
|
| +else:
|
| + print 'unknown mode'
|
| + parser.print_usage()
|
|
|