Index: bloat/bloat.py |
diff --git a/bloat/bloat.py b/bloat/bloat.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..6abf55c6379ca6a4163f22f51fee42cae1a515bd |
--- /dev/null |
+++ b/bloat/bloat.py |
@@ -0,0 +1,433 @@ |
+#!/usr/bin/python |
+# |
+# Copyright 2013 Google Inc. All Rights Reserved. |
+# |
+# Licensed under the Apache License, Version 2.0 (the "License"); |
+# you may not use this file except in compliance with the License. |
+# You may obtain a copy of the License at |
+# |
+# http://www.apache.org/licenses/LICENSE-2.0 |
+# |
+# Unless required by applicable law or agreed to in writing, software |
+# distributed under the License is distributed on an "AS IS" BASIS, |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
+# See the License for the specific language governing permissions and |
+# limitations under the License. |
+ |
+import operator |
+import optparse |
+import os |
+import re |
+import subprocess |
+import sys |
+import json |
+ |
+def format_bytes(bytes): |
+ """Pretty-print a number of bytes.""" |
+ if bytes > 1e6: |
+ bytes = bytes / 1.0e6 |
+ return '%.1fm' % bytes |
+ if bytes > 1e3: |
+ bytes = bytes / 1.0e3 |
+ return '%.1fk' % bytes |
+ return str(bytes) |
+ |
+ |
+def symbol_type_to_human(type): |
+ """Convert a symbol type as printed by nm into a human-readable name.""" |
+ return { |
+ 'b': 'bss', |
+ 'd': 'data', |
+ 'r': 'read-only data', |
+ 't': 'code', |
+ 'u': 'weak symbol', # Unique global. |
+ 'w': 'weak symbol', |
+ 'v': 'weak symbol' |
+ }[type] |
+ |
+ |
+def parse_nm(input): |
+ """Parse nm output. |
+ |
+ Argument: an iterable over lines of nm output. |
+ |
+ Yields: (symbol name, symbol type, symbol size, source file path). |
+ Path may be None if nm couldn't figure out the source file. |
+ """ |
+ |
+ # Match lines with size + symbol + optional filename. |
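+  # e.g. (illustrative) '00000000004004d0 000000000000002a T main\t/src/foo.cc:12'. |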
+ sym_re = re.compile(r'^[0-9a-f]+ ([0-9a-f]+) (.) ([^\t]+)(?:\t(.*):\d+)?$') |
+ |
+ # Match lines with addr but no size. |
+ addr_re = re.compile(r'^[0-9a-f]+ (.) ([^\t]+)(?:\t.*)?$') |
+ # Match lines that don't have an address at all -- typically external symbols. |
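+  # e.g. (illustrative) '                 U malloc'. |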
+ noaddr_re = re.compile(r'^ + (.) (.*)$') |
+ |
+ for line in input: |
+ line = line.rstrip() |
+ match = sym_re.match(line) |
+ if match: |
+ size, type, sym = match.groups()[0:3] |
+ size = int(size, 16) |
+ type = type.lower() |
+ if type in ['u', 'v']: |
+ type = 'w' # just call them all weak |
+ if type == 'b': |
+ continue # skip all BSS for now |
+ path = match.group(4) |
+ yield sym, type, size, path |
+ continue |
+ match = addr_re.match(line) |
+ if match: |
+ type, sym = match.groups()[0:2] |
+ # No size == we don't care. |
+ continue |
+ match = noaddr_re.match(line) |
+ if match: |
+ type, sym = match.groups() |
+ if type in ('U', 'w'): |
+ # external or weak symbol |
+ continue |
+ |
+ print >>sys.stderr, 'unparsed:', repr(line) |
+ |
+def demangle(ident, cppfilt): |
+ if cppfilt and ident.startswith('_Z'): |
+ # Demangle names when possible. Mangled names all start with _Z. |
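+    # e.g. (illustrative) '_Z3foov' -> 'foo()'. |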
+ ident = subprocess.check_output([cppfilt, ident]).strip() |
+ return ident |
+ |
+ |
+class Suffix: |
+ def __init__(self, suffix, replacement): |
+ self.pattern = '^(.*)' + suffix + '(.*)$' |
+ self.re = re.compile(self.pattern) |
+ self.replacement = replacement |
+ |
+class SuffixCleanup: |
+ """Pre-compile suffix regular expressions.""" |
+ def __init__(self): |
+ self.suffixes = [ |
+      Suffix(r'\.part\.([0-9]+)', 'part'), |
+      Suffix(r'\.constprop\.([0-9]+)', 'constprop'), |
+      Suffix(r'\.isra\.([0-9]+)', 'isra'), |
+ ] |
+ def cleanup(self, ident, cppfilt): |
+ """Cleanup identifiers that have suffixes preventing demangling, |
+ and demangle if possible.""" |
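+    # e.g. (illustrative, with c++filt) '_ZN3foo3barEv.part.1' -> 'foo::bar() [part.1]'. |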
+ to_append = [] |
+ for s in self.suffixes: |
+ found = s.re.match(ident) |
+ if not found: |
+ continue |
+ to_append += [' [' + s.replacement + '.' + found.group(2) + ']'] |
+ ident = found.group(1) + found.group(3) |
+ if len(to_append) > 0: |
+ # Only try to demangle if there were suffixes. |
+ ident = demangle(ident, cppfilt) |
+ for s in to_append: |
+ ident += s |
+ return ident |
+ |
+suffix_cleanup = SuffixCleanup() |
+ |
+def parse_cpp_name(name, cppfilt): |
+ name = suffix_cleanup.cleanup(name, cppfilt) |
+ |
+ # Turn prefixes into suffixes so namespacing works. |
+ prefixes = [ |
+ ['bool ', ''], |
+ ['construction vtable for ', ' [construction vtable]'], |
+ ['global constructors keyed to ', ' [global constructors]'], |
+ ['guard variable for ', ' [guard variable]'], |
+ ['int ', ''], |
+ ['non-virtual thunk to ', ' [non-virtual thunk]'], |
+ ['typeinfo for ', ' [typeinfo]'], |
+ ['typeinfo name for ', ' [typeinfo name]'], |
+ ['virtual thunk to ', ' [virtual thunk]'], |
+ ['void ', ''], |
+ ['vtable for ', ' [vtable]'], |
+ ['VTT for ', ' [VTT]'], |
+ ] |
+ for prefix, replacement in prefixes: |
+ if name.startswith(prefix): |
+ name = name[len(prefix):] + replacement |
+ # Simplify parenthesis parsing. |
+ replacements = [ |
+ ['(anonymous namespace)', '[anonymous namespace]'], |
+ ] |
+ for value, replacement in replacements: |
+ name = name.replace(value, replacement) |
+ |
+ def parse_one(val): |
+ """Returns (leftmost-part, remaining).""" |
+ if (val.startswith('operator') and |
+ not (val[8].isalnum() or val[8] == '_')): |
+ # Operator overload function, terminate. |
+ return (val, '') |
+ co = val.find('::') |
+ lt = val.find('<') |
+ pa = val.find('(') |
+ co = len(val) if co == -1 else co |
+ lt = len(val) if lt == -1 else lt |
+ pa = len(val) if pa == -1 else pa |
+ if co < lt and co < pa: |
+ # Namespace or type name. |
+ return (val[:co], val[co+2:]) |
+ if lt < pa: |
+ # Template. Make sure we capture nested templates too. |
+ open_tmpl = 1 |
+ gt = lt |
+ while val[gt] != '>' or open_tmpl != 0: |
+ gt = gt + 1 |
+ if val[gt] == '<': |
+ open_tmpl = open_tmpl + 1 |
+ if val[gt] == '>': |
+ open_tmpl = open_tmpl - 1 |
+ ret = val[gt+1:] |
+ if ret.startswith('::'): |
+ ret = ret[2:] |
+ if ret.startswith('('): |
+ # Template function, terminate. |
+ return (val, '') |
+ return (val[:gt+1], ret) |
+ # Terminate with any function name, identifier, or unmangled name. |
+ return (val, '') |
+ |
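+  # Peel off nested parts left to right, e.g. (illustrative): |
+  # 'std::vector<int>::push_back(int)' -> ['std', 'vector<int>', 'push_back(int)']. |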
+ parts = [] |
+ while len(name) > 0: |
+ (part, name) = parse_one(name) |
+ assert len(part) > 0 |
+ parts.append(part) |
+ return parts |
+ |
+ |
+def treeify_syms(symbols, strip_prefix=None, cppfilt=None): |
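+  """Convert parsed nm symbols into a nested dict of namespaces/paths. |
+ |
+  Symbols are grouped by the parts of their demangled C++ names, or by |
+  source path when there is no namespace.  Leaves are (total size, |
+  {symbol type: count}) tuples; interior nodes count their symbol types |
+  under the '$bloat_symbols' key. |
+  """ |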
+ dirs = {} |
+ for sym, type, size, path in symbols: |
+ if path: |
+ path = os.path.normpath(path) |
+ if strip_prefix and path.startswith(strip_prefix): |
+ path = path[len(strip_prefix):] |
+ elif path.startswith('/'): |
+ path = path[1:] |
+ path = ['[path]'] + path.split('/') |
+ |
+ parts = parse_cpp_name(sym, cppfilt) |
+ if len(parts) == 1: |
+ if path: |
+ # No namespaces, group with path. |
+ parts = path + parts |
+ else: |
+ new_prefix = ['[ungrouped]'] |
+ regroups = [ |
+ ['.L.str', '[str]'], |
+ ['.L__PRETTY_FUNCTION__.', '[__PRETTY_FUNCTION__]'], |
+ ['.L__func__.', '[__func__]'], |
+ ['.Lswitch.table', '[switch table]'], |
+ ] |
+ for prefix, group in regroups: |
+ if parts[0].startswith(prefix): |
+ parts[0] = parts[0][len(prefix):] |
+ parts[0] = demangle(parts[0], cppfilt) |
+ new_prefix += [group] |
+ break |
+ parts = new_prefix + parts |
+ |
+ key = parts.pop() |
+ tree = dirs |
+ try: |
+ depth = 0 |
+ for part in parts: |
+ depth = depth + 1 |
+ assert part != '', path |
+ if part not in tree: |
+ tree[part] = {'$bloat_symbols':{}} |
+ if type not in tree[part]['$bloat_symbols']: |
+ tree[part]['$bloat_symbols'][type] = 0 |
+ tree[part]['$bloat_symbols'][type] += 1 |
+ tree = tree[part] |
+ old_size, old_symbols = tree.get(key, (0, {})) |
+ if type not in old_symbols: |
+ old_symbols[type] = 0 |
+ old_symbols[type] += 1 |
+ tree[key] = (old_size + size, old_symbols) |
+ except: |
+ print >>sys.stderr, 'sym `%s`\tparts `%s`\tkey `%s`' % (sym, parts, key) |
+ raise |
+ return dirs |
+ |
+ |
+def jsonify_tree(tree, name): |
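+  """Convert a treeify_syms() tree into the dict dumped as kTree JSON. |
+ |
+  Each node gets a 'name' with a pretty-printed size and a 'data' dict |
+  with '$area' (plus the dominant or leaf symbol type); children are |
+  sorted largest-first. |
+  """ |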
+ children = [] |
+ total = 0 |
+ files = 0 |
+ |
+ for key, val in tree.iteritems(): |
+ if key == '$bloat_symbols': |
+ continue |
+ if isinstance(val, dict): |
+ subtree = jsonify_tree(val, key) |
+ total += subtree['data']['$area'] |
+ children.append(subtree) |
+ else: |
+ (size, symbols) = val |
+ total += size |
+      assert len(symbols) == 1, symbols |
+ symbol = symbol_type_to_human(symbols.keys()[0]) |
+ children.append({ |
+ 'name': key + ' ' + format_bytes(size), |
+ 'data': { |
+ '$area': size, |
+ '$symbol': symbol, |
+ } |
+ }) |
+ |
+ children.sort(key=lambda child: -child['data']['$area']) |
+ dominant_symbol = '' |
+ if '$bloat_symbols' in tree: |
+ dominant_symbol = symbol_type_to_human( |
+ max(tree['$bloat_symbols'].iteritems(), |
+ key=operator.itemgetter(1))[0]) |
+ return { |
+ 'name': name + ' ' + format_bytes(total), |
+ 'data': { |
+ '$area': total, |
+ '$dominant_symbol': dominant_symbol, |
+ }, |
+ 'children': children, |
+ } |
+ |
+ |
+def dump_nm(nmfile, strip_prefix, cppfilt): |
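+  """Print symbols as a 'var kTree = ...' JSON blob for the treemap.""" |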
+ dirs = treeify_syms(parse_nm(nmfile), strip_prefix, cppfilt) |
+ print ('var kTree = ' + |
+ json.dumps(jsonify_tree(dirs, '[everything]'), indent=2)) |
+ |
+ |
+def parse_objdump(input): |
+ """Parse objdump -h output.""" |
+  sec_re = re.compile(r'^\d+ (\S+) +([0-9a-z]+)') |
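+  # e.g. (illustrative) '9 .text 00284b10 ...' -> groups ('.text', '00284b10'). |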
+ sections = [] |
+ debug_sections = [] |
+ |
+ for line in input: |
+ line = line.strip() |
+ match = sec_re.match(line) |
+ if match: |
+ name, size = match.groups() |
+ if name.startswith('.'): |
+ name = name[1:] |
+ if name.startswith('debug_'): |
+ name = name[len('debug_'):] |
+ debug_sections.append((name, int(size, 16))) |
+ else: |
+ sections.append((name, int(size, 16))) |
+ continue |
+ return sections, debug_sections |
+ |
+ |
+def jsonify_sections(name, sections): |
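+  """Convert a list of (section name, size) pairs into a treemap node.""" |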
+ children = [] |
+ total = 0 |
+ for section, size in sections: |
+ children.append({ |
+ 'name': section + ' ' + format_bytes(size), |
+ 'data': { '$area': size } |
+ }) |
+ total += size |
+ |
+ children.sort(key=lambda child: -child['data']['$area']) |
+ |
+ return { |
+ 'name': name + ' ' + format_bytes(total), |
+ 'data': { '$area': total }, |
+ 'children': children |
+ } |
+ |
+ |
+def dump_sections(objdump): |
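+  """Print section sizes as a 'var kTree = ...' JSON blob for the treemap.""" |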
+ sections, debug_sections = parse_objdump(objdump) |
+ sections = jsonify_sections('sections', sections) |
+ debug_sections = jsonify_sections('debug', debug_sections) |
+ size = sections['data']['$area'] + debug_sections['data']['$area'] |
+ print 'var kTree = ' + json.dumps({ |
+ 'name': 'top ' + format_bytes(size), |
+ 'data': { '$area': size }, |
+ 'children': [ debug_sections, sections ]}) |
+ |
+ |
+usage="""%prog [options] MODE |
+ |
+Modes are: |
+ syms: output symbols json suitable for a treemap |
+ dump: print symbols sorted by size (pipe to head for best output) |
+ sections: output binary sections json suitable for a treemap |
+ |
+nm output passed to --nm-output should be from running a command |
+like the following (note: it can take a long time -- 30 minutes): |
+ nm -C -S -l /path/to/binary > nm.out |
+ |
+objdump output passed to --objdump-output should be from a command |
+like: |
+ objdump -h /path/to/binary > objdump.out""" |
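+# Example (illustrative): |
+#   nm -C -S -l ./a.out > nm.out && ./bloat.py syms > bloat.json |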
+parser = optparse.OptionParser(usage=usage) |
+parser.add_option('--nm-output', action='store', dest='nmpath', |
+ metavar='PATH', default='nm.out', |
+ help='path to nm output [default=nm.out]') |
+parser.add_option('--objdump-output', action='store', dest='objdumppath', |
+ metavar='PATH', default='objdump.out', |
+ help='path to objdump output [default=objdump.out]') |
+parser.add_option('--strip-prefix', metavar='PATH', action='store', |
+ help='strip PATH prefix from paths; e.g. /path/to/src/root') |
+parser.add_option('--filter', action='store', |
+ help='include only symbols/files matching FILTER') |
+parser.add_option('--c++filt', action='store', metavar='PATH', dest='cppfilt', |
+ default='c++filt', help="Path to c++filt, used to demangle " |
+ "symbols that weren't handled by nm. Set to an invalid path " |
+ "to disable.") |
+opts, args = parser.parse_args() |
+ |
+if len(args) != 1: |
+ parser.print_usage() |
+ sys.exit(1) |
+ |
+mode = args[0] |
+if mode == 'syms': |
+ nmfile = open(opts.nmpath, 'r') |
+ try: |
+ res = subprocess.check_output([opts.cppfilt, 'main']) |
+ if res.strip() != 'main': |
+ print >>sys.stderr, ("%s failed demangling, " |
+ "output won't be demangled." % opt.cppfilt) |
+ opts.cppfilt = None |
+ except: |
+ print >>sys.stderr, ("Could not find c++filt at %s, " |
+ "output won't be demangled." % opt.cppfilt) |
+ opts.cppfilt = None |
+ dump_nm(nmfile, strip_prefix=opts.strip_prefix, cppfilt=opts.cppfilt) |
+elif mode == 'sections': |
+ objdumpfile = open(opts.objdumppath, 'r') |
+ dump_sections(objdumpfile) |
+elif mode == 'dump': |
+ nmfile = open(opts.nmpath, 'r') |
+ syms = list(parse_nm(nmfile)) |
+ # a list of (sym, type, size, path); sort by size. |
+ syms.sort(key=lambda x: -x[2]) |
+ total = 0 |
+ for sym, type, size, path in syms: |
+ if type in ('b', 'w'): |
+ continue # skip bss and weak symbols |
+ if path is None: |
+ path = '' |
+ if opts.filter and not (opts.filter in sym or opts.filter in path): |
+ continue |
+ print '%6s %s (%s) %s' % (format_bytes(size), sym, |
+ symbol_type_to_human(type), path) |
+ total += size |
+ print '%6s %s' % (format_bytes(total), 'total'), |
+else: |
+  print >>sys.stderr, 'unknown mode' |
+  parser.print_usage() |
+  sys.exit(1) |