Chromium Code Reviews

Unified Diff: bloat/bloat.py

Issue 917203002: Subzero: Generate a web page showing llvm2ice size breakdown. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Dump the json file into the build directory (Created 5 years, 10 months ago)
Index: bloat/bloat.py
diff --git a/bloat/bloat.py b/bloat/bloat.py
new file mode 100755
index 0000000000000000000000000000000000000000..6abf55c6379ca6a4163f22f51fee42cae1a515bd
--- /dev/null
+++ b/bloat/bloat.py
@@ -0,0 +1,433 @@
+#!/usr/bin/python
+#
+# Copyright 2013 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
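+# Produces treemap-friendly JSON describing where the bytes in a binary go,
+# based on nm and objdump output (see the usage string at the bottom).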
+import fileinput
+import operator
+import optparse
+import os
+import pprint
+import re
+import subprocess
+import sys
+import json
+
+def format_bytes(bytes):
+ """Pretty-print a number of bytes."""
+ if bytes > 1e6:
+ bytes = bytes / 1.0e6
+ return '%.1fm' % bytes
+ if bytes > 1e3:
+ bytes = bytes / 1.0e3
+ return '%.1fk' % bytes
+ return str(bytes)
+
+
+def symbol_type_to_human(type):
+ """Convert a symbol type as printed by nm into a human-readable name."""
+ return {
+ 'b': 'bss',
+ 'd': 'data',
+ 'r': 'read-only data',
+ 't': 'code',
+ 'u': 'weak symbol', # Unique global.
+ 'w': 'weak symbol',
+ 'v': 'weak symbol'
+ }[type]
+
+
+def parse_nm(input):
+ """Parse nm output.
+
+ Argument: an iterable over lines of nm output.
+
+ Yields: (symbol name, symbol type, symbol size, source file path).
+ Path may be None if nm couldn't figure out the source file.
+ """
+
+ # Match lines with size + symbol + optional filename.
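+  # e.g. (illustrative): "004004f0 000000c9 T main\t/src/foo.cc:42"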
+  sym_re = re.compile(r'^[0-9a-f]+ ([0-9a-f]+) (.) ([^\t]+)(?:\t(.*):\d+)?$')
+
+  # Match lines with addr but no size.
+  addr_re = re.compile(r'^[0-9a-f]+ (.) ([^\t]+)(?:\t.*)?$')
+  # Match lines that don't have an address at all -- typically external symbols.
+  noaddr_re = re.compile(r'^ + (.) (.*)$')
+
+  for line in input:
+    line = line.rstrip()
+    match = sym_re.match(line)
+    if match:
+      size, type, sym = match.groups()[0:3]
+      size = int(size, 16)
+      type = type.lower()
+      if type in ['u', 'v']:
+        type = 'w'  # just call them all weak
+      if type == 'b':
+        continue  # skip all BSS for now
+      path = match.group(4)
+      yield sym, type, size, path
+      continue
+    match = addr_re.match(line)
+    if match:
+      type, sym = match.groups()[0:2]
+      # No size == we don't care.
+      continue
+    match = noaddr_re.match(line)
+    if match:
+      type, sym = match.groups()
+      if type in ('U', 'w'):
+        # external or weak symbol
+        continue
+
+    print >>sys.stderr, 'unparsed:', repr(line)
+
+def demangle(ident, cppfilt):
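+  # c++filt turns mangled identifiers like '_ZN3foo3barEv' into 'foo::bar()'.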
+  if cppfilt and ident.startswith('_Z'):
+    # Demangle names when possible. Mangled names all start with _Z.
+    ident = subprocess.check_output([cppfilt, ident]).strip()
+  return ident
+
+
+class Suffix:
+  def __init__(self, suffix, replacement):
+    self.pattern = '^(.*)' + suffix + '(.*)$'
+    self.re = re.compile(self.pattern)
+    self.replacement = replacement
+
+class SuffixCleanup:
+ """Pre-compile suffix regular expressions."""
+ def __init__(self):
+ self.suffixes = [
+ Suffix('\.part\.([0-9]+)', 'part'),
+ Suffix('\.constprop\.([0-9]+)', 'constprop'),
+ Suffix('\.isra\.([0-9]+)', 'isra'),
+ ]
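+    # The suffixes above are compiler-generated clone markers (e.g. GCC's
+    # .part/.constprop/.isra clones), as in 'foo.isra.2'.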
+  def cleanup(self, ident, cppfilt):
+    """Cleanup identifiers that have suffixes preventing demangling,
+    and demangle if possible."""
+    to_append = []
+    for s in self.suffixes:
+      found = s.re.match(ident)
+      if not found:
+        continue
+      to_append += [' [' + s.replacement + '.' + found.group(2) + ']']
+      ident = found.group(1) + found.group(3)
+    if len(to_append) > 0:
+      # Only try to demangle if there were suffixes.
+      ident = demangle(ident, cppfilt)
+    for s in to_append:
+      ident += s
+    return ident
+
+suffix_cleanup = SuffixCleanup()
+
+def parse_cpp_name(name, cppfilt):
+  name = suffix_cleanup.cleanup(name, cppfilt)
+
+  # Turn prefixes into suffixes so namespacing works.
+  prefixes = [
+      ['bool ', ''],
+      ['construction vtable for ', ' [construction vtable]'],
+      ['global constructors keyed to ', ' [global constructors]'],
+      ['guard variable for ', ' [guard variable]'],
+      ['int ', ''],
+      ['non-virtual thunk to ', ' [non-virtual thunk]'],
+      ['typeinfo for ', ' [typeinfo]'],
+      ['typeinfo name for ', ' [typeinfo name]'],
+      ['virtual thunk to ', ' [virtual thunk]'],
+      ['void ', ''],
+      ['vtable for ', ' [vtable]'],
+      ['VTT for ', ' [VTT]'],
+  ]
+  for prefix, replacement in prefixes:
+    if name.startswith(prefix):
+      name = name[len(prefix):] + replacement
+  # Simplify parenthesis parsing.
+  replacements = [
+      ['(anonymous namespace)', '[anonymous namespace]'],
+  ]
+  for value, replacement in replacements:
+    name = name.replace(value, replacement)
+
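+  # For example (illustrative), 'foo::Bar<int>::baz(int)' is split into
+  # ['foo', 'Bar<int>', 'baz(int)'] so callers can nest names by namespace.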
+  def parse_one(val):
+    """Returns (leftmost-part, remaining)."""
+    if (val.startswith('operator') and
+        not (val[8].isalnum() or val[8] == '_')):
+      # Operator overload function, terminate.
+      return (val, '')
+    co = val.find('::')
+    lt = val.find('<')
+    pa = val.find('(')
+    co = len(val) if co == -1 else co
+    lt = len(val) if lt == -1 else lt
+    pa = len(val) if pa == -1 else pa
+    if co < lt and co < pa:
+      # Namespace or type name.
+      return (val[:co], val[co+2:])
+    if lt < pa:
+      # Template. Make sure we capture nested templates too.
+      open_tmpl = 1
+      gt = lt
+      while val[gt] != '>' or open_tmpl != 0:
+        gt = gt + 1
+        if val[gt] == '<':
+          open_tmpl = open_tmpl + 1
+        if val[gt] == '>':
+          open_tmpl = open_tmpl - 1
+      ret = val[gt+1:]
+      if ret.startswith('::'):
+        ret = ret[2:]
+      if ret.startswith('('):
+        # Template function, terminate.
+        return (val, '')
+      return (val[:gt+1], ret)
+    # Terminate with any function name, identifier, or unmangled name.
+    return (val, '')
+
+  parts = []
+  while len(name) > 0:
+    (part, name) = parse_one(name)
+    assert len(part) > 0
+    parts.append(part)
+  return parts
+
+
+def treeify_syms(symbols, strip_prefix=None, cppfilt=None):
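+  # Build a nested dict keyed by path components and C++ name parts.  Leaves
+  # are (total size, {symbol type: count}) tuples; inner dicts also keep a
+  # '$bloat_symbols' histogram of the symbol types beneath them.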
+  dirs = {}
+  for sym, type, size, path in symbols:
+    if path:
+      path = os.path.normpath(path)
+      if strip_prefix and path.startswith(strip_prefix):
+        path = path[len(strip_prefix):]
+      elif path.startswith('/'):
+        path = path[1:]
+      path = ['[path]'] + path.split('/')
+
+    parts = parse_cpp_name(sym, cppfilt)
+    if len(parts) == 1:
+      if path:
+        # No namespaces, group with path.
+        parts = path + parts
+      else:
+        new_prefix = ['[ungrouped]']
+        regroups = [
+            ['.L.str', '[str]'],
+            ['.L__PRETTY_FUNCTION__.', '[__PRETTY_FUNCTION__]'],
+            ['.L__func__.', '[__func__]'],
+            ['.Lswitch.table', '[switch table]'],
+        ]
+        for prefix, group in regroups:
+          if parts[0].startswith(prefix):
+            parts[0] = parts[0][len(prefix):]
+            parts[0] = demangle(parts[0], cppfilt)
+            new_prefix += [group]
+            break
+        parts = new_prefix + parts
+
+    key = parts.pop()
+    tree = dirs
+    try:
+      depth = 0
+      for part in parts:
+        depth = depth + 1
+        assert part != '', path
+        if part not in tree:
+          tree[part] = {'$bloat_symbols':{}}
+        if type not in tree[part]['$bloat_symbols']:
+          tree[part]['$bloat_symbols'][type] = 0
+        tree[part]['$bloat_symbols'][type] += 1
+        tree = tree[part]
+      old_size, old_symbols = tree.get(key, (0, {}))
+      if type not in old_symbols:
+        old_symbols[type] = 0
+      old_symbols[type] += 1
+      tree[key] = (old_size + size, old_symbols)
+    except:
+      print >>sys.stderr, 'sym `%s`\tparts `%s`\tkey `%s`' % (sym, parts, key)
+      raise
+  return dirs
+
+
+def jsonify_tree(tree, name):
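+  # Emit a {'name': ..., 'data': {'$area': ...}, 'children': [...]} node,
+  # presumably the format the accompanying treemap HTML page expects.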
+  children = []
+  total = 0
+  files = 0
+
+  for key, val in tree.iteritems():
+    if key == '$bloat_symbols':
+      continue
+    if isinstance(val, dict):
+      subtree = jsonify_tree(val, key)
+      total += subtree['data']['$area']
+      children.append(subtree)
+    else:
+      (size, symbols) = val
+      total += size
+      assert len(symbols) == 1, symbols
+      symbol = symbol_type_to_human(symbols.keys()[0])
+      children.append({
+          'name': key + ' ' + format_bytes(size),
+          'data': {
+              '$area': size,
+              '$symbol': symbol,
+          }
+      })
+
+  children.sort(key=lambda child: -child['data']['$area'])
+  dominant_symbol = ''
+  if '$bloat_symbols' in tree:
+    dominant_symbol = symbol_type_to_human(
+        max(tree['$bloat_symbols'].iteritems(),
+            key=operator.itemgetter(1))[0])
+  return {
+      'name': name + ' ' + format_bytes(total),
+      'data': {
+          '$area': total,
+          '$dominant_symbol': dominant_symbol,
+      },
+      'children': children,
+  }
+
+
+def dump_nm(nmfile, strip_prefix, cppfilt):
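+  # Prints "var kTree = {...}" to stdout, presumably so the accompanying
+  # treemap page can pull it in directly as a script.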
+  dirs = treeify_syms(parse_nm(nmfile), strip_prefix, cppfilt)
+  print ('var kTree = ' +
+         json.dumps(jsonify_tree(dirs, '[everything]'), indent=2))
+
+
+def parse_objdump(input):
+ """Parse objdump -h output."""
+  sec_re = re.compile(r'^\d+ (\S+) +([0-9a-z]+)')
+  sections = []
+  debug_sections = []
+
+  for line in input:
+    line = line.strip()
+    match = sec_re.match(line)
+    if match:
+      name, size = match.groups()
+      if name.startswith('.'):
+        name = name[1:]
+      if name.startswith('debug_'):
+        name = name[len('debug_'):]
+        debug_sections.append((name, int(size, 16)))
+      else:
+        sections.append((name, int(size, 16)))
+      continue
+  return sections, debug_sections
+
+
+def jsonify_sections(name, sections):
+  children = []
+  total = 0
+  for section, size in sections:
+    children.append({
+        'name': section + ' ' + format_bytes(size),
+        'data': { '$area': size }
+    })
+    total += size
+
+  children.sort(key=lambda child: -child['data']['$area'])
+
+  return {
+      'name': name + ' ' + format_bytes(total),
+      'data': { '$area': total },
+      'children': children
+  }
+
+
+def dump_sections(objdump):
+  sections, debug_sections = parse_objdump(objdump)
+  sections = jsonify_sections('sections', sections)
+  debug_sections = jsonify_sections('debug', debug_sections)
+  size = sections['data']['$area'] + debug_sections['data']['$area']
+  print 'var kTree = ' + json.dumps({
+      'name': 'top ' + format_bytes(size),
+      'data': { '$area': size },
+      'children': [ debug_sections, sections ]})
+
+
+usage="""%prog [options] MODE
+
+Modes are:
+ syms: output symbols json suitable for a treemap
+ dump: print symbols sorted by size (pipe to head for best output)
+ sections: output binary sections json suitable for a treemap
+
+nm output passed to --nm-output should from running a command
+like the following (note, can take a long time -- 30 minutes):
+ nm -C -S -l /path/to/binary > nm.out
+
+objdump output passed to --objdump-output should be from a command
+like:
+ objdump -h /path/to/binary > objdump.out"""
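+# Typical use (illustrative file names):
+#   nm -C -S -l llvm2ice > nm.out
+#   ./bloat.py --nm-output=nm.out syms > bloat.json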
+parser = optparse.OptionParser(usage=usage)
+parser.add_option('--nm-output', action='store', dest='nmpath',
+                  metavar='PATH', default='nm.out',
+                  help='path to nm output [default=nm.out]')
+parser.add_option('--objdump-output', action='store', dest='objdumppath',
+                  metavar='PATH', default='objdump.out',
+                  help='path to objdump output [default=objdump.out]')
+parser.add_option('--strip-prefix', metavar='PATH', action='store',
+                  help='strip PATH prefix from paths; e.g. /path/to/src/root')
+parser.add_option('--filter', action='store',
+                  help='include only symbols/files matching FILTER')
+parser.add_option('--c++filt', action='store', metavar='PATH', dest='cppfilt',
+                  default='c++filt', help="Path to c++filt, used to demangle "
+                  "symbols that weren't handled by nm. Set to an invalid path "
+                  "to disable.")
+opts, args = parser.parse_args()
+
+if len(args) != 1:
+  parser.print_usage()
+  sys.exit(1)
+
+mode = args[0]
+if mode == 'syms':
+  nmfile = open(opts.nmpath, 'r')
+  try:
+    res = subprocess.check_output([opts.cppfilt, 'main'])
+    if res.strip() != 'main':
+      print >>sys.stderr, ("%s failed demangling, "
+                           "output won't be demangled." % opts.cppfilt)
+      opts.cppfilt = None
+  except:
+    print >>sys.stderr, ("Could not find c++filt at %s, "
+                         "output won't be demangled." % opts.cppfilt)
+    opts.cppfilt = None
+  dump_nm(nmfile, strip_prefix=opts.strip_prefix, cppfilt=opts.cppfilt)
+elif mode == 'sections':
+  objdumpfile = open(opts.objdumppath, 'r')
+  dump_sections(objdumpfile)
+elif mode == 'dump':
+  nmfile = open(opts.nmpath, 'r')
+  syms = list(parse_nm(nmfile))
+  # a list of (sym, type, size, path); sort by size.
+  syms.sort(key=lambda x: -x[2])
+  total = 0
+  for sym, type, size, path in syms:
+    if type in ('b', 'w'):
+      continue  # skip bss and weak symbols
+    if path is None:
+      path = ''
+    if opts.filter and not (opts.filter in sym or opts.filter in path):
+      continue
+    print '%6s %s (%s) %s' % (format_bytes(size), sym,
+                              symbol_type_to_human(type), path)
+    total += size
+  print '%6s %s' % (format_bytes(total), 'total'),
+else:
+  print 'unknown mode'
+  parser.print_usage()