bloat/bloat.py - Issue 917203002: Subzero: Generate a web page showing llvm2ice size breakdown.

Side by Side Diff: bloat/bloat.py

Issue 917203002: Subzero: Generate a web page showing llvm2ice size breakdown. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Dump the json file into the build directory Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 #!/usr/bin/python

	2 #

	3 # Copyright 2013 Google Inc. All Rights Reserved.

	4 #

	5 # Licensed under the Apache License, Version 2.0 (the "License");

	6 # you may not use this file except in compliance with the License.

	7 # You may obtain a copy of the License at

	8 #

	9 # http://www.apache.org/licenses/LICENSE-2.0

	10 #

	11 # Unless required by applicable law or agreed to in writing, software

	12 # distributed under the License is distributed on an "AS IS" BASIS,

	13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

	14 # See the License for the specific language governing permissions and

	15 # limitations under the License.

	16

	17 import fileinput

	18 import operator

	19 import optparse

	20 import os

	21 import pprint

	22 import re

	23 import subprocess

	24 import sys

	25 import json

	26

	27 def format_bytes(bytes):

	28 """Pretty-print a number of bytes."""

	29 if bytes > 1e6:

	30 bytes = bytes / 1.0e6

	31 return '%.1fm' % bytes

	32 if bytes > 1e3:

	33 bytes = bytes / 1.0e3

	34 return '%.1fk' % bytes

	35 return str(bytes)

	36

	37

	38 def symbol_type_to_human(type):

	39 """Convert a symbol type as printed by nm into a human-readable name."""

	40 return {

	41 'b': 'bss',

	42 'd': 'data',

	43 'r': 'read-only data',

	44 't': 'code',

	45 'u': 'weak symbol', # Unique global.

	46 'w': 'weak symbol',

	47 'v': 'weak symbol'

	48 }[type]

	49

	50

	51 def parse_nm(input):

	52 """Parse nm output.

	53

	54 Argument: an iterable over lines of nm output.

	55

	56 Yields: (symbol name, symbol type, symbol size, source file path).

	57 Path may be None if nm couldn't figure out the source file.

	58 """

	59

	60 # Match lines with size + symbol + optional filename.

	61 sym_re = re.compile(r'^[0-9a-f]+ ([0-9a-f]+) (.) ([^\t]+)(?:\t(.*):\d+)?$')

	62

	63 # Match lines with addr but no size.

	64 addr_re = re.compile(r'^[0-9a-f]+ (.) ([^\t]+)(?:\t.*)?$')

	65 # Match lines that don't have an address at all -- typically external symbol s.

	66 noaddr_re = re.compile(r'^ + (.) (.*)$')

	67

	68 for line in input:

	69 line = line.rstrip()

	70 match = sym_re.match(line)

	71 if match:

	72 size, type, sym = match.groups()[0:3]

	73 size = int(size, 16)

	74 type = type.lower()

	75 if type in ['u', 'v']:

	76 type = 'w' # just call them all weak

	77 if type == 'b':

	78 continue # skip all BSS for now

	79 path = match.group(4)

	80 yield sym, type, size, path

	81 continue

	82 match = addr_re.match(line)

	83 if match:

	84 type, sym = match.groups()[0:2]

	85 # No size == we don't care.

	86 continue

	87 match = noaddr_re.match(line)

	88 if match:

	89 type, sym = match.groups()

	90 if type in ('U', 'w'):

	91 # external or weak symbol

	92 continue

	93

	94 print >>sys.stderr, 'unparsed:', repr(line)

	95

	96 def demangle(ident, cppfilt):

	97 if cppfilt and ident.startswith('_Z'):

	98 # Demangle names when possible. Mangled names all start with _Z.

	99 ident = subprocess.check_output([cppfilt, ident]).strip()

	100 return ident

	101

	102

	103 class Suffix:

	104 def __init__(self, suffix, replacement):

	105 self.pattern = '^(.)' + suffix + '(.)$'

	106 self.re = re.compile(self.pattern)

	107 self.replacement = replacement

	108

	109 class SuffixCleanup:

	110 """Pre-compile suffix regular expressions."""

	111 def __init__(self):

	112 self.suffixes = [

	113 Suffix('\.part\.([0-9]+)', 'part'),

	114 Suffix('\.constprop\.([0-9]+)', 'constprop'),

	115 Suffix('\.isra\.([0-9]+)', 'isra'),

	116 ]

	117 def cleanup(self, ident, cppfilt):

	118 """Cleanup identifiers that have suffixes preventing demangling,

	119 and demangle if possible."""

	120 to_append = []

	121 for s in self.suffixes:

	122 found = s.re.match(ident)

	123 if not found:

	124 continue

	125 to_append += [' [' + s.replacement + '.' + found.group(2) + ']']

	126 ident = found.group(1) + found.group(3)

	127 if len(to_append) > 0:

	128 # Only try to demangle if there were suffixes.

	129 ident = demangle(ident, cppfilt)

	130 for s in to_append:

	131 ident += s

	132 return ident

	133

	134 suffix_cleanup = SuffixCleanup()

	135

	136 def parse_cpp_name(name, cppfilt):

	137 name = suffix_cleanup.cleanup(name, cppfilt)

	138

	139 # Turn prefixes into suffixes so namespacing works.

	140 prefixes = [

	141 ['bool ', ''],

	142 ['construction vtable for ', ' [construction vtable]'],

	143 ['global constructors keyed to ', ' [global constructors]'],

	144 ['guard variable for ', ' [guard variable]'],

	145 ['int ', ''],

	146 ['non-virtual thunk to ', ' [non-virtual thunk]'],

	147 ['typeinfo for ', ' [typeinfo]'],

	148 ['typeinfo name for ', ' [typeinfo name]'],

	149 ['virtual thunk to ', ' [virtual thunk]'],

	150 ['void ', ''],

	151 ['vtable for ', ' [vtable]'],

	152 ['VTT for ', ' [VTT]'],

	153 ]

	154 for prefix, replacement in prefixes:

	155 if name.startswith(prefix):

	156 name = name[len(prefix):] + replacement

	157 # Simplify parenthesis parsing.

	158 replacements = [

	159 ['(anonymous namespace)', '[anonymous namespace]'],

	160 ]

	161 for value, replacement in replacements:

	162 name = name.replace(value, replacement)

	163

	164 def parse_one(val):

	165 """Returns (leftmost-part, remaining)."""

	166 if (val.startswith('operator') and

	167 not (val[8].isalnum() or val[8] == '_')):

	168 # Operator overload function, terminate.

	169 return (val, '')

	170 co = val.find('::')

	171 lt = val.find('<')

	172 pa = val.find('(')

	173 co = len(val) if co == -1 else co

	174 lt = len(val) if lt == -1 else lt

	175 pa = len(val) if pa == -1 else pa

	176 if co < lt and co < pa:

	177 # Namespace or type name.

	178 return (val[:co], val[co+2:])

	179 if lt < pa:

	180 # Template. Make sure we capture nested templates too.

	181 open_tmpl = 1

	182 gt = lt

	183 while val[gt] != '>' or open_tmpl != 0:

	184 gt = gt + 1

	185 if val[gt] == '<':

	186 open_tmpl = open_tmpl + 1

	187 if val[gt] == '>':

	188 open_tmpl = open_tmpl - 1

	189 ret = val[gt+1:]

	190 if ret.startswith('::'):

	191 ret = ret[2:]

	192 if ret.startswith('('):

	193 # Template function, terminate.

	194 return (val, '')

	195 return (val[:gt+1], ret)

	196 # Terminate with any function name, identifier, or unmangled name.

	197 return (val, '')

	198

	199 parts = []

	200 while len(name) > 0:

	201 (part, name) = parse_one(name)

	202 assert len(part) > 0

	203 parts.append(part)

	204 return parts

	205

	206

	207 def treeify_syms(symbols, strip_prefix=None, cppfilt=None):

	208 dirs = {}

	209 for sym, type, size, path in symbols:

	210 if path:

	211 path = os.path.normpath(path)

	212 if strip_prefix and path.startswith(strip_prefix):

	213 path = path[len(strip_prefix):]

	214 elif path.startswith('/'):

	215 path = path[1:]

	216 path = ['[path]'] + path.split('/')

	217

	218 parts = parse_cpp_name(sym, cppfilt)

	219 if len(parts) == 1:

	220 if path:

	221 # No namespaces, group with path.

	222 parts = path + parts

	223 else:

	224 new_prefix = ['[ungrouped]']

	225 regroups = [

	226 ['.L.str', '[str]'],

	227 ['.L__PRETTY_FUNCTION__.', '[__PRETTY_FUNCTION__]'],

	228 ['.L__func__.', '[__func__]'],

	229 ['.Lswitch.table', '[switch table]'],

	230 ]

	231 for prefix, group in regroups:

	232 if parts[0].startswith(prefix):

	233 parts[0] = parts[0][len(prefix):]

	234 parts[0] = demangle(parts[0], cppfilt)

	235 new_prefix += [group]

	236 break

	237 parts = new_prefix + parts

	238

	239 key = parts.pop()

	240 tree = dirs

	241 try:

	242 depth = 0

	243 for part in parts:

	244 depth = depth + 1

	245 assert part != '', path

	246 if part not in tree:

	247 tree[part] = {'$bloat_symbols':{}}

	248 if type not in tree[part]['$bloat_symbols']:

	249 tree[part]['$bloat_symbols'][type] = 0

	250 tree[part]['$bloat_symbols'][type] += 1

	251 tree = tree[part]

	252 old_size, old_symbols = tree.get(key, (0, {}))

	253 if type not in old_symbols:

	254 old_symbols[type] = 0

	255 old_symbols[type] += 1

	256 tree[key] = (old_size + size, old_symbols)

	257 except:

	258 print >>sys.stderr, 'sym `%s`\tparts `%s`\tkey `%s`' % (sym, parts, key)

	259 raise

	260 return dirs

	261

	262

	263 def jsonify_tree(tree, name):

	264 children = []

	265 total = 0

	266 files = 0

	267

	268 for key, val in tree.iteritems():

	269 if key == '$bloat_symbols':

	270 continue

	271 if isinstance(val, dict):

	272 subtree = jsonify_tree(val, key)

	273 total += subtree['data']['$area']

	274 children.append(subtree)

	275 else:

	276 (size, symbols) = val

	277 total += size

	278 assert len(symbols) == 1, symbols.values()[0] == 1

	279 symbol = symbol_type_to_human(symbols.keys()[0])

	280 children.append({

	281 'name': key + ' ' + format_bytes(size),

	282 'data': {

	283 '$area': size,

	284 '$symbol': symbol,

	285 }

	286 })

	287

	288 children.sort(key=lambda child: -child['data']['$area'])

	289 dominant_symbol = ''

	290 if '$bloat_symbols' in tree:

	291 dominant_symbol = symbol_type_to_human(

	292 max(tree['$bloat_symbols'].iteritems(),

	293 key=operator.itemgetter(1))[0])

	294 return {

	295 'name': name + ' ' + format_bytes(total),

	296 'data': {

	297 '$area': total,

	298 '$dominant_symbol': dominant_symbol,

	299 },

	300 'children': children,

	301 }

	302

	303

	304 def dump_nm(nmfile, strip_prefix, cppfilt):

	305 dirs = treeify_syms(parse_nm(nmfile), strip_prefix, cppfilt)

	306 print ('var kTree = ' +

	307 json.dumps(jsonify_tree(dirs, '[everything]'), indent=2))

	308

	309

	310 def parse_objdump(input):

	311 """Parse objdump -h output."""

	312 sec_re = re.compile('^\d+ (\S+) +([0-9a-z]+)')

	313 sections = []

	314 debug_sections = []

	315

	316 for line in input:

	317 line = line.strip()

	318 match = sec_re.match(line)

	319 if match:

	320 name, size = match.groups()

	321 if name.startswith('.'):

	322 name = name[1:]

	323 if name.startswith('debug_'):

	324 name = name[len('debug_'):]

	325 debug_sections.append((name, int(size, 16)))

	326 else:

	327 sections.append((name, int(size, 16)))

	328 continue

	329 return sections, debug_sections

	330

	331

	332 def jsonify_sections(name, sections):

	333 children = []

	334 total = 0

	335 for section, size in sections:

	336 children.append({

	337 'name': section + ' ' + format_bytes(size),

	338 'data': { '$area': size }

	339 })

	340 total += size

	341

	342 children.sort(key=lambda child: -child['data']['$area'])

	343

	344 return {

	345 'name': name + ' ' + format_bytes(total),

	346 'data': { '$area': total },

	347 'children': children

	348 }

	349

	350

	351 def dump_sections(objdump):

	352 sections, debug_sections = parse_objdump(objdump)

	353 sections = jsonify_sections('sections', sections)

	354 debug_sections = jsonify_sections('debug', debug_sections)

	355 size = sections['data']['$area'] + debug_sections['data']['$area']

	356 print 'var kTree = ' + json.dumps({

	357 'name': 'top ' + format_bytes(size),

	358 'data': { '$area': size },

	359 'children': [ debug_sections, sections ]})

	360

	361

	362 usage="""%prog [options] MODE

	363

	364 Modes are:

	365 syms: output symbols json suitable for a treemap

	366 dump: print symbols sorted by size (pipe to head for best output)

	367 sections: output binary sections json suitable for a treemap

	368

	369 nm output passed to --nm-output should from running a command

	370 like the following (note, can take a long time -- 30 minutes):

	371 nm -C -S -l /path/to/binary > nm.out

	372

	373 objdump output passed to --objdump-output should be from a command

	374 like:

	375 objdump -h /path/to/binary > objdump.out"""

	376 parser = optparse.OptionParser(usage=usage)

	377 parser.add_option('--nm-output', action='store', dest='nmpath',

	378 metavar='PATH', default='nm.out',

	379 help='path to nm output [default=nm.out]')

	380 parser.add_option('--objdump-output', action='store', dest='objdumppath',

	381 metavar='PATH', default='objdump.out',

	382 help='path to objdump output [default=objdump.out]')

	383 parser.add_option('--strip-prefix', metavar='PATH', action='store',

	384 help='strip PATH prefix from paths; e.g. /path/to/src/root')

	385 parser.add_option('--filter', action='store',

	386 help='include only symbols/files matching FILTER')

	387 parser.add_option('--c++filt', action='store', metavar='PATH', dest='cppfilt',

	388 default='c++filt', help="Path to c++filt, used to demangle "

	389 "symbols that weren't handled by nm. Set to an invalid path "

	390 "to disable.")

	391 opts, args = parser.parse_args()

	392

	393 if len(args) != 1:

	394 parser.print_usage()

	395 sys.exit(1)

	396

	397 mode = args[0]

	398 if mode == 'syms':

	399 nmfile = open(opts.nmpath, 'r')

	400 try:

	401 res = subprocess.check_output([opts.cppfilt, 'main'])

	402 if res.strip() != 'main':

	403 print >>sys.stderr, ("%s failed demangling, "

	404 "output won't be demangled." % opt.cppfilt)

	405 opts.cppfilt = None

	406 except:

	407 print >>sys.stderr, ("Could not find c++filt at %s, "

	408 "output won't be demangled." % opt.cppfilt)

	409 opts.cppfilt = None

	410 dump_nm(nmfile, strip_prefix=opts.strip_prefix, cppfilt=opts.cppfilt)

	411 elif mode == 'sections':

	412 objdumpfile = open(opts.objdumppath, 'r')

	413 dump_sections(objdumpfile)

	414 elif mode == 'dump':

	415 nmfile = open(opts.nmpath, 'r')

	416 syms = list(parse_nm(nmfile))

	417 # a list of (sym, type, size, path); sort by size.

	418 syms.sort(key=lambda x: -x[2])

	419 total = 0

	420 for sym, type, size, path in syms:

	421 if type in ('b', 'w'):

	422 continue # skip bss and weak symbols

	423 if path is None:

	424 path = ''

	425 if opts.filter and not (opts.filter in sym or opts.filter in path):

	426 continue

	427 print '%6s %s (%s) %s' % (format_bytes(size), sym,

	428 symbol_type_to_human(type), path)

	429 total += size

	430 print '%6s %s' % (format_bytes(total), 'total'),

	431 else:

	432 print 'unknown mode'

	433 parser.print_usage()

OLD	NEW

« no previous file with comments | « bloat/README.chromium ('k') | bloat/llvm2ice.bloat.html » ('j') | no next file with comments »