OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python |
| 2 # |
| 3 # Copyright 2013 Google Inc. All Rights Reserved. |
| 4 # |
| 5 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 # you may not use this file except in compliance with the License. |
| 7 # You may obtain a copy of the License at |
| 8 # |
| 9 # http://www.apache.org/licenses/LICENSE-2.0 |
| 10 # |
| 11 # Unless required by applicable law or agreed to in writing, software |
| 12 # distributed under the License is distributed on an "AS IS" BASIS, |
| 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 # See the License for the specific language governing permissions and |
| 15 # limitations under the License. |
| 16 |
| 17 import fileinput |
| 18 import operator |
| 19 import optparse |
| 20 import os |
| 21 import pprint |
| 22 import re |
| 23 import subprocess |
| 24 import sys |
| 25 import json |
| 26 |
def format_bytes(bytes):
    """Render a byte count compactly for display.

    Counts strictly greater than one million are shown in decimal megabytes
    with an 'm' suffix, counts strictly greater than one thousand in decimal
    kilobytes with a 'k' suffix (both with one decimal place).  Anything
    else is returned as str() of the value.
    """
    # Largest unit first so a value is reported in the biggest unit it exceeds.
    for threshold, suffix in ((1.0e6, 'm'), (1.0e3, 'k')):
        if bytes > threshold:
            return '%.1f%s' % (bytes / threshold, suffix)
    return str(bytes)
| 36 |
| 37 |
def symbol_type_to_human(type):
    """Map a lower-case nm symbol type letter to a human-readable name.

    Raises KeyError for a letter that is not in the table.
    """
    # 'u' (unique global), 'w' and 'v' are all reported as weak symbols.
    names = dict.fromkeys(('u', 'w', 'v'), 'weak symbol')
    names.update(b='bss', d='data', r='read-only data', t='code')
    return names[type]
| 49 |
| 50 |
def parse_nm(input):
    """Parse nm output.

    Argument: an iterable over lines of nm output.

    Yields: (symbol name, symbol type, symbol size, source file path).
    The type is the lower-cased nm type letter, with 'u' and 'v' folded
    into 'w'.  BSS symbols ('b') are skipped.  Path may be None if nm
    couldn't figure out the source file.

    Lines that carry an address but no size, and address-less lines of type
    'U' or 'w', are silently ignored.  Any other line is reported on stderr
    as 'unparsed'.
    """

    # Match lines with size + symbol + optional filename.
    sym_re = re.compile(r'^[0-9a-f]+ ([0-9a-f]+) (.) ([^\t]+)(?:\t(.*):\d+)?$')

    # Match lines with addr but no size.
    addr_re = re.compile(r'^[0-9a-f]+ (.) ([^\t]+)(?:\t.*)?$')
    # Match lines that don't have an address at all -- typically external
    # symbols.
    noaddr_re = re.compile(r'^ + (.) (.*)$')

    for line in input:
        line = line.rstrip()

        match = sym_re.match(line)
        if match:
            size, sym_type, sym, path = match.groups()
            sym_type = sym_type.lower()
            if sym_type == 'b':
                continue  # skip all BSS for now
            if sym_type in ('u', 'v'):
                sym_type = 'w'  # just call them all weak
            yield sym, sym_type, int(size, 16), path
            continue

        if addr_re.match(line):
            # No size == we don't care.
            continue

        match = noaddr_re.match(line)
        if match and match.group(1) in ('U', 'w'):
            # external or weak symbol
            continue

        # sys.stderr.write works on both Python 2 and 3, unlike the
        # Python-2-only `print >>sys.stderr` statement.
        sys.stderr.write('unparsed: %r\n' % (line,))
| 95 |
def demangle(ident, cppfilt):
    """Demangle a C++ identifier with an external c++filt-style tool.

    Arguments:
      ident: the symbol name.
      cppfilt: path of the demangler executable; a false value disables
        demangling.

    Returns the tool's stripped output when cppfilt is set and ident starts
    with '_Z'; otherwise returns ident unchanged.  Errors raised by
    subprocess.check_output propagate to the caller.
    """
    if cppfilt and ident.startswith('_Z'):
        # Demangle names when possible. Mangled names all start with _Z.
        # universal_newlines=True makes check_output return text instead of
        # bytes on Python 3, so the result stays a str like the input.
        ident = subprocess.check_output(
            [cppfilt, ident], universal_newlines=True).strip()
    return ident
| 101 |
| 102 |
class Suffix:
    """A compiled pattern that locates one suffix inside an identifier.

    Attributes:
      pattern: the regex source; `suffix` wrapped so that group 1 is the text
        before it and the last group is the text after it.
      re: the compiled form of `pattern`.
      replacement: the label the caller associates with this suffix.
    """
    def __init__(self, suffix, replacement):
        pattern = ''.join(('^(.*)', suffix, '(.*)$'))
        self.pattern = pattern
        self.re = re.compile(pattern)
        self.replacement = replacement
| 108 |
class SuffixCleanup:
    """Pre-compile suffix regular expressions."""

    def __init__(self):
        # Raw strings: '\.' is not a valid escape in an ordinary string
        # literal and newer Python versions warn about it.
        self.suffixes = [
            Suffix(r'\.part\.([0-9]+)', 'part'),
            Suffix(r'\.constprop\.([0-9]+)', 'constprop'),
            Suffix(r'\.isra\.([0-9]+)', 'isra'),
        ]

    def cleanup(self, ident, cppfilt):
        """Cleanup identifiers that have suffixes preventing demangling,
        and demangle if possible.

        Each known '.<kind>.<N>' suffix found in ident is removed and
        re-appended after demangling as ' [<kind>.<N>]'.  An identifier with
        no known suffix is returned untouched (and is not demangled here).
        """
        to_append = []
        for s in self.suffixes:
            found = s.re.match(ident)
            if not found:
                continue
            to_append.append(' [%s.%s]' % (s.replacement, found.group(2)))
            ident = found.group(1) + found.group(3)
        if to_append:
            # Only try to demangle if there were suffixes.
            ident = demangle(ident, cppfilt) + ''.join(to_append)
        return ident


suffix_cleanup = SuffixCleanup()
| 135 |
def parse_cpp_name(name, cppfilt):
    """Split a C++ symbol name into a list of nested components.

    The name is first passed through suffix_cleanup.cleanup().  Leading
    decorations such as 'vtable for ' or a 'void ' return type are then moved
    to the end as bracketed tags (or dropped), and the result is split on
    top-level '::' separators.  A template argument list stays attached to
    its component, and a function signature or operator overload ends the
    split.

    Arguments:
      name: symbol name as printed by nm.
      cppfilt: demangler path forwarded to suffix_cleanup.cleanup().

    Returns a list of non-empty strings, outermost component first; an empty
    list for an empty name.
    """
    name = suffix_cleanup.cleanup(name, cppfilt)

    # Turn prefixes into suffixes so namespacing works.
    prefixes = [
        ['bool ', ''],
        ['construction vtable for ', ' [construction vtable]'],
        ['global constructors keyed to ', ' [global constructors]'],
        ['guard variable for ', ' [guard variable]'],
        ['int ', ''],
        ['non-virtual thunk to ', ' [non-virtual thunk]'],
        ['typeinfo for ', ' [typeinfo]'],
        ['typeinfo name for ', ' [typeinfo name]'],
        ['virtual thunk to ', ' [virtual thunk]'],
        ['void ', ''],
        ['vtable for ', ' [vtable]'],
        ['VTT for ', ' [VTT]'],
    ]
    for prefix, replacement in prefixes:
        if name.startswith(prefix):
            name = name[len(prefix):] + replacement
    # Simplify parenthesis parsing.
    replacements = [
        ['(anonymous namespace)', '[anonymous namespace]'],
    ]
    for value, replacement in replacements:
        name = name.replace(value, replacement)

    def parse_one(val):
        """Returns (leftmost-part, remaining)."""
        # The length guard avoids an IndexError when val is exactly
        # 'operator'; that case falls through to the final return below.
        if (val.startswith('operator') and len(val) > 8 and
                not (val[8].isalnum() or val[8] == '_')):
            # Operator overload function, terminate.
            return (val, '')
        co = val.find('::')
        lt = val.find('<')
        pa = val.find('(')
        co = len(val) if co == -1 else co
        lt = len(val) if lt == -1 else lt
        pa = len(val) if pa == -1 else pa
        if co < lt and co < pa:
            # Namespace or type name.
            return (val[:co], val[co+2:])
        if lt < pa:
            # Template. Make sure we capture nested templates too: find the
            # '>' that closes the '<' at position lt.
            open_tmpl = 0
            gt = -1
            for pos in range(lt, len(val)):
                if val[pos] == '<':
                    open_tmpl += 1
                elif val[pos] == '>':
                    open_tmpl -= 1
                    if open_tmpl == 0:
                        gt = pos
                        break
            if gt == -1:
                # Unbalanced '<': treat the rest as one opaque component
                # instead of indexing past the end of the string.
                return (val, '')
            ret = val[gt+1:]
            if ret.startswith('::'):
                ret = ret[2:]
            if ret.startswith('('):
                # Template function, terminate.
                return (val, '')
            return (val[:gt+1], ret)
        # Terminate with any function name, identifier, or unmangled name.
        return (val, '')

    parts = []
    while name:
        (part, name) = parse_one(name)
        assert len(part) > 0
        parts.append(part)
    return parts
| 205 |
| 206 |
def treeify_syms(symbols, strip_prefix=None, cppfilt=None):
    """Group symbols into a nested dict keyed by namespace or source path.

    Arguments:
      symbols: iterable of (symbol name, type, size, path) tuples, as yielded
        by parse_nm().
      strip_prefix: optional path prefix removed from each source path.
      cppfilt: demangler path forwarded to parse_cpp_name() and demangle().

    Returns a dict tree.  Each interior node is a dict whose '$bloat_symbols'
    entry maps a symbol type to the number of symbols inserted beneath that
    node; each leaf is a (total size, {type: count}) tuple keyed by the last
    name component.  Symbols whose name has a single component are filed
    under '[path]/<source path>' when a path is known, otherwise under
    '[ungrouped]' (with an extra group level for a few compiler-generated
    name prefixes).
    """
    # Compiler-generated local symbol prefixes and the group they go in.
    regroups = [
        ['.L.str', '[str]'],
        ['.L__PRETTY_FUNCTION__.', '[__PRETTY_FUNCTION__]'],
        ['.L__func__.', '[__func__]'],
        ['.Lswitch.table', '[switch table]'],
    ]

    dirs = {}
    for sym, sym_type, size, path in symbols:
        if path:
            path = os.path.normpath(path)
            if strip_prefix and path.startswith(strip_prefix):
                # A prefix given without a trailing slash leaves a leading
                # '/', which would yield an empty first path component, so
                # drop any that remain.
                path = path[len(strip_prefix):].lstrip('/')
            elif path.startswith('/'):
                path = path[1:]
            path = ['[path]'] + path.split('/')

        parts = parse_cpp_name(sym, cppfilt)
        if len(parts) == 1:
            if path:
                # No namespaces, group with path.
                parts = path + parts
            else:
                new_prefix = ['[ungrouped]']
                for prefix, group in regroups:
                    if parts[0].startswith(prefix):
                        parts[0] = parts[0][len(prefix):]
                        parts[0] = demangle(parts[0], cppfilt)
                        new_prefix += [group]
                        break
                parts = new_prefix + parts

        key = parts.pop()
        tree = dirs
        try:
            for part in parts:
                assert part != '', path
                if part not in tree:
                    tree[part] = {'$bloat_symbols': {}}
                counts = tree[part]['$bloat_symbols']
                counts[sym_type] = counts.get(sym_type, 0) + 1
                tree = tree[part]
            old_size, old_symbols = tree.get(key, (0, {}))
            old_symbols[sym_type] = old_symbols.get(sym_type, 0) + 1
            tree[key] = (old_size + size, old_symbols)
        except Exception:
            # Say which symbol broke the tree before re-raising.
            # sys.stderr.write works on both Python 2 and 3.
            sys.stderr.write('sym `%s`\tparts `%s`\tkey `%s`\n' %
                             (sym, parts, key))
            raise
    return dirs
| 261 |
| 262 |
def jsonify_tree(tree, name):
    """Convert a treeify_syms() tree into a JSON-serializable treemap node.

    Arguments:
      tree: dict node; dict values are subtrees, other values are
        (size, {type: count}) leaves, and the '$bloat_symbols' entry is
        per-node metadata rather than a child.
      name: label for this node.

    Returns a dict with 'name' (label plus formatted total size), 'data'
    ('$area' is the summed size, '$dominant_symbol' is the human-readable
    name of the most frequent symbol type under the node, or '' when the node
    has no '$bloat_symbols') and 'children' sorted by decreasing area.
    """
    children = []
    total = 0

    # .items() instead of .iteritems() so this runs on Python 2 and 3.
    for key, val in tree.items():
        if key == '$bloat_symbols':
            continue
        if isinstance(val, dict):
            subtree = jsonify_tree(val, key)
            total += subtree['data']['$area']
            children.append(subtree)
        else:
            (size, symbols) = val
            total += size
            # A leaf is expected to carry exactly one symbol type.
            assert len(symbols) == 1, symbols
            symbol = symbol_type_to_human(next(iter(symbols)))
            children.append({
                'name': key + ' ' + format_bytes(size),
                'data': {
                    '$area': size,
                    '$symbol': symbol,
                }
            })

    children.sort(key=lambda child: -child['data']['$area'])
    dominant_symbol = ''
    if '$bloat_symbols' in tree:
        dominant_symbol = symbol_type_to_human(
            max(tree['$bloat_symbols'].items(),
                key=operator.itemgetter(1))[0])
    return {
        'name': name + ' ' + format_bytes(total),
        'data': {
            '$area': total,
            '$dominant_symbol': dominant_symbol,
        },
        'children': children,
    }
| 302 |
| 303 |
def dump_nm(nmfile, strip_prefix, cppfilt):
    """Print the symbol treemap for nm output as a 'var kTree = ...' line.

    Arguments:
      nmfile: iterable over lines of nm output.
      strip_prefix: path prefix forwarded to treeify_syms().
      cppfilt: demangler path forwarded to treeify_syms().
    """
    symbols = parse_nm(nmfile)
    dirs = treeify_syms(symbols, strip_prefix, cppfilt)
    tree_json = json.dumps(jsonify_tree(dirs, '[everything]'), indent=2)
    print('var kTree = ' + tree_json)
| 308 |
| 309 |
def parse_objdump(input):
    """Parse objdump -h output.

    Argument: an iterable over lines of `objdump -h` output.

    Returns (sections, debug_sections), two lists of (name, size) tuples in
    input order.  A leading '.' is removed from each section name; sections
    whose name then starts with 'debug_' go into debug_sections with that
    prefix removed.  Lines that do not look like
    '<index> <name> <hex size> ...' are ignored.
    """
    # Raw string: '\d' and '\S' are not valid escapes in an ordinary literal.
    # The size must be a whole hex token, so int(size, 16) cannot fail on a
    # line that merely starts with a number.
    sec_re = re.compile(r'^\d+ (\S+) +([0-9a-f]+)(?:\s|$)')
    sections = []
    debug_sections = []

    for line in input:
        match = sec_re.match(line.strip())
        if not match:
            continue
        name, size = match.groups()
        if name.startswith('.'):
            name = name[1:]
        if name.startswith('debug_'):
            debug_sections.append((name[len('debug_'):], int(size, 16)))
        else:
            sections.append((name, int(size, 16)))
    return sections, debug_sections
| 330 |
| 331 |
def jsonify_sections(name, sections):
    """Build a treemap node for a flat list of binary sections.

    Arguments:
      name: label for the parent node.
      sections: iterable of (section name, size) pairs.

    Returns a dict with 'name' (label plus formatted total size), 'data'
    holding the summed '$area', and 'children' -- one node per section,
    sorted by decreasing size.
    """
    children = [
        {
            'name': section + ' ' + format_bytes(size),
            'data': { '$area': size }
        }
        for section, size in sections
    ]
    total = sum(child['data']['$area'] for child in children)

    # Largest sections first.
    children.sort(key=lambda child: -child['data']['$area'])

    return {
        'name': name + ' ' + format_bytes(total),
        'data': { '$area': total },
        'children': children
    }
| 349 |
| 350 |
def dump_sections(objdump):
    """Print the section treemap for objdump output as 'var kTree = ...'.

    Argument: an iterable over lines of `objdump -h` output.

    The printed tree has a 'top' node whose children are the 'debug' and
    'sections' groups, each built by jsonify_sections().
    """
    sections, debug_sections = parse_objdump(objdump)
    sections = jsonify_sections('sections', sections)
    debug_sections = jsonify_sections('debug', debug_sections)
    size = sections['data']['$area'] + debug_sections['data']['$area']
    # Parenthesized single-argument print behaves the same on Python 2 and 3
    # and matches dump_nm().
    print('var kTree = ' + json.dumps({
        'name': 'top ' + format_bytes(size),
        'data': { '$area': size },
        'children': [ debug_sections, sections ]}))
| 360 |
| 361 |
# Command-line entry point: parse options, then run the requested MODE.
usage="""%prog [options] MODE

Modes are:
  syms: output symbols json suitable for a treemap
  dump: print symbols sorted by size (pipe to head for best output)
  sections: output binary sections json suitable for a treemap

nm output passed to --nm-output should from running a command
like the following (note, can take a long time -- 30 minutes):
  nm -C -S -l /path/to/binary > nm.out

objdump output passed to --objdump-output should be from a command
like:
  objdump -h /path/to/binary > objdump.out"""
parser = optparse.OptionParser(usage=usage)
parser.add_option('--nm-output', action='store', dest='nmpath',
                  metavar='PATH', default='nm.out',
                  help='path to nm output [default=nm.out]')
parser.add_option('--objdump-output', action='store', dest='objdumppath',
                  metavar='PATH', default='objdump.out',
                  help='path to objdump output [default=objdump.out]')
parser.add_option('--strip-prefix', metavar='PATH', action='store',
                  help='strip PATH prefix from paths; e.g. /path/to/src/root')
parser.add_option('--filter', action='store',
                  help='include only symbols/files matching FILTER')
parser.add_option('--c++filt', action='store', metavar='PATH', dest='cppfilt',
                  default='c++filt', help="Path to c++filt, used to demangle "
                  "symbols that weren't handled by nm. Set to an invalid path "
                  "to disable.")
opts, args = parser.parse_args()

if len(args) != 1:
    parser.print_usage()
    sys.exit(1)

mode = args[0]
if mode == 'syms':
    with open(opts.nmpath, 'r') as nmfile:
        # Probe the demangler once: 'main' is not a mangled name, so a
        # working c++filt must echo it back unchanged.  If the probe fails,
        # warn and carry on without demangling.
        try:
            res = subprocess.check_output([opts.cppfilt, 'main'],
                                          universal_newlines=True)
            if res.strip() != 'main':
                sys.stderr.write("%s failed demangling, "
                                 "output won't be demangled.\n" % opts.cppfilt)
                opts.cppfilt = None
        except (OSError, ValueError, subprocess.CalledProcessError):
            sys.stderr.write("Could not find c++filt at %s, "
                             "output won't be demangled.\n" % opts.cppfilt)
            opts.cppfilt = None
        dump_nm(nmfile, strip_prefix=opts.strip_prefix, cppfilt=opts.cppfilt)
elif mode == 'sections':
    with open(opts.objdumppath, 'r') as objdumpfile:
        dump_sections(objdumpfile)
elif mode == 'dump':
    with open(opts.nmpath, 'r') as nmfile:
        syms = list(parse_nm(nmfile))
    # a list of (sym, type, size, path); sort by size.
    syms.sort(key=lambda x: -x[2])
    total = 0
    for sym, sym_type, size, path in syms:
        if sym_type in ('b', 'w'):
            continue  # skip bss and weak symbols
        if path is None:
            path = ''
        if opts.filter and not (opts.filter in sym or opts.filter in path):
            continue
        print('%6s %s (%s) %s' % (format_bytes(size), sym,
                                  symbol_type_to_human(sym_type), path))
        total += size
    print('%6s %s' % (format_bytes(total), 'total'))
else:
    print('unknown mode')
    parser.print_usage()
OLD | NEW |