Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(254)

Side by Side Diff: bloat/bloat.py

Issue 917203002: Subzero: Generate a web page showing llvm2ice size breakdown. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Dump the json file into the build directory Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « bloat/README.chromium ('k') | bloat/llvm2ice.bloat.html » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/python
2 #
3 # Copyright 2013 Google Inc. All Rights Reserved.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16
17 import fileinput
18 import operator
19 import optparse
20 import os
21 import pprint
22 import re
23 import subprocess
24 import sys
25 import json
26
def format_bytes(bytes):
    """Pretty-print a number of bytes.

    Values of at least one million are shown in megabytes with an 'm'
    suffix, values of at least one thousand in kilobytes with a 'k' suffix
    (both with one decimal place); anything smaller is returned as str().

    The thresholds are inclusive so that exactly 1000 renders as '1.0k'
    and exactly 1000000 as '1.0m' rather than '1000' / '1000.0k'.
    """
    if bytes >= 1e6:
        return '%.1fm' % (bytes / 1.0e6)
    if bytes >= 1e3:
        return '%.1fk' % (bytes / 1.0e3)
    return str(bytes)
36
37
def symbol_type_to_human(type):
    """Map a lower-case nm symbol type letter to a human-readable name.

    Raises KeyError for letters that are not in the table.
    """
    weak = 'weak symbol'
    names = dict(
        b='bss',
        d='data',
        r='read-only data',
        t='code',
        u=weak,  # Unique global.
        w=weak,
        v=weak,
    )
    return names[type]
49
50
def parse_nm(input):
    """Parse nm output.

    Argument: an iterable over lines of nm output.

    Yields: (symbol name, symbol type, symbol size, source file path).
    The symbol type is the lower-cased nm type letter, with 'u' and 'v'
    folded into 'w'; BSS ('b') symbols are skipped entirely.
    Path may be None if nm couldn't figure out the source file.

    Lines with an address but no size, and address-less 'U'/'w' lines,
    are silently ignored.  Anything else is reported on stderr as
    "unparsed: <repr of line>".
    """

    # Match lines with size + symbol + optional filename.
    sym_re = re.compile(r'^[0-9a-f]+ ([0-9a-f]+) (.) ([^\t]+)(?:\t(.*):\d+)?$')

    # Match lines with addr but no size.
    addr_re = re.compile(r'^[0-9a-f]+ (.) ([^\t]+)(?:\t.*)?$')
    # Match lines that don't have an address at all -- typically external
    # symbols.
    noaddr_re = re.compile(r'^ + (.) (.*)$')

    for line in input:
        line = line.rstrip()

        match = sym_re.match(line)
        if match:
            size, type, sym, path = match.groups()
            size = int(size, 16)
            type = type.lower()
            if type in ('u', 'v'):
                type = 'w'  # just call them all weak
            if type == 'b':
                continue  # skip all BSS for now
            yield sym, type, size, path
            continue

        if addr_re.match(line):
            # No size == we don't care.
            continue

        match = noaddr_re.match(line)
        if match and match.group(1) in ('U', 'w'):
            # external or weak symbol
            continue

        # sys.stderr.write works on both Python 2 and 3, unlike the
        # "print >>stream" statement form.
        sys.stderr.write('unparsed: %r\n' % (line,))
95
def demangle(ident, cppfilt):
    """Demangle a C++ identifier by running it through c++filt.

    Arguments:
      ident: the identifier to demangle.
      cppfilt: path to the c++filt executable, or a false value to disable
        demangling.

    Returns ident unchanged when cppfilt is false or ident does not start
    with '_Z'; otherwise the stripped output of `cppfilt ident`.

    Raises whatever subprocess.check_output raises if the tool cannot be
    run or exits with a non-zero status.
    """
    if cppfilt and ident.startswith('_Z'):
        # Demangle names when possible. Mangled names all start with _Z.
        # universal_newlines=True makes check_output return text rather
        # than bytes on Python 3, so the result can be concatenated with
        # other strings by the callers.
        ident = subprocess.check_output(
            [cppfilt, ident], universal_newlines=True).strip()
    return ident
101
102
class Suffix:
    """A compiled pattern that locates one suffix inside an identifier.

    The pattern captures the text before the suffix, whatever groups the
    suffix expression itself defines, and the text after the suffix.
    """

    def __init__(self, suffix, replacement):
        self.replacement = replacement
        self.pattern = ''.join(['^(.*)', suffix, '(.*)$'])
        self.re = re.compile(self.pattern)
108
class SuffixCleanup:
    """Pre-compile suffix regular expressions."""

    def __init__(self):
        # Raw strings keep the backslashes as regex escapes; in a plain
        # string literal '\.' is an invalid escape sequence.
        self.suffixes = [
            Suffix(r'\.part\.([0-9]+)', 'part'),
            Suffix(r'\.constprop\.([0-9]+)', 'constprop'),
            Suffix(r'\.isra\.([0-9]+)', 'isra'),
        ]

    def cleanup(self, ident, cppfilt):
        """Cleanup identifiers that have suffixes preventing demangling,
        and demangle if possible.

        Each known '.<kind>.<number>' suffix found in ident is removed and
        remembered as ' [<kind>.<number>]'.  If at least one suffix was
        removed, the remaining identifier is passed to demangle() and the
        remembered tags are appended to the result.  Identifiers without
        any known suffix are returned unchanged (and not demangled).
        """
        to_append = []
        for s in self.suffixes:
            found = s.re.match(ident)
            if not found:
                continue
            to_append.append(' [%s.%s]' % (s.replacement, found.group(2)))
            ident = found.group(1) + found.group(3)
        if to_append:
            # Only try to demangle if there were suffixes.
            ident = demangle(ident, cppfilt) + ''.join(to_append)
        return ident


suffix_cleanup = SuffixCleanup()
135
def parse_cpp_name(name, cppfilt):
    """Split a C++ symbol name into a list of hierarchy components.

    The name is first passed through suffix_cleanup.cleanup() (which may
    demangle it using cppfilt).  Known descriptive prefixes such as
    'vtable for ' are moved to the end as bracketed tags, and
    '(anonymous namespace)' is rewritten with square brackets so that its
    parentheses are not mistaken for an argument list.  The result is then
    split on '::' while keeping template argument lists, operator
    overloads and function signatures intact.

    Returns a list of non-empty strings; the last element is the leaf
    (function, identifier, or unmangled name).
    """
    name = suffix_cleanup.cleanup(name, cppfilt)

    # Turn prefixes into suffixes so namespacing works.
    prefixes = [
        ['bool ', ''],
        ['construction vtable for ', ' [construction vtable]'],
        ['global constructors keyed to ', ' [global constructors]'],
        ['guard variable for ', ' [guard variable]'],
        ['int ', ''],
        ['non-virtual thunk to ', ' [non-virtual thunk]'],
        ['typeinfo for ', ' [typeinfo]'],
        ['typeinfo name for ', ' [typeinfo name]'],
        ['virtual thunk to ', ' [virtual thunk]'],
        ['void ', ''],
        ['vtable for ', ' [vtable]'],
        ['VTT for ', ' [VTT]'],
    ]
    for prefix, replacement in prefixes:
        if name.startswith(prefix):
            name = name[len(prefix):] + replacement
    # Simplify parenthesis parsing.
    replacements = [
        ['(anonymous namespace)', '[anonymous namespace]'],
    ]
    for value, replacement in replacements:
        name = name.replace(value, replacement)

    def parse_one(val):
        """Returns (leftmost-part, remaining)."""
        if val.startswith('operator'):
            tail = val[len('operator'):]
            # A bare 'operator' has no following character to inspect;
            # treat it as a terminal component instead of indexing past
            # the end of the string.
            if not tail or not (tail[0].isalnum() or tail[0] == '_'):
                # Operator overload function, terminate.
                return (val, '')
        co = val.find('::')
        lt = val.find('<')
        pa = val.find('(')
        co = len(val) if co == -1 else co
        lt = len(val) if lt == -1 else lt
        pa = len(val) if pa == -1 else pa
        if co < lt and co < pa:
            # Namespace or type name.
            return (val[:co], val[co+2:])
        if lt < pa:
            # Template. Make sure we capture nested templates too.
            open_tmpl = 0
            gt = -1
            for idx in range(lt, len(val)):
                if val[idx] == '<':
                    open_tmpl += 1
                elif val[idx] == '>':
                    open_tmpl -= 1
                    if open_tmpl == 0:
                        gt = idx
                        break
            if gt == -1:
                # Unbalanced '<': there is no closing bracket to split
                # at, so keep the rest as a single component.
                return (val, '')
            ret = val[gt+1:]
            if ret.startswith('::'):
                ret = ret[2:]
            if ret.startswith('('):
                # Template function, terminate.
                return (val, '')
            return (val[:gt+1], ret)
        # Terminate with any function name, identifier, or unmangled name.
        return (val, '')

    parts = []
    while name:
        (part, name) = parse_one(name)
        # An empty component (e.g. from a leading '::') would become an
        # empty key in the symbol tree.
        assert len(part) > 0
        parts.append(part)
    return parts
205
206
def treeify_syms(symbols, strip_prefix=None, cppfilt=None):
    """Group symbols into a nested dict keyed by namespace / path parts.

    Arguments:
      symbols: iterable of (symbol name, type, size, path) tuples, as
        yielded by parse_nm().
      strip_prefix: optional leading string removed from each source path.
      cppfilt: optional path to c++filt, forwarded to the name parsing and
        demangling helpers.

    Returns a dict tree.  Interior nodes are dicts whose '$bloat_symbols'
    entry counts, per symbol type, the symbols at or below that node;
    leaves are (total size, {type: count}) tuples keyed by the last name
    component.  Symbols without any namespace are grouped under '[path]'
    plus their source path components when a path is known, and under
    '[ungrouped]' otherwise.
    """
    # Prefixes of compiler-generated local names and the group each is
    # collected under.
    regroups = [
        ['.L.str', '[str]'],
        ['.L__PRETTY_FUNCTION__.', '[__PRETTY_FUNCTION__]'],
        ['.L__func__.', '[__func__]'],
        ['.Lswitch.table', '[switch table]'],
    ]

    dirs = {}
    for sym, type, size, path in symbols:
        if path:
            path = os.path.normpath(path)
            if strip_prefix and path.startswith(strip_prefix):
                path = path[len(strip_prefix):]
            # Drop empty components.  They appear when the path is
            # absolute, and also when strip_prefix has no trailing slash
            # (leaving '/rest/of/path'); an empty component would trip
            # the assertion in the tree-building loop below.
            path = ['[path]'] + [p for p in path.split('/') if p]

        parts = parse_cpp_name(sym, cppfilt)
        if len(parts) == 1:
            if path:
                # No namespaces, group with path.
                parts = path + parts
            else:
                new_prefix = ['[ungrouped]']
                for prefix, group in regroups:
                    if parts[0].startswith(prefix):
                        parts[0] = demangle(parts[0][len(prefix):], cppfilt)
                        new_prefix += [group]
                        break
                parts = new_prefix + parts

        key = parts.pop()
        tree = dirs
        try:
            for part in parts:
                assert part != '', path
                if part not in tree:
                    tree[part] = {'$bloat_symbols': {}}
                counts = tree[part]['$bloat_symbols']
                counts[type] = counts.get(type, 0) + 1
                tree = tree[part]
            old_size, old_symbols = tree.get(key, (0, {}))
            old_symbols[type] = old_symbols.get(type, 0) + 1
            tree[key] = (old_size + size, old_symbols)
        except Exception:
            # Say which symbol broke the tree before propagating the error.
            sys.stderr.write('sym `%s`\tparts `%s`\tkey `%s`\n' %
                             (sym, parts, key))
            raise
    return dirs
261
262
def jsonify_tree(tree, name):
    """Convert a tree built by treeify_syms() into a treemap JSON structure.

    Arguments:
      tree: a dict node; values are either nested dict nodes or
        (size, {type: count}) leaf tuples.  The '$bloat_symbols' entry is
        bookkeeping and is not emitted as a child.
      name: label for this node.

    Returns a dict with 'name' (label plus formatted total size), 'data'
    ('$area' = total size of all descendants, '$dominant_symbol' = the
    human-readable name of the most frequent symbol type below this node,
    or '' when no counts are recorded) and 'children' sorted by
    decreasing size.
    """
    children = []
    total = 0

    # items() rather than iteritems() so this runs on Python 2 and 3.
    for key, val in tree.items():
        if key == '$bloat_symbols':
            continue
        if isinstance(val, dict):
            subtree = jsonify_tree(val, key)
            total += subtree['data']['$area']
            children.append(subtree)
        else:
            (size, symbols) = val
            total += size
            # A leaf is expected to carry exactly one symbol type.
            assert len(symbols) == 1, symbols
            symbol = symbol_type_to_human(next(iter(symbols)))
            children.append({
                'name': key + ' ' + format_bytes(size),
                'data': {
                    '$area': size,
                    '$symbol': symbol,
                }
            })

    children.sort(key=lambda child: -child['data']['$area'])
    dominant_symbol = ''
    type_counts = tree.get('$bloat_symbols')
    if type_counts:
        # max() would raise on an empty mapping, hence the truthiness test.
        dominant_symbol = symbol_type_to_human(
            max(type_counts.items(), key=operator.itemgetter(1))[0])
    return {
        'name': name + ' ' + format_bytes(total),
        'data': {
            '$area': total,
            '$dominant_symbol': dominant_symbol,
        },
        'children': children,
    }
302
303
def dump_nm(nmfile, strip_prefix, cppfilt):
    """Print the symbol tree parsed from nmfile as a 'var kTree = ...' line.

    nmfile is an iterable over lines of nm output; strip_prefix and cppfilt
    are forwarded to treeify_syms().
    """
    symbols = parse_nm(nmfile)
    dirs = treeify_syms(symbols, strip_prefix, cppfilt)
    payload = json.dumps(jsonify_tree(dirs, '[everything]'), indent=2)
    print('var kTree = ' + payload)
308
309
def parse_objdump(input):
    """Parse objdump -h output.

    Argument: an iterable over lines of objdump output.

    Returns (sections, debug_sections), two lists of (name, size) tuples
    in input order.  A leading '.' is removed from every section name;
    sections whose name then starts with 'debug_' go into debug_sections
    with that prefix removed as well.  Sizes are parsed as hexadecimal.
    Lines that do not look like "<index> <name> <hex size>" are ignored.
    """
    # Raw string so \d and \S reach the regex engine untouched.  The size
    # must be a complete lower-case hex token; otherwise int(size, 16)
    # below could raise on stray text.
    sec_re = re.compile(r'^\d+ (\S+) +([0-9a-f]+)(?!\S)')
    sections = []
    debug_sections = []

    for line in input:
        match = sec_re.match(line.strip())
        if not match:
            continue
        name, size = match.groups()
        if name.startswith('.'):
            name = name[1:]
        if name.startswith('debug_'):
            debug_sections.append((name[len('debug_'):], int(size, 16)))
        else:
            sections.append((name, int(size, 16)))
    return sections, debug_sections
330
331
def jsonify_sections(name, sections):
    """Build a treemap node for a list of (section name, size) pairs.

    Each pair becomes a child labelled with the section name and its
    formatted size; children are ordered by decreasing size and the node
    itself carries the sum of all sizes.
    """
    leaves = []
    grand_total = 0
    for section, size in sections:
        label = section + ' ' + format_bytes(size)
        leaves.append({'name': label, 'data': {'$area': size}})
        grand_total += size

    def by_descending_area(leaf):
        return -leaf['data']['$area']

    leaves.sort(key=by_descending_area)

    node = {}
    node['name'] = name + ' ' + format_bytes(grand_total)
    node['data'] = {'$area': grand_total}
    node['children'] = leaves
    return node
349
350
def dump_sections(objdump):
    """Print the section sizes from objdump -h output as 'var kTree = ...'.

    objdump is an iterable over lines of `objdump -h` output.  The emitted
    JSON has a 'top' node whose children are the 'debug' and 'sections'
    groups built by jsonify_sections().
    """
    sections, debug_sections = parse_objdump(objdump)
    sections = jsonify_sections('sections', sections)
    debug_sections = jsonify_sections('debug', debug_sections)
    size = sections['data']['$area'] + debug_sections['data']['$area']
    # A single parenthesised argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print('var kTree = ' + json.dumps({
        'name': 'top ' + format_bytes(size),
        'data': {'$area': size},
        'children': [debug_sections, sections]}))
360
361
# Command-line entry point: parse the options, then run the requested mode.
usage = """%prog [options] MODE

Modes are:
  syms: output symbols json suitable for a treemap
  dump: print symbols sorted by size (pipe to head for best output)
  sections: output binary sections json suitable for a treemap

nm output passed to --nm-output should be from running a command
like the following (note, can take a long time -- 30 minutes):
  nm -C -S -l /path/to/binary > nm.out

objdump output passed to --objdump-output should be from a command
like:
  objdump -h /path/to/binary > objdump.out"""
parser = optparse.OptionParser(usage=usage)
parser.add_option('--nm-output', action='store', dest='nmpath',
                  metavar='PATH', default='nm.out',
                  help='path to nm output [default=nm.out]')
parser.add_option('--objdump-output', action='store', dest='objdumppath',
                  metavar='PATH', default='objdump.out',
                  help='path to objdump output [default=objdump.out]')
parser.add_option('--strip-prefix', metavar='PATH', action='store',
                  help='strip PATH prefix from paths; e.g. /path/to/src/root')
parser.add_option('--filter', action='store',
                  help='include only symbols/files matching FILTER')
parser.add_option('--c++filt', action='store', metavar='PATH', dest='cppfilt',
                  default='c++filt', help="Path to c++filt, used to demangle "
                  "symbols that weren't handled by nm. Set to an invalid path "
                  "to disable.")
opts, args = parser.parse_args()

if len(args) != 1:
    parser.print_usage()
    sys.exit(1)

mode = args[0]
if mode == 'syms':
    # Probe c++filt once up front: 'main' is not a mangled name, so a
    # working tool must echo it back unchanged.  On any failure, fall back
    # to producing output without demangling.
    try:
        res = subprocess.check_output([opts.cppfilt, 'main'],
                                      universal_newlines=True)
    except (OSError, subprocess.CalledProcessError):
        sys.stderr.write("Could not find c++filt at %s, "
                         "output won't be demangled.\n" % opts.cppfilt)
        opts.cppfilt = None
    else:
        if res.strip() != 'main':
            sys.stderr.write("%s failed demangling, "
                             "output won't be demangled.\n" % opts.cppfilt)
            opts.cppfilt = None
    with open(opts.nmpath, 'r') as nmfile:
        dump_nm(nmfile, strip_prefix=opts.strip_prefix, cppfilt=opts.cppfilt)
elif mode == 'sections':
    with open(opts.objdumppath, 'r') as objdumpfile:
        dump_sections(objdumpfile)
elif mode == 'dump':
    with open(opts.nmpath, 'r') as nmfile:
        syms = list(parse_nm(nmfile))
    # a list of (sym, type, size, path); sort by size.
    syms.sort(key=lambda x: -x[2])
    total = 0
    for sym, type, size, path in syms:
        if type in ('b', 'w'):
            continue  # skip bss and weak symbols
        if path is None:
            path = ''
        if opts.filter and not (opts.filter in sym or opts.filter in path):
            continue
        print('%6s %s (%s) %s' % (format_bytes(size), sym,
                                  symbol_type_to_human(type), path))
        total += size
    print('%6s %s' % (format_bytes(total), 'total'))
else:
    print('unknown mode')
    parser.print_usage()
    # Exit non-zero, like the wrong-argument-count path above.
    sys.exit(1)
OLDNEW
« no previous file with comments | « bloat/README.chromium ('k') | bloat/llvm2ice.bloat.html » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698