Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(21)

Unified Diff: tools/binary_size/explain_binary_size_delta.py

Issue 258633003: Graphical version of the run_binary_size_analysis tool. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Using the python addr2line wrapper. Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/binary_size/explain_binary_size_delta.py
diff --git a/tools/binary_size/explain_binary_size_delta.py b/tools/binary_size/explain_binary_size_delta.py
new file mode 100755
index 0000000000000000000000000000000000000000..9552ebc1e5428c36f5faec9cf89a0f66d78ff865
--- /dev/null
+++ b/tools/binary_size/explain_binary_size_delta.py
@@ -0,0 +1,414 @@
+#!/usr/bin/python
Primiano Tucci (use gerrit) 2014/05/20 15:22:57 #!/usr/bin/env python ? The difference is that the
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Generate a description of the size differences between two binaries
Primiano Tucci (use gerrit) 2014/05/20 15:22:57 The first line of a docstring should fit in one li
+based on an analysis of symbols.
+
+This tool needs "nm" dumps of each binary with full symbol information.
+In order to obtain such dumps you need full source checkouts of each
+binary that you want to analyze. You can obtain the necessary dumps by
Primiano Tucci (use gerrit) 2014/05/20 15:22:57 Out of curiosity, why you need the source? Isn't t
Daniel Bratell 2014/05/21 08:42:13 I don't know for sure, but we've talked about comp
+running the run_binary_size_analysis.py script upon each binary, with
+the "--nm-out" parameter set to the location in which you want to save
+the dumps. Example:
+
+ # obtain symbol data from first binary in /tmp/nm1.dump
+ cd $CHECKOUT1_SRC
+ ninja -C out/Release binary_size_tool
+ tools/binary_size/run_binary_size_analysis.py \
+ --library <path_to_library> \
+ --destdir /tmp/throwaway \
+ --nm-out /tmp/nm1.dump
+
+ # obtain symbol data from second binary in /tmp/nm2.dump
+ cd $CHECKOUT2_SRC
+ ninja -C out/Release binary_size_tool
+ tools/binary_size/run_binary_size_analysis.py \
+ --library <path_to_library> \
+ --destdir /tmp/throwaway \
+ --nm-out /tmp/nm2.dump
+
+ # cleanup useless files
+ rm -r /tmp/throwaway
+
+ # run this tool
+ explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
+"""
+
+import collections
+import fileinput
+import json
+import optparse
+import os
+import pprint
+import sys
+
+import binary_size_utils
+
Primiano Tucci (use gerrit) 2014/05/20 15:22:57 Nit, add an extra line (two lines between top leve
def compare(symbols1, symbols2):
  """Classifies the symbols of two binaries as added/removed/changed/unchanged.

  Symbols are matched by name within a (file_path, symbol_type) bucket, so the
  same symbol name in two different files (or with two different types) is
  treated as two unrelated symbols.

  Args:
    symbols1: iterable of (symbol_name, symbol_type, symbol_size, file_path)
        tuples describing the first binary.
    symbols2: iterable of the same shape describing the second binary.

  Returns:
    A list [added, removed, changed, unchanged]. Each element is a list of
    (file_path, symbol_type, symbol_name, size1, size2) tuples. size1 is None
    for added symbols and size2 is None for removed symbols.
  """
  added = []
  removed = []
  changed = []
  unchanged = []

  cache1 = {}
  cache2 = {}
  # Build a map of (file_path, symbol_type) -> {symbol_name: symbol_size}.
  for cache, symbols in ((cache1, symbols1), (cache2, symbols2)):
    for symbol_name, symbol_type, symbol_size, file_path in symbols:
      if 'vtable for ' in symbol_name:
        symbol_type = '@'  # Hack to categorize vtables separately.
      if file_path:
        file_path = os.path.normpath(file_path)
      else:
        file_path = '(No Path)'
      bucket = cache.setdefault((file_path, symbol_type), {})
      bucket[symbol_name] = symbol_size

  # Walk everything known from the first binary. Each symbol that is also
  # present in the second binary is popped from cache2, so whatever remains
  # in cache2 afterwards is exactly the set of new symbols.
  for key, bucket1 in cache1.items():
    file_path, symbol_type = key
    # A missing bucket means the whole file/type pair disappeared; the empty
    # stand-in makes every symbol of bucket1 fall into the "removed" branch.
    bucket2 = cache2.get(key, {})
    for symbol_name, size1 in bucket1.items():
      # Test membership rather than truthiness of the size: a zero-byte
      # symbol that exists on both sides is unchanged, not removed.
      if symbol_name not in bucket2:
        removed.append((file_path, symbol_type, symbol_name, size1, None))
        continue
      size2 = bucket2.pop(symbol_name)  # Not new; drop it from cache2.
      entry = (file_path, symbol_type, symbol_name, size1, size2)
      if size1 != size2:
        changed.append(entry)
      else:
        unchanged.append(entry)

  # Everything left in cache2 was never seen in the first binary.
  for (file_path, symbol_type), bucket2 in cache2.items():
    for symbol_name, size2 in bucket2.items():
      added.append((file_path, symbol_type, symbol_name, None, size2))
  return [added, removed, changed, unchanged]
Primiano Tucci (use gerrit) 2014/05/20 15:22:57 Shouldn't this be a tuple rather than a list? You
+
+
def test_compare():
  """Self-test for compare() and crunchStats().

  Feeds compare() two hand-written symbol lists that cover every combination
  of unchanged/changed/removed/added symbols, both within a single
  (file, type) bucket and across different buckets, checks the
  classification of each symbol, and finally runs crunchStats() over the
  result to exercise the reporting code and its internal consistency check.
  """
  # List entries have form: symbol_name, symbol_type, symbol_size, file_path
  symbol_list1 = (
    # File with one symbol, left as-is.
    ('unchanged', 't', 1000, '/file_unchanged'),
    # File with one symbol, changed.
    ('changed', 't', 1000, '/file_all_changed'),
    # File with one symbol, deleted.
    ('removed', 't', 1000, '/file_all_deleted'),
    # File with two symbols, one unchanged, one changed, same bucket
    ('unchanged', 't', 1000, '/file_pair_unchanged_changed'),
    ('changed', 't', 1000, '/file_pair_unchanged_changed'),
    # File with two symbols, one unchanged, one deleted, same bucket
    ('unchanged', 't', 1000, '/file_pair_unchanged_removed'),
    ('removed', 't', 1000, '/file_pair_unchanged_removed'),
    # File with two symbols, one unchanged, one added, same bucket
    ('unchanged', 't', 1000, '/file_pair_unchanged_added'),
    # File with two symbols, one unchanged, one changed, different bucket
    ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_changed'),
    ('changed', '@', 1000, '/file_pair_unchanged_diffbuck_changed'),
    # File with two symbols, one unchanged, one deleted, different bucket
    ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_removed'),
    ('removed', '@', 1000, '/file_pair_unchanged_diffbuck_removed'),
    # File with two symbols, one unchanged, one added, different bucket
    ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_added'),
    # File with four symbols, one added, one removed, one changed, one unchanged
    ('size_changed', 't', 1000, '/file_tetra'),
    ('removed', 't', 1000, '/file_tetra'),
    ('unchanged', 't', 1000, '/file_tetra'),
  )

  symbol_list2 = (
    # File with one symbol, left as-is.
    ('unchanged', 't', 1000, '/file_unchanged'),
    # File with one symbol, changed.
    ('changed', 't', 2000, '/file_all_changed'),
    # File with two symbols, one unchanged, one changed, same bucket
    ('unchanged', 't', 1000, '/file_pair_unchanged_changed'),
    ('changed', 't', 2000, '/file_pair_unchanged_changed'),
    # File with two symbols, one unchanged, one deleted, same bucket
    ('unchanged', 't', 1000, '/file_pair_unchanged_removed'),
    # File with two symbols, one unchanged, one added, same bucket
    ('unchanged', 't', 1000, '/file_pair_unchanged_added'),
    ('added', 't', 1000, '/file_pair_unchanged_added'),
    # File with two symbols, one unchanged, one changed, different bucket
    ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_changed'),
    ('changed', '@', 2000, '/file_pair_unchanged_diffbuck_changed'),
    # File with two symbols, one unchanged, one deleted, different bucket
    ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_removed'),
    # File with two symbols, one unchanged, one added, different bucket
    ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_added'),
    ('added', '@', 1000, '/file_pair_unchanged_diffbuck_added'),
    # File with four symbols, one added, one removed, one changed, one unchanged
    ('size_changed', 't', 2000, '/file_tetra'),
    ('unchanged', 't', 1000, '/file_tetra'),
    ('added', 't', 1000, '/file_tetra'),
    # New file with one symbol added
    ('added', 't', 1000, '/file_new'),
  )

  # Here we go
  (added, removed, changed, unchanged) = compare(symbol_list1, symbol_list2)

  # Every symbol must land in exactly one category; the totals below are the
  # number of entries of each kind in the two fixtures above.
  assert len(added) == 4
  assert len(removed) == 4
  assert len(changed) == 4
  assert len(unchanged) == 8

  # File with one symbol, left as-is.
  assert ('/file_unchanged', 't', 'unchanged', 1000, 1000) in unchanged
  # File with one symbol, changed.
  assert ('/file_all_changed', 't', 'changed', 1000, 2000) in changed
  # File with one symbol, deleted.
  assert ('/file_all_deleted', 't', 'removed', 1000, None) in removed
  # New file with one symbol added
  assert ('/file_new', 't', 'added', None, 1000) in added
  # File with two symbols, one unchanged, one changed, same bucket
  assert ('/file_pair_unchanged_changed',
          't', 'unchanged', 1000, 1000) in unchanged
  assert ('/file_pair_unchanged_changed',
          't', 'changed', 1000, 2000) in changed
  # File with two symbols, one unchanged, one removed, same bucket
  assert ('/file_pair_unchanged_removed',
          't', 'unchanged', 1000, 1000) in unchanged
  assert ('/file_pair_unchanged_removed',
          't', 'removed', 1000, None) in removed
  # File with two symbols, one unchanged, one added, same bucket
  assert ('/file_pair_unchanged_added',
          't', 'unchanged', 1000, 1000) in unchanged
  assert ('/file_pair_unchanged_added',
          't', 'added', None, 1000) in added
  # File with two symbols, one unchanged, one changed, different bucket
  assert ('/file_pair_unchanged_diffbuck_changed',
          't', 'unchanged', 1000, 1000) in unchanged
  assert ('/file_pair_unchanged_diffbuck_changed',
          '@', 'changed', 1000, 2000) in changed
  # File with two symbols, one unchanged, one removed, different bucket
  assert ('/file_pair_unchanged_diffbuck_removed',
          't', 'unchanged', 1000, 1000) in unchanged
  assert ('/file_pair_unchanged_diffbuck_removed',
          '@', 'removed', 1000, None) in removed
  # File with two symbols, one unchanged, one added, different bucket
  assert ('/file_pair_unchanged_diffbuck_added',
          't', 'unchanged', 1000, 1000) in unchanged
  assert ('/file_pair_unchanged_diffbuck_added',
          '@', 'added', None, 1000) in added
  # File with four symbols, one added, one removed, one changed, one unchanged
  assert ('/file_tetra', 't', 'size_changed', 1000, 2000) in changed
  assert ('/file_tetra', 't', 'unchanged', 1000, 1000) in unchanged
  assert ('/file_tetra', 't', 'added', None, 1000) in added
  assert ('/file_tetra', 't', 'removed', 1000, None) in removed

  # Now check final stats. crunchStats() takes both showsources and
  # showsymbols; enable both so every reporting path is exercised.
  crunchStats(added, removed, changed, unchanged, True, True)
+
Primiano Tucci (use gerrit) 2014/05/20 15:22:57 Nit: two lines between top levels
def crunchStats(added, removed, changed, unchanged, showsources, showsymbols):
  """Prints a human-readable report about a symbol diff to stdout.

  Args:
    added: list of (file_path, symbol_type, symbol_name, size1, size2) tuples
        for symbols only present in the second binary (size1 is None).
    removed: same shape, for symbols only present in the first binary
        (size2 is None).
    changed: same shape, for symbols whose size differs between the binaries.
    unchanged: same shape, for symbols with identical size in both binaries.
    showsources: if true, also print the size delta of every affected source.
    showsymbols: if true, additionally list the individual symbols of every
        affected source. Only has an effect when showsources is true.

  Returns:
    None. The report is written with print.
  """
  new_symbols_by_path = _GroupSymbolsByPath(added)
  removed_symbols_by_path = _GroupSymbolsByPath(removed)
  changed_symbols_by_path = _GroupSymbolsByPath(changed)

  sources_with_new_symbols = set(new_symbols_by_path)
  sources_with_removed_symbols = set(removed_symbols_by_path)
  sources_with_changed_symbols = set(changed_symbols_by_path)
  maybe_unchanged_sources = set(entry[0] for entry in unchanged)

  new_symbols_size = sum(entry[4] for entry in added)
  removed_symbols_size = sum(entry[3] for entry in removed)
  before_size = sum(entry[3] for entry in changed)
  after_size = sum(entry[4] for entry in changed)
  unchanged_symbols_size = sum(entry[3] for entry in unchanged)  # == size2

  print('Symbol statistics:')
  print(' %s added, totalling %s bytes across %s sources' %
        (len(added), new_symbols_size, len(sources_with_new_symbols)))
  print(' %s removed, totalling %s bytes removed across %s sources' %
        (len(removed), removed_symbols_size,
         len(sources_with_removed_symbols)))
  print(' %s changed, resulting in a net change of %s bytes '
        '(%s bytes before, %s bytes after) across %s sources' %
        (len(changed), after_size - before_size, before_size, after_size,
         len(sources_with_changed_symbols)))
  print(' %s unchanged, totalling %s bytes' %
        (len(unchanged), unchanged_symbols_size))

  # High level analysis, always output.
  unchanged_sources = (maybe_unchanged_sources -
                       sources_with_changed_symbols -
                       sources_with_removed_symbols -
                       sources_with_new_symbols)
  # A source only counts as completely new (or completely removed) if none of
  # its symbols exists on the other side. A changed symbol exists in both
  # binaries, so sources with changed symbols must be excluded here too.
  new_sources = (sources_with_new_symbols -
                 maybe_unchanged_sources -
                 sources_with_removed_symbols -
                 sources_with_changed_symbols)
  removed_sources = (sources_with_removed_symbols -
                     maybe_unchanged_sources -
                     sources_with_new_symbols -
                     sources_with_changed_symbols)
  partially_changed_sources = (sources_with_changed_symbols |
                               sources_with_new_symbols |
                               sources_with_removed_symbols)
  partially_changed_sources -= removed_sources
  partially_changed_sources -= new_sources
  all_files = (sources_with_new_symbols |
               sources_with_removed_symbols |
               sources_with_changed_symbols |
               maybe_unchanged_sources)
  print('Source stats: ')
  print(' %s sources encountered.' % len(all_files))
  print(' %s completely new.' % len(new_sources))
  print(' %s removed completely.' % len(removed_sources))
  print(' %s partially changed.' % len(partially_changed_sources))
  print(' %s completely unchanged.' % len(unchanged_sources))
  # Internal sanity check: the four categories must cover every source.
  remainder = (all_files - new_sources - removed_sources -
               partially_changed_sources - unchanged_sources)
  assert not remainder

  if not showsources:
    return  # Per-source analysis, only if requested

  print('Per-source Analysis:')
  delta_by_path = {}
  for path, symbols in new_symbols_by_path.items():
    entry = delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})
    for _, _, _, size2 in symbols:
      entry['plus'] += size2
  for path, symbols in removed_symbols_by_path.items():
    entry = delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})
    for _, _, size1, _ in symbols:
      entry['minus'] += size1
  for path, symbols in changed_symbols_by_path.items():
    entry = delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})
    for _, _, size1, size2 in symbols:
      delta = size2 - size1
      if delta > 0:
        entry['plus'] += delta
      else:
        entry['minus'] -= delta

  for path in sorted(delta_by_path):
    gain = delta_by_path[path]['plus']
    loss = delta_by_path[path]['minus']
    print(' Source: ' + path)
    print(' Change: %s bytes (gained %s, lost %s)' % (gain - loss, gain, loss))
    if not showsymbols:
      continue
    if path in new_symbols_by_path:
      print(' New symbols:')
      for symbol_name, symbol_type, _, size2 in new_symbols_by_path[path]:
        print(' %s type=%s, size=%s bytes' % (symbol_name, symbol_type, size2))
    if path in removed_symbols_by_path:
      print(' Removed symbols:')
      for symbol_name, symbol_type, size1, _ in removed_symbols_by_path[path]:
        print(' %s type=%s, size=%s bytes' % (symbol_name, symbol_type, size1))
    if path in changed_symbols_by_path:
      print(' Changed symbols:')
      for symbol_name, symbol_type, size1, size2 in sorted(
          changed_symbols_by_path[path], key=_ChangedSymbolSortKey):
        print(' %s type=%s, delta=%s bytes (was %s bytes, now %s bytes)' %
              (symbol_name, symbol_type, size2 - size1, size1, size2))


def _GroupSymbolsByPath(entries):
  """Maps file_path -> [(symbol_name, symbol_type, size1, size2), ...]."""
  by_path = {}
  for file_path, symbol_type, symbol_name, size1, size2 in entries:
    by_path.setdefault(file_path, []).append(
        (symbol_name, symbol_type, size1, size2))
  return by_path


def _ChangedSymbolSortKey(item):
  """Orders changed symbols by largest growth first, then by name."""
  symbol_name, _, size1, size2 = item
  return (size1 - size2, symbol_name)
+
def main():
  """Command-line entry point.

  Parses the options, loads the two nm dumps named by --nm1 and --nm2,
  diffs their symbols with compare() and prints the report with
  crunchStats(). Exits through parser.error() if either dump is missing.

  Returns:
    None, so that sys.exit(main()) terminates with exit status 0.
  """
  usage = """%prog --nm1 PATH --nm2 PATH [options]

  Explains the size difference between two binaries based on an analysis of
  their symbols. Each binary is described by an "nm" dump with full symbol
  information, which can be produced by running run_binary_size_analysis.py
  with the "--nm-out" parameter.

  Under normal circumstances, you only need to pass two arguments, thusly:

      %prog --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm1', metavar='PATH',
                    help='the nm dump of the first library')
  parser.add_option('--nm2', metavar='PATH',
                    help='the nm dump of the second library')
  parser.add_option('--showsources', action='store_true', default=False,
                    help='show per-source statistics')
  parser.add_option('--showsymbols', action='store_true', default=False,
                    help='show all symbol information; implies --showsources')
  parser.add_option('--verbose', action='store_true', default=False,
                    help='output internal debugging stuff')
  opts, _ = parser.parse_args()

  if not opts.nm1:
    parser.error('--nm1 is required')
  if not opts.nm2:
    parser.error('--nm2 is required')

  symbols = []
  for path in (opts.nm1, opts.nm2):
    # open() instead of the Python 2-only file() builtin; the handle is not
    # called "input" to avoid shadowing the builtin of that name.
    with open(path, 'r') as nm_file:
      if opts.verbose:
        print('parsing ' + path + '...')
      symbols.append(list(binary_size_utils.ParseNm(nm_file)))
  (added, removed, changed, unchanged) = compare(symbols[0], symbols[1])
  # --showsymbols implies --showsources.
  crunchStats(added, removed, changed, unchanged,
              opts.showsources or opts.showsymbols, opts.showsymbols)


if __name__ == '__main__':
  sys.exit(main())

Powered by Google App Engine
This is Rietveld 408576698