Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(189)

Unified Diff: tools/binary_size/explain_binary_size_delta.py

Issue 258633003: Graphical version of the run_binary_size_analysis tool. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Made the code fully pylint clean. Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/binary_size/explain_binary_size_delta.py
diff --git a/tools/binary_size/explain_binary_size_delta.py b/tools/binary_size/explain_binary_size_delta.py
new file mode 100755
index 0000000000000000000000000000000000000000..ba1b25c4b27330aeb171e5cd4b00475f089c7068
--- /dev/null
+++ b/tools/binary_size/explain_binary_size_delta.py
@@ -0,0 +1,405 @@
+#!/usr/bin/env python
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Describe the size difference of two binaries.
+
+Generates a description of the size difference of two binaries based
+on the difference of the size of various symbols.
+
+This tool needs "nm" dumps of each binary with full symbol
+information. You can obtain the necessary dumps by running the
+run_binary_size_analysis.py script upon each binary, with the
+"--nm-out" parameter set to the location in which you want to save the
+dumps. Example:
+
+ # obtain symbol data from first binary in /tmp/nm1.dump
+ cd $CHECKOUT1_SRC
+ ninja -C out/Release binary_size_tool
+ tools/binary_size/run_binary_size_analysis \
+ --library <path_to_library>
+ --destdir /tmp/throwaway
+ --nm-out /tmp/nm1.dump
+
+ # obtain symbol data from second binary in /tmp/nm2.dump
+ cd $CHECKOUT2_SRC
+ ninja -C out/Release binary_size_tool
+ tools/binary_size/run_binary_size_analysis \
+ --library <path_to_library>
+ --destdir /tmp/throwaway
+ --nm-out /tmp/nm2.dump
+
+ # cleanup useless files
+ rm -r /tmp/throwaway
+
+ # run this tool
+ explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
+"""
+
+import optparse
+import os
+import sys
+
+import binary_size_utils
+
+
+def Compare(symbols1, symbols2):
+ """Executes a comparison of the symbols in symbols1 and symbols2.
+
+ Returns:
+ tuple of lists: (added_symbols, removed_symbols, changed_symbols, others)
+ """
+ added = [] # tuples
+ removed = [] # tuples
+ changed = [] # tuples
+ unchanged = [] # tuples
+
+ cache1 = {}
+ cache2 = {}
+ # Make a map of (file, symbol_type) : (symbol_name, symbol_size)
+ for cache, symbols in ((cache1, symbols1), (cache2, symbols2)):
+ for symbol_name, symbol_type, symbol_size, file_path in symbols:
+ if 'vtable for ' in symbol_name:
+ symbol_type = '@' # hack to categorize these separately
+ if file_path:
+ file_path = os.path.normpath(file_path)
+ else:
+ file_path = '(No Path)'
+ key = (file_path, symbol_type)
+ bucket = cache.setdefault(key, {})
+ bucket[symbol_name] = symbol_size
+
+ # Now diff them. We iterate over the elements in cache1. For each symbol
+ # that we find in cache2, we record whether it was deleted, changed, or
+ # unchanged. We then remove it from cache2; all the symbols that remain
+ # in cache2 at the end of the iteration over cache1 are the 'new' symbols.
+ for key, bucket1 in cache1.items():
+ bucket2 = cache2.get(key)
+ if not bucket2:
+ # A file was removed. Everything in bucket1 is dead.
+ for symbol_name, symbol_size in bucket1.items():
+ removed.append((key[0], key[1], symbol_name, symbol_size, None))
+ else:
+ # File still exists, look for changes within.
+ for symbol_name, symbol_size in bucket1.items():
+ size2 = bucket2.get(symbol_name)
+ if size2 is None:
+ # Symbol no longer exists in bucket2.
+ removed.append((key[0], key[1], symbol_name, symbol_size, None))
+ else:
+ del bucket2[symbol_name] # Symbol is not new, delete from cache2.
+ if len(bucket2) == 0:
+ del cache1[key] # Entire bucket is empty, delete from cache2
+ if symbol_size != size2:
+ # Symbol has change size in bucket.
+ changed.append((key[0], key[1], symbol_name, symbol_size, size2))
+ else:
+ # Symbol is unchanged.
+ unchanged.append((key[0], key[1], symbol_name, symbol_size, size2))
+
+ # We have now analyzed all symbols that are in cache1 and removed all of
+ # the encountered symbols from cache2. What's left in cache2 is the new
+ # symbols.
+ for key, bucket2 in cache2.iteritems():
+ for symbol_name, symbol_size in bucket2.items():
+ added.append((key[0], key[1], symbol_name, None, symbol_size))
+ return (added, removed, changed, unchanged)
+
+
+def TestCompare():
+ # List entries have form: symbol_name, symbol_type, symbol_size, file_path
+ symbol_list1 = (
+ # File with one symbol, left as-is.
+ ( 'unchanged', 't', 1000, '/file_unchanged' ),
+ # File with one symbol, changed.
+ ( 'changed', 't', 1000, '/file_all_changed' ),
+ # File with one symbol, deleted.
+ ( 'removed', 't', 1000, '/file_all_deleted' ),
+ # File with two symbols, one unchanged, one changed, same bucket
+ ( 'unchanged', 't', 1000, '/file_pair_unchanged_changed' ),
+ ( 'changed', 't', 1000, '/file_pair_unchanged_changed' ),
+ # File with two symbols, one unchanged, one deleted, same bucket
+ ( 'unchanged', 't', 1000, '/file_pair_unchanged_removed' ),
+ ( 'removed', 't', 1000, '/file_pair_unchanged_removed' ),
+ # File with two symbols, one unchanged, one added, same bucket
+ ( 'unchanged', 't', 1000, '/file_pair_unchanged_added' ),
+ # File with two symbols, one unchanged, one changed, different bucket
+ ( 'unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_changed' ),
+ ( 'changed', '@', 1000, '/file_pair_unchanged_diffbuck_changed' ),
+ # File with two symbols, one unchanged, one deleted, different bucket
+ ( 'unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_removed' ),
+ ( 'removed', '@', 1000, '/file_pair_unchanged_diffbuck_removed' ),
+ # File with two symbols, one unchanged, one added, different bucket
+ ( 'unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_added' ),
+ # File with four symbols, one added, one removed, one changed, one unchanged
+ ( 'size_changed', 't', 1000, '/file_tetra' ),
+ ( 'removed', 't', 1000, '/file_tetra' ),
+ ( 'unchanged', 't', 1000, '/file_tetra' ),
+ )
+
+ symbol_list2 = (
+ # File with one symbol, left as-is.
+ ( 'unchanged', 't', 1000, '/file_unchanged' ),
+ # File with one symbol, changed.
+ ( 'changed', 't', 2000, '/file_all_changed' ),
+ # File with two symbols, one unchanged, one changed, same bucket
+ ( 'unchanged', 't', 1000, '/file_pair_unchanged_changed' ),
+ ( 'changed', 't', 2000, '/file_pair_unchanged_changed' ),
+ # File with two symbols, one unchanged, one deleted, same bucket
+ ( 'unchanged', 't', 1000, '/file_pair_unchanged_removed' ),
+ # File with two symbols, one unchanged, one added, same bucket
+ ( 'unchanged', 't', 1000, '/file_pair_unchanged_added' ),
+ ( 'added', 't', 1000, '/file_pair_unchanged_added' ),
+ # File with two symbols, one unchanged, one changed, different bucket
+ ( 'unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_changed' ),
+ ( 'changed', '@', 2000, '/file_pair_unchanged_diffbuck_changed' ),
+ # File with two symbols, one unchanged, one deleted, different bucket
+ ( 'unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_removed' ),
+ # File with two symbols, one unchanged, one added, different bucket
+ ( 'unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_added' ),
+ ( 'added', '@', 1000, '/file_pair_unchanged_diffbuck_added' ),
+ # File with four symbols, one added, one removed, one changed, one unchanged
+ ( 'size_changed', 't', 2000, '/file_tetra' ),
+ ( 'unchanged', 't', 1000, '/file_tetra' ),
+ ( 'added', 't', 1000, '/file_tetra' ),
+ # New file with one symbol added
+ ( 'added', 't', 1000, '/file_new' ),
+ )
+
+ # Here we go
+ (added, removed, changed, unchanged) = Compare(symbol_list1, symbol_list2)
+
+ # File with one symbol, left as-is.
+ assert ('/file_unchanged', 't', 'unchanged', 1000, 1000) in unchanged
+ # File with one symbol, changed.
+ assert ('/file_all_changed', 't', 'changed', 1000, 2000) in changed
+ # File with one symbol, deleted.
+ assert ('/file_all_deleted', 't', 'removed', 1000, None) in removed
+ # New file with one symbol added
+ assert ('/file_new', 't', 'added', None, 1000) in added
+ # File with two symbols, one unchanged, one changed, same bucket
+ assert ('/file_pair_unchanged_changed',
+ 't', 'unchanged', 1000, 1000) in unchanged
+ assert ('/file_pair_unchanged_changed',
+ 't', 'changed', 1000, 2000) in changed
+ # File with two symbols, one unchanged, one removed, same bucket
+ assert ('/file_pair_unchanged_removed',
+ 't', 'unchanged', 1000, 1000) in unchanged
+ assert ('/file_pair_unchanged_removed',
+ 't', 'removed', 1000, None) in removed
+ # File with two symbols, one unchanged, one added, same bucket
+ assert ('/file_pair_unchanged_added',
+ 't', 'unchanged', 1000, 1000) in unchanged
+ assert ('/file_pair_unchanged_added',
+ 't', 'added', None, 1000) in added
+ # File with two symbols, one unchanged, one changed, different bucket
+ assert ('/file_pair_unchanged_diffbuck_changed',
+ 't', 'unchanged', 1000, 1000) in unchanged
+ assert ('/file_pair_unchanged_diffbuck_changed',
+ '@', 'changed', 1000, 2000) in changed
+ # File with two symbols, one unchanged, one removed, different bucket
+ assert ('/file_pair_unchanged_diffbuck_removed',
+ 't', 'unchanged', 1000, 1000) in unchanged
+ assert ('/file_pair_unchanged_diffbuck_removed',
+ '@', 'removed', 1000, None) in removed
+ # File with two symbols, one unchanged, one added, different bucket
+ assert ('/file_pair_unchanged_diffbuck_added',
+ 't', 'unchanged', 1000, 1000) in unchanged
+ assert ('/file_pair_unchanged_diffbuck_added',
+ '@', 'added', None, 1000) in added
+ # File with four symbols, one added, one removed, one changed, one unchanged
+ assert ('/file_tetra', 't', 'size_changed', 1000, 2000) in changed
+ assert ('/file_tetra', 't', 'unchanged', 1000, 1000) in unchanged
+ assert ('/file_tetra', 't', 'added', None, 1000) in added
+ assert ('/file_tetra', 't', 'removed', 1000, None) in removed
+
+ # Now check final stats.
+ CrunchStats(added, removed, changed, unchanged, True, True)
+
+
+def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
+ """Outputs to stdout a summary of changes based on the symbol lists."""
+ print 'Symbol statistics:'
+ sources_with_new_symbols = set()
+ new_symbols_size = 0
+ new_symbols_by_path = {}
+ for file_path, symbol_type, symbol_name, size1, size2 in added:
+ sources_with_new_symbols.add(file_path)
+ new_symbols_size += size2
+ bucket = new_symbols_by_path.setdefault(file_path, [])
+ bucket.append((symbol_name, symbol_type, None, size2))
+ print(' %d added, totalling %d bytes across %d sources' %
+ (len(added), new_symbols_size, len(sources_with_new_symbols)))
+
+ sources_with_removed_symbols = set()
+ removed_symbols_size = 0
+ removed_symbols_by_path = {}
+ for file_path, symbol_type, symbol_name, size1, size2 in removed:
+ sources_with_removed_symbols.add(file_path)
+ removed_symbols_size += size1
+ bucket = removed_symbols_by_path.setdefault(file_path, [])
+ bucket.append((symbol_name, symbol_type, size1, None))
+ print(' %d removed, totalling %d bytes removed across %d sources' %
+ (len(removed), removed_symbols_size, len(sources_with_removed_symbols)))
+
+ sources_with_changed_symbols = set()
+ before_size = 0
+ after_size = 0
+ changed_symbols_by_path = {}
+ for file_path, symbol_type, symbol_name, size1, size2 in changed:
+ sources_with_changed_symbols.add(file_path)
+ before_size += size1
+ after_size += size2
+ bucket = changed_symbols_by_path.setdefault(file_path, [])
+ bucket.append((symbol_name, symbol_type, size1, size2))
+ print(' %d changed, resulting in a net change of %d bytes '
+ '(%d bytes before, %d bytes after) across %d sources' %
+ (len(changed), (after_size - before_size), before_size, after_size,
+ len(sources_with_changed_symbols)))
+
+ maybe_unchanged_sources = set()
+ unchanged_symbols_size = 0
+ for file_path, symbol_type, symbol_name, size1, size2 in unchanged:
+ maybe_unchanged_sources.add(file_path)
+ unchanged_symbols_size += size1 # == size2
+ print(' %d unchanged, totalling %d bytes' %
+ (len(unchanged), unchanged_symbols_size))
+
+ # High level analysis, always output.
+ unchanged_sources = (maybe_unchanged_sources -
+ sources_with_changed_symbols -
+ sources_with_removed_symbols -
+ sources_with_new_symbols)
+ new_sources = (sources_with_new_symbols -
+ maybe_unchanged_sources -
+ sources_with_removed_symbols)
+ removed_sources = (sources_with_removed_symbols -
+ maybe_unchanged_sources -
+ sources_with_new_symbols)
+ partially_changed_sources = (sources_with_changed_symbols |
+ sources_with_new_symbols |
+ sources_with_removed_symbols) - removed_sources - new_sources
+ allFiles = (sources_with_new_symbols |
+ sources_with_removed_symbols |
+ sources_with_changed_symbols |
+ maybe_unchanged_sources)
+ print 'Source stats: '
+ print(' %d sources encountered.' % len(allFiles))
+ print(' %d completely new.' % len(new_sources))
+ print(' %d removed completely.' % len(removed_sources))
+ print(' %d partially changed.' % len(partially_changed_sources))
+ print(' %d completely unchanged.' % len(unchanged_sources))
+ remainder = (allFiles - new_sources - removed_sources -
+ partially_changed_sources - unchanged_sources)
+ assert len(remainder) == 0
+
+ if not showsources:
+ return # Per-source analysis, only if requested
+ print 'Per-source Analysis:'
+ delta_by_path = {}
+ for path in new_symbols_by_path:
+ entry = delta_by_path.get(path)
+ if not entry:
+ entry = {'plus': 0, 'minus': 0}
+ delta_by_path[path] = entry
+ for symbol_name, symbol_type, size1, size2 in new_symbols_by_path[path]:
+ entry['plus'] += size2
+ for path in removed_symbols_by_path:
+ entry = delta_by_path.get(path)
+ if not entry:
+ entry = {'plus': 0, 'minus': 0}
+ delta_by_path[path] = entry
+ for symbol_name, symbol_type, size1, size2 in removed_symbols_by_path[path]:
+ entry['minus'] += size1
+ for path in changed_symbols_by_path:
+ entry = delta_by_path.get(path)
+ if not entry:
+ entry = {'plus': 0, 'minus': 0}
+ delta_by_path[path] = entry
+ for symbol_name, symbol_type, size1, size2 in changed_symbols_by_path[path]:
+ delta = size2 - size1
+ if delta > 0:
+ entry['plus'] += delta
+ else:
+ entry['minus'] += (-1 * delta)
+
+ for path in sorted(delta_by_path):
+ print ' Source: ' + path
+ size_data = delta_by_path[path]
+ gain = size_data['plus']
+ loss = size_data['minus']
+ delta = size_data['plus'] - size_data['minus']
+ print (' Change: %d bytes (gained %d, lost %d)' % (delta, gain, loss))
+ if showsymbols:
+ if path in new_symbols_by_path:
+ print ' New symbols:'
+ for symbol_name, symbol_type, size1, size2 in \
+ new_symbols_by_path[path]:
+ print (' %s type=%s, size=%d bytes' %
+ (symbol_name, symbol_type, size2))
+ if path in removed_symbols_by_path:
+ print ' Removed symbols:'
+ for symbol_name, symbol_type, size1, size2 in \
+ removed_symbols_by_path[path]:
+ print (' %s type=%s, size=%d bytes' %
+ (symbol_name, symbol_type, size1))
+ if path in changed_symbols_by_path:
+ print ' Changed symbols:'
+ def sortkey(item):
+ symbol_name, _symbol_type, size1, size2 = item
+ return (size1 - size2, symbol_name)
+ for symbol_name, symbol_type, size1, size2 in \
+ sorted(changed_symbols_by_path[path], key=sortkey):
+ print (' %s type=%s, delta=%d bytes (was %d bytes, now %d bytes)'
+ % (symbol_name, symbol_type, (size2 - size1), size1, size2))
+
+
+def main():
+ usage = """%prog [options]
+
+ Analyzes the symbolic differences between two binary files
+ (typically, not necessarily, two different builds of the same
+ library) and produces a detailed description of symbols that have
+ been added, removed, or whose size has changed.
+
+ Example:
+ explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
+
+ Options are available via '--help'.
+ """
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option('--nm1', metavar='PATH',
+ help='the nm dump of the first library')
+ parser.add_option('--nm2', metavar='PATH',
+ help='the nm dump of the second library')
+ parser.add_option('--showsources', action='store_true', default=False,
+ help='show per-source statistics')
+ parser.add_option('--showsymbols', action='store_true', default=False,
+ help='show all symbol information; implies --showfiles')
+ parser.add_option('--verbose', action='store_true', default=False,
+ help='output internal debugging stuff')
+ parser.add_option('--selftest', action='store_true', default=False,
+ help='run internal diagnosis')
+ opts, _args = parser.parse_args()
+
+ if opts.selftest:
+ TestCompare()
+ return
+
+ if not opts.nm1:
+ parser.error('--nm1 is required')
+ if not opts.nm2:
+ parser.error('--nm2 is required')
+ symbols = []
+ for path in [opts.nm1, opts.nm2]:
+ with file(path, 'r') as nm_input:
+ if opts.verbose:
+ print 'parsing ' + path + '...'
+ symbols.append(list(binary_size_utils.ParseNm(nm_input)))
+ (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
+ CrunchStats(added, removed, changed, unchanged,
+ opts.showsources | opts.showsymbols, opts.showsymbols)
+
+if __name__ == '__main__':
+ sys.exit(main())

Powered by Google App Engine
This is Rietveld 408576698