| Index: tools/binary_size/explain_binary_size_delta.py
|
| diff --git a/tools/binary_size/explain_binary_size_delta.py b/tools/binary_size/explain_binary_size_delta.py
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..ba1b25c4b27330aeb171e5cd4b00475f089c7068
|
| --- /dev/null
|
| +++ b/tools/binary_size/explain_binary_size_delta.py
|
| @@ -0,0 +1,405 @@
|
| +#!/usr/bin/env python
|
| +# Copyright 2014 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +"""Describe the size difference of two binaries.
|
| +
|
| +Generates a description of the size difference of two binaries based
|
| +on the difference of the size of various symbols.
|
| +
|
| +This tool needs "nm" dumps of each binary with full symbol
|
| +information. You can obtain the necessary dumps by running the
|
| +run_binary_size_analysis.py script upon each binary, with the
|
| +"--nm-out" parameter set to the location in which you want to save the
|
| +dumps. Example:
|
| +
|
| + # obtain symbol data from first binary in /tmp/nm1.dump
|
| + cd $CHECKOUT1_SRC
|
| + ninja -C out/Release binary_size_tool
|
| + tools/binary_size/run_binary_size_analysis.py \
|
| + --library <path_to_library> \
|
| + --destdir /tmp/throwaway \
|
| + --nm-out /tmp/nm1.dump
|
| +
|
| + # obtain symbol data from second binary in /tmp/nm2.dump
|
| + cd $CHECKOUT2_SRC
|
| + ninja -C out/Release binary_size_tool
|
| + tools/binary_size/run_binary_size_analysis.py \
|
| + --library <path_to_library> \
|
| + --destdir /tmp/throwaway \
|
| + --nm-out /tmp/nm2.dump
|
| +
|
| + # cleanup useless files
|
| + rm -r /tmp/throwaway
|
| +
|
| + # run this tool
|
| + explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
|
| +"""
|
| +
|
| +import optparse
|
| +import os
|
| +import sys
|
| +
|
| +import binary_size_utils
|
| +
|
| +
|
def Compare(symbols1, symbols2):
  """Executes a comparison of the symbols in symbols1 and symbols2.

  Symbols are grouped into buckets keyed by (file_path, symbol_type) and
  matched by name within a bucket. Symbols whose name contains 'vtable for '
  are re-typed as '@' so that they land in a bucket of their own. A falsy
  file_path is replaced by '(No Path)'; any other path is normalized with
  os.path.normpath.

  Args:
    symbols1: iterable of (symbol_name, symbol_type, symbol_size, file_path)
        tuples describing the first binary.
    symbols2: iterable of tuples of the same shape describing the second
        binary.

  Returns:
    tuple of lists: (added_symbols, removed_symbols, changed_symbols, others)
    Every list entry is a tuple
    (file_path, symbol_type, symbol_name, size1, size2), where size1 is None
    for added symbols and size2 is None for removed symbols.
  """
  added = []  # tuples
  removed = []  # tuples
  changed = []  # tuples
  unchanged = []  # tuples

  cache1 = {}
  cache2 = {}
  # Make a map of (file, symbol_type) : {symbol_name: symbol_size}
  for cache, symbols in ((cache1, symbols1), (cache2, symbols2)):
    for symbol_name, symbol_type, symbol_size, file_path in symbols:
      if 'vtable for ' in symbol_name:
        symbol_type = '@'  # hack to categorize these separately
      if file_path:
        file_path = os.path.normpath(file_path)
      else:
        file_path = '(No Path)'
      key = (file_path, symbol_type)
      bucket = cache.setdefault(key, {})
      bucket[symbol_name] = symbol_size

  # Now diff them. We iterate over the elements in cache1. For each symbol
  # that we find in cache2, we record whether it was deleted, changed, or
  # unchanged. We then remove it from cache2; all the symbols that remain
  # in cache2 at the end of the iteration over cache1 are the 'new' symbols.
  for key, bucket1 in cache1.items():
    file_path, symbol_type = key
    bucket2 = cache2.get(key)
    if not bucket2:
      # A file was removed. Everything in bucket1 is dead.
      for symbol_name, symbol_size in bucket1.items():
        removed.append((file_path, symbol_type, symbol_name, symbol_size,
                        None))
      continue

    # File still exists, look for changes within.
    for symbol_name, symbol_size in bucket1.items():
      size2 = bucket2.get(symbol_name)
      if size2 is None:
        # Symbol no longer exists in bucket2.
        removed.append((file_path, symbol_type, symbol_name, symbol_size,
                        None))
        continue

      del bucket2[symbol_name]  # Symbol is not new, delete from cache2.
      if not bucket2:
        # Entire bucket is empty, delete it from cache2. Only cache2 is
        # modified here: cache1 is being iterated and must not change size.
        # The local bucket2 reference stays valid (and empty) for the rest
        # of this bucket's symbols.
        del cache2[key]
      if symbol_size != size2:
        # Symbol has changed size in bucket.
        changed.append((file_path, symbol_type, symbol_name, symbol_size,
                        size2))
      else:
        # Symbol is unchanged.
        unchanged.append((file_path, symbol_type, symbol_name, symbol_size,
                          size2))

  # We have now analyzed all symbols that are in cache1 and removed all of
  # the encountered symbols from cache2. What's left in cache2 is the new
  # symbols.
  for key, bucket2 in cache2.items():
    for symbol_name, symbol_size in bucket2.items():
      added.append((key[0], key[1], symbol_name, None, symbol_size))
  return (added, removed, changed, unchanged)
|
| +
|
| +
|
def TestCompare():
  """Self-test: diffs two hand-built symbol lists and checks the outcome.

  Runs Compare() on a "before" and an "after" symbol list that together
  cover unchanged, changed, removed and added symbols, both within a single
  (file, type) bucket and across buckets, then asserts that each expected
  entry shows up in the matching result list. Finally the results are fed to
  CrunchStats() with per-source and per-symbol output enabled.
  """
  # Rows have the form: (symbol_name, symbol_type, symbol_size, file_path).
  before = (
      # One symbol per file: kept, resized, dropped.
      ('unchanged', 't', 1000, '/file_unchanged'),
      ('changed', 't', 1000, '/file_all_changed'),
      ('removed', 't', 1000, '/file_all_deleted'),
      # Two symbols sharing one bucket: kept + resized.
      ('unchanged', 't', 1000, '/file_pair_unchanged_changed'),
      ('changed', 't', 1000, '/file_pair_unchanged_changed'),
      # Two symbols sharing one bucket: kept + dropped.
      ('unchanged', 't', 1000, '/file_pair_unchanged_removed'),
      ('removed', 't', 1000, '/file_pair_unchanged_removed'),
      # Two symbols sharing one bucket: kept + (later) added.
      ('unchanged', 't', 1000, '/file_pair_unchanged_added'),
      # Two symbols in separate buckets: kept + resized.
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_changed'),
      ('changed', '@', 1000, '/file_pair_unchanged_diffbuck_changed'),
      # Two symbols in separate buckets: kept + dropped.
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_removed'),
      ('removed', '@', 1000, '/file_pair_unchanged_diffbuck_removed'),
      # Two symbols in separate buckets: kept + (later) added.
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_added'),
      # Four symbols: one each of added, removed, resized, kept.
      ('size_changed', 't', 1000, '/file_tetra'),
      ('removed', 't', 1000, '/file_tetra'),
      ('unchanged', 't', 1000, '/file_tetra'),
  )

  after = (
      # One symbol per file: kept, resized.
      ('unchanged', 't', 1000, '/file_unchanged'),
      ('changed', 't', 2000, '/file_all_changed'),
      # Two symbols sharing one bucket: kept + resized.
      ('unchanged', 't', 1000, '/file_pair_unchanged_changed'),
      ('changed', 't', 2000, '/file_pair_unchanged_changed'),
      # Two symbols sharing one bucket: kept + dropped.
      ('unchanged', 't', 1000, '/file_pair_unchanged_removed'),
      # Two symbols sharing one bucket: kept + added.
      ('unchanged', 't', 1000, '/file_pair_unchanged_added'),
      ('added', 't', 1000, '/file_pair_unchanged_added'),
      # Two symbols in separate buckets: kept + resized.
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_changed'),
      ('changed', '@', 2000, '/file_pair_unchanged_diffbuck_changed'),
      # Two symbols in separate buckets: kept + dropped.
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_removed'),
      # Two symbols in separate buckets: kept + added.
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_added'),
      ('added', '@', 1000, '/file_pair_unchanged_diffbuck_added'),
      # Four symbols: one each of added, removed, resized, kept.
      ('size_changed', 't', 2000, '/file_tetra'),
      ('unchanged', 't', 1000, '/file_tetra'),
      ('added', 't', 1000, '/file_tetra'),
      # Brand new file holding a single symbol.
      ('added', 't', 1000, '/file_new'),
  )

  # Here we go
  added, removed, changed, unchanged = Compare(before, after)
  outcome = {
      'added': added,
      'removed': removed,
      'changed': changed,
      'unchanged': unchanged,
  }

  # (result list, expected entry) pairs. Entries have the form
  # (file_path, symbol_type, symbol_name, size_before, size_after).
  expectations = (
      ('unchanged', ('/file_unchanged', 't', 'unchanged', 1000, 1000)),
      ('changed', ('/file_all_changed', 't', 'changed', 1000, 2000)),
      ('removed', ('/file_all_deleted', 't', 'removed', 1000, None)),
      ('added', ('/file_new', 't', 'added', None, 1000)),
      ('unchanged',
       ('/file_pair_unchanged_changed', 't', 'unchanged', 1000, 1000)),
      ('changed',
       ('/file_pair_unchanged_changed', 't', 'changed', 1000, 2000)),
      ('unchanged',
       ('/file_pair_unchanged_removed', 't', 'unchanged', 1000, 1000)),
      ('removed',
       ('/file_pair_unchanged_removed', 't', 'removed', 1000, None)),
      ('unchanged',
       ('/file_pair_unchanged_added', 't', 'unchanged', 1000, 1000)),
      ('added',
       ('/file_pair_unchanged_added', 't', 'added', None, 1000)),
      ('unchanged',
       ('/file_pair_unchanged_diffbuck_changed',
        't', 'unchanged', 1000, 1000)),
      ('changed',
       ('/file_pair_unchanged_diffbuck_changed',
        '@', 'changed', 1000, 2000)),
      ('unchanged',
       ('/file_pair_unchanged_diffbuck_removed',
        't', 'unchanged', 1000, 1000)),
      ('removed',
       ('/file_pair_unchanged_diffbuck_removed',
        '@', 'removed', 1000, None)),
      ('unchanged',
       ('/file_pair_unchanged_diffbuck_added',
        't', 'unchanged', 1000, 1000)),
      ('added',
       ('/file_pair_unchanged_diffbuck_added',
        '@', 'added', None, 1000)),
      ('changed', ('/file_tetra', 't', 'size_changed', 1000, 2000)),
      ('unchanged', ('/file_tetra', 't', 'unchanged', 1000, 1000)),
      ('added', ('/file_tetra', 't', 'added', None, 1000)),
      ('removed', ('/file_tetra', 't', 'removed', 1000, None)),
  )
  for category, expected_entry in expectations:
    assert expected_entry in outcome[category]

  # Now check final stats.
  CrunchStats(added, removed, changed, unchanged, True, True)
|
| +
|
| +
|
def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
  """Outputs to stdout a summary of changes based on the symbol lists.

  Always prints symbol-level totals followed by source-level totals. The
  per-source breakdown (and, inside it, the per-symbol listing) is printed
  only on request.

  Args:
    added: list of (file_path, symbol_type, symbol_name, size1, size2)
        tuples for new symbols; only size2 is read.
    removed: list of tuples of the same shape for deleted symbols; only
        size1 is read.
    changed: list of tuples of the same shape for resized symbols.
    unchanged: list of tuples of the same shape for untouched symbols; only
        size1 is read.
    showsources: if true, print the per-source analysis as well.
    showsymbols: if true, list the individual symbols of every source.
        Consulted only when showsources is true.
  """
  print('Symbol statistics:')
  sources_with_new_symbols = set()
  new_symbols_size = 0
  new_symbols_by_path = {}
  for file_path, symbol_type, symbol_name, _size1, size2 in added:
    sources_with_new_symbols.add(file_path)
    new_symbols_size += size2
    bucket = new_symbols_by_path.setdefault(file_path, [])
    bucket.append((symbol_name, symbol_type, None, size2))
  print(' %d added, totalling %d bytes across %d sources' %
        (len(added), new_symbols_size, len(sources_with_new_symbols)))

  sources_with_removed_symbols = set()
  removed_symbols_size = 0
  removed_symbols_by_path = {}
  for file_path, symbol_type, symbol_name, size1, _size2 in removed:
    sources_with_removed_symbols.add(file_path)
    removed_symbols_size += size1
    bucket = removed_symbols_by_path.setdefault(file_path, [])
    bucket.append((symbol_name, symbol_type, size1, None))
  print(' %d removed, totalling %d bytes removed across %d sources' %
        (len(removed), removed_symbols_size,
         len(sources_with_removed_symbols)))

  sources_with_changed_symbols = set()
  before_size = 0
  after_size = 0
  changed_symbols_by_path = {}
  for file_path, symbol_type, symbol_name, size1, size2 in changed:
    sources_with_changed_symbols.add(file_path)
    before_size += size1
    after_size += size2
    bucket = changed_symbols_by_path.setdefault(file_path, [])
    bucket.append((symbol_name, symbol_type, size1, size2))
  print(' %d changed, resulting in a net change of %d bytes '
        '(%d bytes before, %d bytes after) across %d sources' %
        (len(changed), (after_size - before_size), before_size, after_size,
         len(sources_with_changed_symbols)))

  maybe_unchanged_sources = set()
  unchanged_symbols_size = 0
  for file_path, _symbol_type, _symbol_name, size1, _size2 in unchanged:
    maybe_unchanged_sources.add(file_path)
    unchanged_symbols_size += size1  # == size2
  print(' %d unchanged, totalling %d bytes' %
        (len(unchanged), unchanged_symbols_size))

  # High level analysis, always output.
  unchanged_sources = (maybe_unchanged_sources -
                       sources_with_changed_symbols -
                       sources_with_removed_symbols -
                       sources_with_new_symbols)
  new_sources = (sources_with_new_symbols -
                 maybe_unchanged_sources -
                 sources_with_removed_symbols)
  removed_sources = (sources_with_removed_symbols -
                     maybe_unchanged_sources -
                     sources_with_new_symbols)
  partially_changed_sources = (sources_with_changed_symbols |
                               sources_with_new_symbols |
                               sources_with_removed_symbols
                              ) - removed_sources - new_sources
  all_sources = (sources_with_new_symbols |
                 sources_with_removed_symbols |
                 sources_with_changed_symbols |
                 maybe_unchanged_sources)
  print('Source stats: ')
  print(' %d sources encountered.' % len(all_sources))
  print(' %d completely new.' % len(new_sources))
  print(' %d removed completely.' % len(removed_sources))
  print(' %d partially changed.' % len(partially_changed_sources))
  print(' %d completely unchanged.' % len(unchanged_sources))
  # Sanity check: the four categories above must cover every source.
  remainder = (all_sources - new_sources - removed_sources -
               partially_changed_sources - unchanged_sources)
  assert len(remainder) == 0

  if not showsources:
    return  # Per-source analysis, only if requested
  print('Per-source Analysis:')

  # Accumulate bytes gained ('plus') and bytes lost ('minus') for each source.
  delta_by_path = {}
  for path, symbols in new_symbols_by_path.items():
    entry = delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})
    for _symbol_name, _symbol_type, _size1, size2 in symbols:
      entry['plus'] += size2
  for path, symbols in removed_symbols_by_path.items():
    entry = delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})
    for _symbol_name, _symbol_type, size1, _size2 in symbols:
      entry['minus'] += size1
  for path, symbols in changed_symbols_by_path.items():
    entry = delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})
    for _symbol_name, _symbol_type, size1, size2 in symbols:
      delta = size2 - size1
      if delta > 0:
        entry['plus'] += delta
      else:
        entry['minus'] += (-1 * delta)

  def sortkey(item):
    # Largest growth first (size1 - size2 is most negative), then by name.
    symbol_name, _symbol_type, size1, size2 = item
    return (size1 - size2, symbol_name)

  for path in sorted(delta_by_path):
    print(' Source: ' + path)
    size_data = delta_by_path[path]
    gain = size_data['plus']
    loss = size_data['minus']
    delta = gain - loss
    print(' Change: %d bytes (gained %d, lost %d)' % (delta, gain, loss))
    if not showsymbols:
      continue
    if path in new_symbols_by_path:
      print(' New symbols:')
      for symbol_name, symbol_type, size1, size2 in \
          new_symbols_by_path[path]:
        print(' %s type=%s, size=%d bytes' %
              (symbol_name, symbol_type, size2))
    if path in removed_symbols_by_path:
      print(' Removed symbols:')
      for symbol_name, symbol_type, size1, size2 in \
          removed_symbols_by_path[path]:
        print(' %s type=%s, size=%d bytes' %
              (symbol_name, symbol_type, size1))
    if path in changed_symbols_by_path:
      print(' Changed symbols:')
      for symbol_name, symbol_type, size1, size2 in \
          sorted(changed_symbols_by_path[path], key=sortkey):
        print(' %s type=%s, delta=%d bytes (was %d bytes, now %d bytes)'
              % (symbol_name, symbol_type, (size2 - size1), size1, size2))
|
| +
|
| +
|
def main():
  """Command-line entry point.

  With --selftest, runs TestCompare() and returns. Otherwise parses the two
  nm dumps named by --nm1 and --nm2 with binary_size_utils.ParseNm, diffs
  them with Compare() and prints the result with CrunchStats(). A missing
  --nm1 or --nm2 is reported through parser.error(), which exits the
  process.

  Returns:
    None.
  """
  usage = """%prog [options]

 Analyzes the symbolic differences between two binary files
 (typically, not necessarily, two different builds of the same
 library) and produces a detailed description of symbols that have
 been added, removed, or whose size has changed.

 Example:
 explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump

 Options are available via '--help'.
 """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm1', metavar='PATH',
                    help='the nm dump of the first library')
  parser.add_option('--nm2', metavar='PATH',
                    help='the nm dump of the second library')
  parser.add_option('--showsources', action='store_true', default=False,
                    help='show per-source statistics')
  # The help text names --showsources, the option that actually exists.
  parser.add_option('--showsymbols', action='store_true', default=False,
                    help='show all symbol information; implies --showsources')
  parser.add_option('--verbose', action='store_true', default=False,
                    help='output internal debugging stuff')
  parser.add_option('--selftest', action='store_true', default=False,
                    help='run internal diagnosis')
  opts, _args = parser.parse_args()

  if opts.selftest:
    TestCompare()
    return

  if not opts.nm1:
    parser.error('--nm1 is required')
  if not opts.nm2:
    parser.error('--nm2 is required')
  symbols = []
  for path in [opts.nm1, opts.nm2]:
    # open() rather than the Python 2-only file() builtin.
    with open(path, 'r') as nm_input:
      if opts.verbose:
        print('parsing ' + path + '...')
      symbols.append(list(binary_size_utils.ParseNm(nm_input)))
  (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
  # --showsymbols implies the per-source output.
  CrunchStats(added, removed, changed, unchanged,
              opts.showsources or opts.showsymbols, opts.showsymbols)
|
| +
|
if __name__ == '__main__':
  # Script entry point: main()'s return value becomes the exit status.
  exit_status = main()
  sys.exit(exit_status)
|
|
|