| Index: runtime/third_party/binary_size/src/explain_binary_size_delta.py
|
| diff --git a/runtime/third_party/binary_size/src/explain_binary_size_delta.py b/runtime/third_party/binary_size/src/explain_binary_size_delta.py
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..45c1236271f1b9ca843df6cc154a850f26ca5317
|
| --- /dev/null
|
| +++ b/runtime/third_party/binary_size/src/explain_binary_size_delta.py
|
| @@ -0,0 +1,484 @@
|
| +#!/usr/bin/env python
|
| +# Copyright 2014 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +"""Describe the size difference of two binaries.
|
| +
|
| +Generates a description of the size difference of two binaries based
|
| +on the difference of the size of various symbols.
|
| +
|
| +This tool needs "nm" dumps of each binary with full symbol
|
| +information. You can obtain the necessary dumps by running the
|
| +run_binary_size_analysis.py script upon each binary, with the
|
| +"--nm-out" parameter set to the location in which you want to save the
|
| +dumps. Example:
|
| +
|
| + # obtain symbol data from first binary in /tmp/nm1.dump
|
| + cd $CHECKOUT1_SRC
|
| + ninja -C out/Release binary_size_tool
|
| + tools/binary_size/run_binary_size_analysis \
|
| + --library <path_to_library>
|
| + --destdir /tmp/throwaway
|
| + --nm-out /tmp/nm1.dump
|
| +
|
| + # obtain symbol data from second binary in /tmp/nm2.dump
|
| + cd $CHECKOUT2_SRC
|
| + ninja -C out/Release binary_size_tool
|
| + tools/binary_size/run_binary_size_analysis \
|
| + --library <path_to_library>
|
| + --destdir /tmp/throwaway
|
| + --nm-out /tmp/nm2.dump
|
| +
|
| + # cleanup useless files
|
| + rm -r /tmp/throwaway
|
| +
|
| + # run this tool
|
| + explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
|
| +"""
|
| +
|
| +import collections
|
| +from collections import Counter
|
| +from math import ceil
|
| +import operator
|
| +import optparse
|
| +import os
|
| +import sys
|
| +
|
| +import binary_size_utils
|
| +
|
| +
|
def CalculateSharedAddresses(symbols):
    """Tallies how many symbols are located at each address.

    Args:
      symbols: iterable of 5-tuples whose last element is the symbol's
          address (the other four fields are ignored here).

    Returns:
      A Counter where result[address] is the number of symbols that use
      that address.
    """
    return Counter(address for _, _, _, _, address in symbols)
|
| +
|
| +
|
def CalculateEffectiveSize(share_count, address, symbol_size):
    """Returns the number of bytes to attribute to a single symbol.

    Several symbols may point at the same block of machine code/data.
    Charging each of them the raw symbol_size would over-estimate the cost
    of that block, so the raw size is divided by the number of symbols at
    the address and rounded up.

    Args:
      share_count: mapping of address -> number of symbols at that address.
      address: the address of the symbol being sized.
      symbol_size: the raw size of the symbol.

    Returns:
      symbol_size unchanged when the address is used by exactly one symbol,
      otherwise ceil(symbol_size / sharers) as an int.
    """
    sharers = share_count[address]
    if sharers != 1:
        # A count below one means the address was never tallied.
        assert sharers > 1
        return int(ceil(symbol_size / float(sharers)))
    return symbol_size
|
| +
|
class SymbolDelta(object):
    """Stores old size, new size and some metadata for one symbol.

    Attributes:
      old_size: initialised to None; filled in by the code that builds the
          delta.
      new_size: initialised to None; filled in by the code that builds the
          delta.
      shares_space_with_other_symbols: the `shared` flag passed to the
          constructor.
    """

    def __init__(self, shared):
        self.old_size = None
        self.new_size = None
        self.shares_space_with_other_symbols = shared

    def __repr__(self):
        return '%s(old_size=%r, new_size=%r, shared=%r)' % (
            type(self).__name__, self.old_size, self.new_size,
            self.shares_space_with_other_symbols)

    def __eq__(self, other):
        # Comparing against an object that lacks the delta attributes (None,
        # a string, ...) must not blow up with AttributeError; returning
        # NotImplemented lets Python fall back to its default comparison.
        try:
            return (self.old_size == other.old_size and
                    self.new_size == other.new_size and
                    self.shares_space_with_other_symbols ==
                    other.shares_space_with_other_symbols)
        except AttributeError:
            return NotImplemented

    def __ne__(self, other):
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            # Never negate NotImplemented; hand it back to the interpreter.
            return outcome
        return not outcome

    def copy_symbol_delta(self):
        """Returns a new SymbolDelta carrying the same three values."""
        symbol_delta = SymbolDelta(self.shares_space_with_other_symbols)
        symbol_delta.old_size = self.old_size
        symbol_delta.new_size = self.new_size
        return symbol_delta
|
| +
|
class DeltaInfo(SymbolDelta):
    """Summary of the change for one symbol between two instances.

    Extends SymbolDelta with the identity of the symbol: the file it was
    found in, its type and its name.
    """

    def __init__(self, file_path, symbol_type, symbol_name, shared):
        super(DeltaInfo, self).__init__(shared)
        self.symbol_name = symbol_name
        self.symbol_type = symbol_type
        self.file_path = file_path

    def __eq__(self, other):
        # Identity fields are checked first, in the same order as before,
        # and the size/sharing comparison is delegated to the base class.
        if not (self.file_path == other.file_path):
            return False
        if not (self.symbol_type == other.symbol_type):
            return False
        if not (self.symbol_name == other.symbol_name):
            return False
        return SymbolDelta.__eq__(self, other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def ExtractSymbolDelta(self):
        """Returns a plain SymbolDelta copy of this object's size data."""
        return super(DeltaInfo, self).copy_symbol_delta()
|
| +
|
def _BuildSymbolCache(symbols):
    """Indexes one symbol list by (file_path, symbol_type), then by name.

    Args:
      symbols: sequence of (symbol_name, symbol_type, symbol_size,
          file_path, address) tuples. It is iterated twice, so it must not
          be a one-shot iterator.

    Returns:
      dict mapping (file_path, symbol_type) to a dict that maps symbol_name
      to a list of (effective_size, shared) tuples, one per occurrence of
      the symbol. `shared` is True when the effective size differs from the
      raw size because the address is used by several symbols.
    """
    share_count = CalculateSharedAddresses(symbols)
    cache = {}
    for symbol_name, symbol_type, symbol_size, file_path, address in symbols:
        if 'vtable for ' in symbol_name:
            symbol_type = '@'  # hack to categorize these separately
        if file_path:
            file_path = os.path.normpath(file_path)
            if sys.platform.startswith('win'):
                file_path = file_path.replace('\\', '/')
        else:
            file_path = '(No Path)'
        # Take into consideration that multiple symbols might share the
        # same block of code.
        effective_size = CalculateEffectiveSize(share_count, address,
                                                symbol_size)
        bucket = cache.setdefault((file_path, symbol_type), {})
        bucket.setdefault(symbol_name, []).append(
            (effective_size, effective_size != symbol_size))
    return cache


def _MakeDeltaInfo(key, symbol_name, shared, old_size=None, new_size=None):
    """Builds a DeltaInfo for the symbol under key=(file_path, symbol_type)."""
    file_path, symbol_type = key
    delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
    delta_info.old_size = old_size
    delta_info.new_size = new_size
    return delta_info


def Compare(symbols1, symbols2):
    """Executes a comparison of the symbols in symbols1 and symbols2.

    Args:
      symbols1: sequence of (symbol_name, symbol_type, symbol_size,
          file_path, address) tuples describing the first binary.
      symbols2: the same for the second binary.

    Returns:
      tuple of lists: (added_symbols, removed_symbols, changed_symbols,
      unchanged_symbols) where each list contains DeltaInfo objects. Added
      entries only have new_size set, removed entries only have old_size
      set, changed and unchanged entries have both.
    """
    added = []
    removed = []
    changed = []
    unchanged = []

    cache1 = _BuildSymbolCache(symbols1)
    cache2 = _BuildSymbolCache(symbols2)

    # Now diff them. We iterate over the elements in cache1. Every symbol
    # that is also found in cache2 is classified and removed from cache2;
    # whatever remains in cache2 afterwards are the 'new' symbols.
    for key, bucket1 in cache1.items():
        bucket2 = cache2.get(key)
        if not bucket2:
            # A file was removed. Everything in bucket1 is dead.
            for symbol_name, size_list1 in bucket1.items():
                for symbol_size, shared in size_list1:
                    removed.append(_MakeDeltaInfo(key, symbol_name, shared,
                                                  old_size=symbol_size))
            continue

        # File still exists, look for changes within.
        for symbol_name, size_list1 in bucket1.items():
            # Popping marks the symbol as "not new" in cache2.
            size_list2 = bucket2.pop(symbol_name, None)
            if size_list2 is None:
                # Symbol no longer exists in bucket2.
                for symbol_size, shared in size_list1:
                    removed.append(_MakeDeltaInfo(key, symbol_name, shared,
                                                  old_size=symbol_size))
                continue

            if len(size_list1) == 1 and len(size_list2) == 1:
                old_size, shared1 = size_list1[0]
                new_size, shared2 = size_list2[0]
                delta_info = _MakeDeltaInfo(key, symbol_name,
                                            shared1 or shared2,
                                            old_size=old_size,
                                            new_size=new_size)
                if old_size != new_size:
                    changed.append(delta_info)
                else:
                    unchanged.append(delta_info)
                continue

            # Complex comparison for when a symbol exists multiple times
            # in the same file (where file can be "unknown file"): treat
            # both sides as multisets of (size, shared) entries.
            old_counts = collections.Counter(size_list1)
            delta_counts = collections.Counter(size_list1)
            delta_counts.subtract(size_list2)
            for entry in sorted(delta_counts.keys()):
                delta = delta_counts[entry]
                symbol_size, shared = entry
                unchanged_count = old_counts[entry]
                if delta > 0:
                    unchanged_count -= delta
                for _ in range(unchanged_count):
                    unchanged.append(_MakeDeltaInfo(key, symbol_name, shared,
                                                    old_size=symbol_size,
                                                    new_size=symbol_size))
                if delta > 0:  # Used to be more of these than there is now.
                    for _ in range(delta):
                        removed.append(_MakeDeltaInfo(key, symbol_name,
                                                      shared,
                                                      old_size=symbol_size))
                elif delta < 0:  # More of this (symbol, size) now.
                    for _ in range(-delta):
                        added.append(_MakeDeltaInfo(key, symbol_name, shared,
                                                    new_size=symbol_size))

        if not bucket2:
            # Entire bucket is consumed, delete it from cache2. cache1 must
            # not be modified here because it is being iterated.
            del cache2[key]

    # We have now analyzed all symbols that are in cache1 and removed all of
    # the encountered symbols from cache2. What's left in cache2 is the new
    # symbols.
    for key, bucket2 in cache2.items():
        for symbol_name, size_list2 in bucket2.items():
            for symbol_size, shared in size_list2:
                added.append(_MakeDeltaInfo(key, symbol_name, shared,
                                            new_size=symbol_size))
    return (added, removed, changed, unchanged)
|
| +
|
| +
|
def DeltaStr(number):
    """Formats a number as a signed string.

    Positive numbers get an explicit '+' prefix. Zero and negative numbers
    are returned exactly as str() renders them (so negatives keep their
    '-').
    """
    text = str(number)
    return '+' + text if number > 0 else text
|
| +
|
| +
|
def SharedInfoStr(symbol_info):
    """Returns the note appended to sizes that were adjusted for sharing.

    Args:
      symbol_info: object with a shares_space_with_other_symbols attribute.

    Returns:
      A string (prefixed by a space) saying the sizes were adjusted because
      of memory sharing, or an empty string if the flag is not set.
    """
    is_shared = symbol_info.shares_space_with_other_symbols
    return " (adjusted sizes because of memory sharing)" if is_shared else ""
|
| +
|
class CrunchStatsData(object):
    """Accumulator for the summary of one category of symbols."""

    def __init__(self, symbols):
        # The symbols belonging to this category, as handed in by the caller.
        self.symbols = symbols
        # Running byte totals; both start at zero.
        self.before_size, self.after_size = 0, 0
        # Source paths seen so far, and a per-path index; both start empty.
        self.sources = set()
        self.symbols_by_path = {}
|
| +
|
| +
|
def _SymbolByteDelta(symbol_delta):
    """Returns new_size - old_size, treating a missing (None) side as 0."""
    if symbol_delta.old_size is None:
        return symbol_delta.new_size
    if symbol_delta.new_size is None:
        return -symbol_delta.old_size
    return symbol_delta.new_size - symbol_delta.old_size


def _PrintSymbolDetails(path, new_symbols, removed_symbols, grown_symbols,
                        shrunk_symbols):
    """Prints the per-symbol breakdown of one source path."""

    def new_size_key(entry):
        return entry[2].new_size

    def old_size_key(entry):
        return entry[2].old_size

    def changed_symbol_sortkey(entry):
        symbol_name, _symbol_type, symbol_delta = entry
        return (symbol_delta.old_size - symbol_delta.new_size, symbol_name)

    if path in new_symbols.symbols_by_path:
        print(' New symbols:')
        for symbol_name, symbol_type, symbol_delta in sorted(
                new_symbols.symbols_by_path[path], key=new_size_key,
                reverse=True):
            print(' %8s: %s type=%s, size=%d bytes%s' %
                  (DeltaStr(symbol_delta.new_size), symbol_name, symbol_type,
                   symbol_delta.new_size, SharedInfoStr(symbol_delta)))
    if path in removed_symbols.symbols_by_path:
        print(' Removed symbols:')
        for symbol_name, symbol_type, symbol_delta in sorted(
                removed_symbols.symbols_by_path[path], key=old_size_key):
            print(' %8s: %s type=%s, size=%d bytes%s' %
                  (DeltaStr(-symbol_delta.old_size), symbol_name,
                   symbol_type, symbol_delta.old_size,
                   SharedInfoStr(symbol_delta)))
    for changed_symbols_by_path, type_str in (
            (grown_symbols.symbols_by_path, "Grown"),
            (shrunk_symbols.symbols_by_path, "Shrunk")):
        if path not in changed_symbols_by_path:
            continue
        print(' %s symbols:' % type_str)
        for symbol_name, symbol_type, symbol_delta in sorted(
                changed_symbols_by_path[path], key=changed_symbol_sortkey):
            print(' %8s: %s type=%s, (was %d bytes, now %d bytes)%s' %
                  (DeltaStr(symbol_delta.new_size - symbol_delta.old_size),
                   symbol_name, symbol_type,
                   symbol_delta.old_size, symbol_delta.new_size,
                   SharedInfoStr(symbol_delta)))


def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
    """Outputs to stdout a summary of changes based on the symbol lists.

    Every print is a single-argument print(...) call so the function parses
    and behaves identically under Python 2 and Python 3.

    Args:
      added: DeltaInfo objects for symbols only present in the new binary.
      removed: DeltaInfo objects for symbols only present in the old binary.
      changed: DeltaInfo objects whose old_size and new_size differ.
      unchanged: DeltaInfo objects whose old_size equals new_size.
      showsources: when true, also print a per-source breakdown.
      showsymbols: when true (and showsources is true), also list the
          individual symbols under each source.
    """
    # Split changed into grown and shrunk because that is easier to
    # discuss.
    grown = []
    shrunk = []
    for item in changed:
        if item.old_size < item.new_size:
            grown.append(item)
        else:
            shrunk.append(item)

    new_symbols = CrunchStatsData(added)
    removed_symbols = CrunchStatsData(removed)
    grown_symbols = CrunchStatsData(grown)
    shrunk_symbols = CrunchStatsData(shrunk)
    sections = [new_symbols, removed_symbols, grown_symbols, shrunk_symbols]
    for section in sections:
        for item in section.symbols:
            section.sources.add(item.file_path)
            if item.old_size is not None:
                section.before_size += item.old_size
            if item.new_size is not None:
                section.after_size += item.new_size
            bucket = section.symbols_by_path.setdefault(item.file_path, [])
            bucket.append((item.symbol_name, item.symbol_type,
                           item.ExtractSymbolDelta()))

    total_change = sum(s.after_size - s.before_size for s in sections)
    summary = 'Total change: %s bytes' % DeltaStr(total_change)
    print(summary)
    print('=' * len(summary))
    for section in sections:
        if not section.symbols:
            continue
        if section.before_size == 0:
            description = ('added, totalling %s bytes' %
                           DeltaStr(section.after_size))
        elif section.after_size == 0:
            description = ('removed, totalling %s bytes' %
                           DeltaStr(-section.before_size))
        else:
            if section.after_size > section.before_size:
                type_str = 'grown'
            else:
                type_str = 'shrunk'
            description = ('%s, for a net change of %s bytes '
                           '(%d bytes before, %d bytes after)' %
                           (type_str,
                            DeltaStr(section.after_size -
                                     section.before_size),
                            section.before_size, section.after_size))
        print(' %d %s across %d sources' %
              (len(section.symbols), description, len(section.sources)))

    maybe_unchanged_sources = set()
    unchanged_symbols_size = 0
    for item in unchanged:
        maybe_unchanged_sources.add(item.file_path)
        unchanged_symbols_size += item.old_size  # == item.new_size
    print(' %d unchanged, totalling %d bytes' %
          (len(unchanged), unchanged_symbols_size))

    # High level analysis, always output.
    unchanged_sources = maybe_unchanged_sources
    for section in sections:
        unchanged_sources = unchanged_sources - section.sources
    new_sources = (new_symbols.sources -
                   maybe_unchanged_sources -
                   removed_symbols.sources)
    removed_sources = (removed_symbols.sources -
                       maybe_unchanged_sources -
                       new_symbols.sources)
    partially_changed_sources = (grown_symbols.sources |
                                 shrunk_symbols.sources |
                                 new_symbols.sources |
                                 removed_symbols.sources
                                 ) - removed_sources - new_sources
    all_files = set()
    for section in sections:
        all_files = all_files | section.sources
    all_files = all_files | maybe_unchanged_sources
    print('Source stats:')
    print(' %d sources encountered.' % len(all_files))
    print(' %d completely new.' % len(new_sources))
    print(' %d removed completely.' % len(removed_sources))
    print(' %d partially changed.' % len(partially_changed_sources))
    print(' %d completely unchanged.' % len(unchanged_sources))
    # Sanity check: the four categories above must cover every source.
    remainder = (all_files - new_sources - removed_sources -
                 partially_changed_sources - unchanged_sources)
    assert len(remainder) == 0

    if not showsources:
        return  # Per-source analysis, only if requested
    print('Per-source Analysis:')
    delta_by_path = {}
    for section in sections:
        for path, entries in section.symbols_by_path.items():
            totals = delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})
            for _symbol_name, _symbol_type, symbol_delta in entries:
                delta = _SymbolByteDelta(symbol_delta)
                if delta > 0:
                    totals['plus'] += delta
                else:
                    totals['minus'] += (-1 * delta)

    def delta_sort_key(item):
        _path, size_data = item
        return size_data['plus'] - size_data['minus']

    # Largest net growth first.
    for path, size_data in sorted(delta_by_path.items(), key=delta_sort_key,
                                  reverse=True):
        gain = size_data['plus']
        loss = size_data['minus']
        delta = gain - loss
        header = ' %s - Source: %s - (gained %d, lost %d)' % (
            DeltaStr(delta), path, gain, loss)
        divider = '-' * len(header)
        print('')
        print(divider)
        print(header)
        print(divider)
        if showsymbols:
            _PrintSymbolDetails(path, new_symbols, removed_symbols,
                                grown_symbols, shrunk_symbols)
|
| +
|
| +
|
def main():
    """Command line entry point.

    Parses --nm1/--nm2 (both required), reads each nm dump through
    binary_size_utils.ParseNm, diffs the two symbol lists with Compare and
    prints the report with CrunchStats. Missing required options end the
    program through parser.error().
    """
    usage = (
        "%prog [options]\n"
        "\n"
        " Analyzes the symbolic differences between two binary files\n"
        " (typically, not necessarily, two different builds of the same\n"
        " library) and produces a detailed description of symbols that have\n"
        " been added, removed, or whose size has changed.\n"
        "\n"
        " Example:\n"
        " explain_binary_size_delta.py --nm1 /tmp/nm1.dump"
        " --nm2 /tmp/nm2.dump\n"
        "\n"
        " Options are available via '--help'.\n"
        " ")
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--nm1', metavar='PATH',
                      help='the nm dump of the first library')
    parser.add_option('--nm2', metavar='PATH',
                      help='the nm dump of the second library')
    parser.add_option('--showsources', action='store_true', default=False,
                      help='show per-source statistics')
    parser.add_option('--showsymbols', action='store_true', default=False,
                      help='show all symbol information; implies '
                           '--showsources')
    parser.add_option('--verbose', action='store_true', default=False,
                      help='output internal debugging stuff')
    opts, _args = parser.parse_args()

    if not opts.nm1:
        parser.error('--nm1 is required')
    if not opts.nm2:
        parser.error('--nm2 is required')

    symbols = []
    for path in [opts.nm1, opts.nm2]:
        # open() rather than the Python-2-only file() builtin.
        with open(path, 'r') as nm_input:
            if opts.verbose:
                print('parsing ' + path + '...')
            symbols.append(list(binary_size_utils.ParseNm(nm_input)))
    (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
    # --showsymbols implies --showsources.
    CrunchStats(added, removed, changed, unchanged,
                opts.showsources or opts.showsymbols, opts.showsymbols)
|
| +
|
# Run main() only when this file is executed as a script, and use its
# return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main())
|
|
|