| Index: tools/binary_size/explain_binary_size_delta.py
|
| diff --git a/tools/binary_size/explain_binary_size_delta.py b/tools/binary_size/explain_binary_size_delta.py
|
| index d6eba2601224ae0bc0cc71d4986ffe1bf599cf8e..8bc6c5a1e329cb53c2eb94f36481dc84d45b4fb2 100755
|
| --- a/tools/binary_size/explain_binary_size_delta.py
|
| +++ b/tools/binary_size/explain_binary_size_delta.py
|
| @@ -38,6 +38,8 @@ dumps. Example:
|
| """
|
|
|
| import collections
|
| +from collections import Counter
|
| +from math import ceil
|
| import operator
|
| import optparse
|
| import os
|
| @@ -46,11 +48,81 @@ import sys
|
| import binary_size_utils
|
|
|
|
|
| +def CalculateSharedAddresses(symbols):
|
| + """Checks how many symbols share the same memory space. This returns a
|
| +Counter result where result[address] will tell you how many times address was
|
| +used by symbols."""
|
| + count = Counter()
|
| + for _, _, _, _, address in symbols:
|
| + count[address] += 1
|
| +
|
| + return count
|
| +
|
| +
|
| +def CalculateEffectiveSize(share_count, address, symbol_size):
|
| + """Given a raw symbol_size and an address, this method returns the
|
| + size we should blame on this symbol considering it might share the
|
| + machine code/data with other symbols. Using the raw symbol_size for
|
| + each symbol would in those cases overestimate the true cost of that
|
| + block.
|
| +
|
| + """
|
| + shared_count = share_count[address]
|
| + if shared_count == 1:
|
| + return symbol_size
|
| +
|
| + assert shared_count > 1
|
| + return int(ceil(symbol_size / float(shared_count)))
|
| +
|
| +class SymbolDelta(object):
|
| + """Stores old size, new size and some metadata."""
|
| + def __init__(self, shared):
|
| + self.old_size = None
|
| + self.new_size = None
|
| + self.shares_space_with_other_symbols = shared
|
| +
|
| + def __eq__(self, other):
|
| + return (self.old_size == other.old_size and
|
| + self.new_size == other.new_size and
|
| + self.shares_space_with_other_symbols ==
|
| + other.shares_space_with_other_symbols)
|
| +
|
| + def __ne__(self, other):
|
| + return not self.__eq__(other)
|
| +
|
| + def copy_symbol_delta(self):
|
| + symbol_delta = SymbolDelta(self.shares_space_with_other_symbols)
|
| + symbol_delta.old_size = self.old_size
|
| + symbol_delta.new_size = self.new_size
|
| + return symbol_delta
|
| +
|
| +class DeltaInfo(SymbolDelta):
|
| + """Summary of the change for one symbol between two instances."""
|
| + def __init__(self, file_path, symbol_type, symbol_name, shared):
|
| + SymbolDelta.__init__(self, shared)
|
| + self.file_path = file_path
|
| + self.symbol_type = symbol_type
|
| + self.symbol_name = symbol_name
|
| +
|
| + def __eq__(self, other):
|
| + return (self.file_path == other.file_path and
|
| + self.symbol_type == other.symbol_type and
|
| + self.symbol_name == other.symbol_name and
|
| + SymbolDelta.__eq__(self, other))
|
| +
|
| + def __ne__(self, other):
|
| + return not self.__eq__(other)
|
| +
|
| + def ExtractSymbolDelta(self):
|
| + """Returns a copy of the SymbolDelta for this DeltaInfo."""
|
| + return SymbolDelta.copy_symbol_delta(self)
|
| +
|
| def Compare(symbols1, symbols2):
|
| """Executes a comparison of the symbols in symbols1 and symbols2.
|
|
|
| Returns:
|
| tuple of lists: (added_symbols, removed_symbols, changed_symbols, others)
|
| + where each list contains DeltaInfo objects.
|
| """
|
| added = [] # tuples
|
| removed = [] # tuples
|
| @@ -59,9 +131,12 @@ def Compare(symbols1, symbols2):
|
|
|
| cache1 = {}
|
| cache2 = {}
|
| - # Make a map of (file, symbol_type) : (symbol_name, symbol_size)
|
| - for cache, symbols in ((cache1, symbols1), (cache2, symbols2)):
|
| - for symbol_name, symbol_type, symbol_size, file_path in symbols:
|
| + # Map (file, symbol_type) : {symbol_name: [(effective_size, shared), ...]}
|
| + share_count1 = CalculateSharedAddresses(symbols1)
|
| + share_count2 = CalculateSharedAddresses(symbols2)
|
| + for cache, symbols, share_count in ((cache1, symbols1, share_count1),
|
| + (cache2, symbols2, share_count2)):
|
| + for symbol_name, symbol_type, symbol_size, file_path, address in symbols:
|
| if 'vtable for ' in symbol_name:
|
| symbol_type = '@' # hack to categorize these separately
|
| if file_path:
|
| @@ -70,10 +145,15 @@ def Compare(symbols1, symbols2):
|
| file_path = file_path.replace('\\', '/')
|
| else:
|
| file_path = '(No Path)'
|
| + # Take into consideration that multiple symbols might share the same
|
| + # block of code.
|
| + effective_symbol_size = CalculateEffectiveSize(share_count, address,
|
| + symbol_size)
|
| key = (file_path, symbol_type)
|
| bucket = cache.setdefault(key, {})
|
| size_list = bucket.setdefault(symbol_name, [])
|
| - size_list.append(symbol_size)
|
| + size_list.append((effective_symbol_size,
|
| + effective_symbol_size != symbol_size))
|
|
|
| # Now diff them. We iterate over the elements in cache1. For each symbol
|
| # that we find in cache2, we record whether it was deleted, changed, or
|
| @@ -81,52 +161,69 @@ def Compare(symbols1, symbols2):
|
| # in cache2 at the end of the iteration over cache1 are the 'new' symbols.
|
| for key, bucket1 in cache1.items():
|
| bucket2 = cache2.get(key)
|
| + file_path, symbol_type = key;
|
| if not bucket2:
|
| # A file was removed. Everything in bucket1 is dead.
|
| for symbol_name, symbol_size_list in bucket1.items():
|
| - for symbol_size in symbol_size_list:
|
| - removed.append((key[0], key[1], symbol_name, symbol_size, None))
|
| + for (symbol_size, shared) in symbol_size_list:
|
| + delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
|
| + delta_info.old_size = symbol_size
|
| + removed.append(delta_info)
|
| else:
|
| # File still exists, look for changes within.
|
| for symbol_name, symbol_size_list in bucket1.items():
|
| size_list2 = bucket2.get(symbol_name)
|
| if size_list2 is None:
|
| # Symbol no longer exists in bucket2.
|
| - for symbol_size in symbol_size_list:
|
| - removed.append((key[0], key[1], symbol_name, symbol_size, None))
|
| + for (symbol_size, shared) in symbol_size_list:
|
| + delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
|
| + delta_info.old_size = symbol_size
|
| + removed.append(delta_info)
|
| else:
|
| del bucket2[symbol_name] # Symbol is not new, delete from cache2.
|
| if len(symbol_size_list) == 1 and len(size_list2) == 1:
|
| - symbol_size = symbol_size_list[0]
|
| - size2 = size_list2[0]
|
| + symbol_size, shared1 = symbol_size_list[0]
|
| + size2, shared2 = size_list2[0]
|
| + delta_info = DeltaInfo(file_path, symbol_type, symbol_name,
|
| + shared1 or shared2)
|
| + delta_info.old_size = symbol_size
|
| + delta_info.new_size = size2
|
| if symbol_size != size2:
|
| # Symbol has change size in bucket.
|
| - changed.append((key[0], key[1], symbol_name, symbol_size, size2))
|
| + changed.append(delta_info)
|
| else:
|
| # Symbol is unchanged.
|
| - unchanged.append((key[0], key[1], symbol_name, symbol_size,
|
| - size2))
|
| + unchanged.append(delta_info)
|
| else:
|
| # Complex comparison for when a symbol exists multiple times
|
| # in the same file (where file can be "unknown file").
|
| symbol_size_counter = collections.Counter(symbol_size_list)
|
| delta_counter = collections.Counter(symbol_size_list)
|
| delta_counter.subtract(size_list2)
|
| - for symbol_size in sorted(delta_counter.keys()):
|
| - delta = delta_counter[symbol_size]
|
| - unchanged_count = symbol_size_counter[symbol_size]
|
| + for delta_counter_key in sorted(delta_counter.keys()):
|
| + delta = delta_counter[delta_counter_key]
|
| + unchanged_count = symbol_size_counter[delta_counter_key]
|
| + (symbol_size, shared) = delta_counter_key
|
| if delta > 0:
|
| unchanged_count -= delta
|
| for _ in range(unchanged_count):
|
| - unchanged.append((key[0], key[1], symbol_name, symbol_size,
|
| - symbol_size))
|
| + delta_info = DeltaInfo(file_path, symbol_type,
|
| + symbol_name, shared)
|
| + delta_info.old_size = symbol_size
|
| + delta_info.new_size = symbol_size
|
| + unchanged.append(delta_info)
|
| if delta > 0: # Used to be more of these than there is now.
|
| for _ in range(delta):
|
| - removed.append((key[0], key[1], symbol_name, symbol_size,
|
| - None))
|
| + delta_info = DeltaInfo(file_path, symbol_type,
|
| + symbol_name, shared)
|
| + delta_info.old_size = symbol_size
|
| + removed.append(delta_info)
|
| elif delta < 0: # More of this (symbol,size) now.
|
| for _ in range(-delta):
|
| - added.append((key[0], key[1], symbol_name, None, symbol_size))
|
| + delta_info = DeltaInfo(file_path, symbol_type,
|
| + symbol_name, shared)
|
| + delta_info.new_size = symbol_size
|
| + added.append(delta_info)
|
|
|
| if len(bucket2) == 0:
|
| del cache1[key] # Entire bucket is empty, delete from cache2
|
| @@ -135,11 +232,15 @@ def Compare(symbols1, symbols2):
|
| # the encountered symbols from cache2. What's left in cache2 is the new
|
| # symbols.
|
| for key, bucket2 in cache2.iteritems():
|
| + file_path, symbol_type = key;
|
| for symbol_name, symbol_size_list in bucket2.items():
|
| - for symbol_size in symbol_size_list:
|
| - added.append((key[0], key[1], symbol_name, None, symbol_size))
|
| + for (symbol_size, shared) in symbol_size_list:
|
| + delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
|
| + delta_info.new_size = symbol_size
|
| + added.append(delta_info)
|
| return (added, removed, changed, unchanged)
|
|
|
| +
|
| def DeltaStr(number):
|
| """Returns the number as a string with a '+' prefix if it's > 0 and
|
| a '-' prefix if it's < 0."""
|
| @@ -149,6 +250,16 @@ def DeltaStr(number):
|
| return result
|
|
|
|
|
| +def SharedInfoStr(symbol_info):
|
| + """Returns a string (prefixed by space) explaining that numbers are
|
| + adjusted because of shared space between symbols, or an empty string
|
| + if space had not been shared."""
|
| +
|
| + if symbol_info.shares_space_with_other_symbols:
|
| + return " (adjusted sizes because of memory sharing)"
|
| +
|
| + return ""
|
| +
|
| class CrunchStatsData(object):
|
| """Stores a summary of data of a certain kind."""
|
| def __init__(self, symbols):
|
| @@ -166,8 +277,7 @@ def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
|
| grown = []
|
| shrunk = []
|
| for item in changed:
|
| - file_path, symbol_type, symbol_name, size1, size2 = item
|
| - if size1 < size2:
|
| + if item.old_size < item.new_size:
|
| grown.append(item)
|
| else:
|
| shrunk.append(item)
|
| @@ -178,14 +288,15 @@ def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
|
| shrunk_symbols = CrunchStatsData(shrunk)
|
| sections = [new_symbols, removed_symbols, grown_symbols, shrunk_symbols]
|
| for section in sections:
|
| - for file_path, symbol_type, symbol_name, size1, size2 in section.symbols:
|
| - section.sources.add(file_path)
|
| - if size1 is not None:
|
| - section.before_size += size1
|
| - if size2 is not None:
|
| - section.after_size += size2
|
| - bucket = section.symbols_by_path.setdefault(file_path, [])
|
| - bucket.append((symbol_name, symbol_type, size1, size2))
|
| + for item in section.symbols:
|
| + section.sources.add(item.file_path)
|
| + if item.old_size is not None:
|
| + section.before_size += item.old_size
|
| + if item.new_size is not None:
|
| + section.after_size += item.new_size
|
| + bucket = section.symbols_by_path.setdefault(item.file_path, [])
|
| + bucket.append((item.symbol_name, item.symbol_type,
|
| + item.ExtractSymbolDelta()))
|
|
|
| total_change = sum(s.after_size - s.before_size for s in sections)
|
| summary = 'Total change: %s bytes' % DeltaStr(total_change)
|
| @@ -213,9 +324,9 @@ def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
|
|
|
| maybe_unchanged_sources = set()
|
| unchanged_symbols_size = 0
|
| - for file_path, symbol_type, symbol_name, size1, size2 in unchanged:
|
| - maybe_unchanged_sources.add(file_path)
|
| - unchanged_symbols_size += size1 # == size2
|
| + for item in unchanged:
|
| + maybe_unchanged_sources.add(item.file_path)
|
| + unchanged_symbols_size += item.old_size # == item.new_size
|
| print(' %d unchanged, totalling %d bytes' %
|
| (len(unchanged), unchanged_symbols_size))
|
|
|
| @@ -256,14 +367,14 @@ def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
|
| if not entry:
|
| entry = {'plus': 0, 'minus': 0}
|
| delta_by_path[path] = entry
|
| - for symbol_name, symbol_type, size1, size2 in \
|
| + for symbol_name, symbol_type, symbol_delta in \
|
| section.symbols_by_path[path]:
|
| - if size1 is None:
|
| - delta = size2
|
| - elif size2 is None:
|
| - delta = -size1
|
| + if symbol_delta.old_size is None:
|
| + delta = symbol_delta.new_size
|
| + elif symbol_delta.new_size is None:
|
| + delta = -symbol_delta.old_size
|
| else:
|
| - delta = size2 - size1
|
| + delta = symbol_delta.new_size - symbol_delta.old_size
|
|
|
| if delta > 0:
|
| entry['plus'] += delta
|
| @@ -288,34 +399,45 @@ def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
|
| print header
|
| print divider
|
| if showsymbols:
|
| + def ExtractNewSize(tup):
|
| + symbol_delta = tup[2]
|
| + return symbol_delta.new_size
|
| + def ExtractOldSize(tup):
|
| + symbol_delta = tup[2]
|
| + return symbol_delta.old_size
|
| if path in new_symbols.symbols_by_path:
|
| print ' New symbols:'
|
| - for symbol_name, symbol_type, size1, size2 in \
|
| + for symbol_name, symbol_type, symbol_delta in \
|
| sorted(new_symbols.symbols_by_path[path],
|
| - key=operator.itemgetter(3),
|
| + key=ExtractNewSize,
|
| reverse=True):
|
| - print (' %8s: %s type=%s, size=%d bytes' %
|
| - (DeltaStr(size2), symbol_name, symbol_type, size2))
|
| + print (' %8s: %s type=%s, size=%d bytes%s' %
|
| + (DeltaStr(symbol_delta.new_size), symbol_name, symbol_type,
|
| + symbol_delta.new_size, SharedInfoStr(symbol_delta)))
|
| if path in removed_symbols.symbols_by_path:
|
| print ' Removed symbols:'
|
| - for symbol_name, symbol_type, size1, size2 in \
|
| + for symbol_name, symbol_type, symbol_delta in \
|
| sorted(removed_symbols.symbols_by_path[path],
|
| - key=operator.itemgetter(2)):
|
| - print (' %8s: %s type=%s, size=%d bytes' %
|
| - (DeltaStr(-size1), symbol_name, symbol_type, size1))
|
| + key=ExtractOldSize):
|
| + print (' %8s: %s type=%s, size=%d bytes%s' %
|
| + (DeltaStr(-symbol_delta.old_size), symbol_name, symbol_type,
|
| + symbol_delta.old_size,
|
| + SharedInfoStr(symbol_delta)))
|
| for (changed_symbols_by_path, type_str) in [
|
| (grown_symbols.symbols_by_path, "Grown"),
|
| (shrunk_symbols.symbols_by_path, "Shrunk")]:
|
| if path in changed_symbols_by_path:
|
| print ' %s symbols:' % type_str
|
| def changed_symbol_sortkey(item):
|
| - symbol_name, _symbol_type, size1, size2 = item
|
| - return (size1 - size2, symbol_name)
|
| - for symbol_name, symbol_type, size1, size2 in \
|
| + symbol_name, _symbol_type, symbol_delta = item
|
| + return (symbol_delta.old_size - symbol_delta.new_size, symbol_name)
|
| + for symbol_name, symbol_type, symbol_delta in \
|
| sorted(changed_symbols_by_path[path], key=changed_symbol_sortkey):
|
| - print (' %8s: %s type=%s, (was %d bytes, now %d bytes)'
|
| - % (DeltaStr(size2 - size1), symbol_name,
|
| - symbol_type, size1, size2))
|
| + print (' %8s: %s type=%s, (was %d bytes, now %d bytes)%s'
|
| + % (DeltaStr(symbol_delta.new_size - symbol_delta.old_size),
|
| + symbol_name, symbol_type,
|
| + symbol_delta.old_size, symbol_delta.new_size,
|
| + SharedInfoStr(symbol_delta)))
|
|
|
|
|
| def main():
|
|
|