Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(250)

Unified Diff: runtime/third_party/binary_size/src/explain_binary_size_delta.py

Issue 1754443002: - Add binary size analysis tool from Chromium. (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: runtime/third_party/binary_size/src/explain_binary_size_delta.py
diff --git a/runtime/third_party/binary_size/src/explain_binary_size_delta.py b/runtime/third_party/binary_size/src/explain_binary_size_delta.py
new file mode 100755
index 0000000000000000000000000000000000000000..45c1236271f1b9ca843df6cc154a850f26ca5317
--- /dev/null
+++ b/runtime/third_party/binary_size/src/explain_binary_size_delta.py
@@ -0,0 +1,484 @@
+#!/usr/bin/env python
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Describe the size difference of two binaries.
+
+Generates a description of the size difference of two binaries based
+on the difference of the size of various symbols.
+
+This tool needs "nm" dumps of each binary with full symbol
+information. You can obtain the necessary dumps by running the
+run_binary_size_analysis.py script upon each binary, with the
+"--nm-out" parameter set to the location in which you want to save the
+dumps. Example:
+
+ # obtain symbol data from first binary in /tmp/nm1.dump
+ cd $CHECKOUT1_SRC
+ ninja -C out/Release binary_size_tool
+ tools/binary_size/run_binary_size_analysis \
+ --library <path_to_library>
+ --destdir /tmp/throwaway
+ --nm-out /tmp/nm1.dump
+
+ # obtain symbol data from second binary in /tmp/nm2.dump
+ cd $CHECKOUT2_SRC
+ ninja -C out/Release binary_size_tool
+ tools/binary_size/run_binary_size_analysis \
+ --library <path_to_library>
+ --destdir /tmp/throwaway
+ --nm-out /tmp/nm2.dump
+
+ # cleanup useless files
+ rm -r /tmp/throwaway
+
+ # run this tool
+ explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
+"""
+
+import collections
+from collections import Counter
+from math import ceil
+import operator
+import optparse
+import os
+import sys
+
+import binary_size_utils
+
+
def CalculateSharedAddresses(symbols):
  """Count how many symbols occupy each address.

  Returns a Counter where result[address] is the number of symbols
  that were found at that address; a count above one means those
  symbols share the same machine code/data.
  """
  return Counter(address for _, _, _, _, address in symbols)
+
+
def CalculateEffectiveSize(share_count, address, symbol_size):
  """Return the portion of symbol_size attributable to one symbol.

  When several symbols share the same address (i.e. the same machine
  code or data), blaming the full raw size on each of them would
  overestimate the true cost of that block, so the raw size is split
  evenly among the sharers, rounding up.
  """
  sharers = share_count[address]
  if sharers > 1:
    return int(ceil(symbol_size / float(sharers)))
  assert sharers == 1
  return symbol_size
+
class SymbolDelta(object):
  """Holds the before/after sizes of one symbol plus sharing metadata."""

  def __init__(self, shared):
    # None means "absent on that side": a brand-new symbol has
    # old_size None, a removed symbol has new_size None.
    self.old_size = None
    self.new_size = None
    # True when the recorded sizes were adjusted because the symbol
    # shares an address with other symbols.
    self.shares_space_with_other_symbols = shared

  def __eq__(self, other):
    return ((self.old_size, self.new_size,
             self.shares_space_with_other_symbols) ==
            (other.old_size, other.new_size,
             other.shares_space_with_other_symbols))

  def __ne__(self, other):
    return not self.__eq__(other)

  def copy_symbol_delta(self):
    """Return a fresh SymbolDelta with identical field values."""
    clone = SymbolDelta(self.shares_space_with_other_symbols)
    clone.old_size = self.old_size
    clone.new_size = self.new_size
    return clone
+
class DeltaInfo(SymbolDelta):
  """Summary of the change for one named symbol between two binaries."""

  def __init__(self, file_path, symbol_type, symbol_name, shared):
    SymbolDelta.__init__(self, shared)
    self.file_path = file_path
    self.symbol_type = symbol_type
    self.symbol_name = symbol_name

  def __eq__(self, other):
    identity_matches = (self.file_path == other.file_path and
                        self.symbol_type == other.symbol_type and
                        self.symbol_name == other.symbol_name)
    return identity_matches and SymbolDelta.__eq__(self, other)

  def __ne__(self, other):
    return not self.__eq__(other)

  def ExtractSymbolDelta(self):
    """Return a plain SymbolDelta copy (drops path/type/name)."""
    return SymbolDelta.copy_symbol_delta(self)
+
def Compare(symbols1, symbols2):
  """Executes a comparison of the symbols in symbols1 and symbols2.

  Args:
    symbols1: iterable of (name, type, size, path, address) tuples
        describing the first ("old") binary.
    symbols2: the same, for the second ("new") binary.

  Returns:
    tuple of lists: (added_symbols, removed_symbols, changed_symbols,
    unchanged_symbols) where each list contains DeltaInfo objects.
  """
  added = []      # DeltaInfo objects
  removed = []    # DeltaInfo objects
  changed = []    # DeltaInfo objects
  unchanged = []  # DeltaInfo objects

  cache1 = {}
  cache2 = {}
  # Make a map of (file, symbol_type) -> {symbol_name: [(size, shared)]}.
  share_count1 = CalculateSharedAddresses(symbols1)
  share_count2 = CalculateSharedAddresses(symbols2)
  for cache, symbols, share_count in ((cache1, symbols1, share_count1),
                                      (cache2, symbols2, share_count2)):
    for symbol_name, symbol_type, symbol_size, file_path, address in symbols:
      if 'vtable for ' in symbol_name:
        symbol_type = '@'  # hack to categorize these separately
      if file_path:
        file_path = os.path.normpath(file_path)
        if sys.platform.startswith('win'):
          file_path = file_path.replace('\\', '/')
      else:
        file_path = '(No Path)'
      # Take into consideration that multiple symbols might share the same
      # block of code.
      effective_symbol_size = CalculateEffectiveSize(share_count, address,
                                                     symbol_size)
      key = (file_path, symbol_type)
      bucket = cache.setdefault(key, {})
      size_list = bucket.setdefault(symbol_name, [])
      size_list.append((effective_symbol_size,
                        effective_symbol_size != symbol_size))

  # Now diff them. We iterate over the elements in cache1. For each symbol
  # that we find in cache2, we record whether it was deleted, changed, or
  # unchanged. We then remove it from cache2; all the symbols that remain
  # in cache2 at the end of the iteration over cache1 are the 'new' symbols.
  for key, bucket1 in cache1.items():
    bucket2 = cache2.get(key)
    file_path, symbol_type = key
    if not bucket2:
      # A file was removed. Everything in bucket1 is dead.
      for symbol_name, symbol_size_list in bucket1.items():
        for (symbol_size, shared) in symbol_size_list:
          delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
          delta_info.old_size = symbol_size
          removed.append(delta_info)
    else:
      # File still exists, look for changes within.
      for symbol_name, symbol_size_list in bucket1.items():
        size_list2 = bucket2.get(symbol_name)
        if size_list2 is None:
          # Symbol no longer exists in bucket2.
          for (symbol_size, shared) in symbol_size_list:
            delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
            delta_info.old_size = symbol_size
            removed.append(delta_info)
        else:
          del bucket2[symbol_name]  # Symbol is not new, delete from cache2.
          if len(symbol_size_list) == 1 and len(size_list2) == 1:
            # Common case: symbol appears exactly once on each side.
            symbol_size, shared1 = symbol_size_list[0]
            size2, shared2 = size_list2[0]
            delta_info = DeltaInfo(file_path, symbol_type, symbol_name,
                                   shared1 or shared2)
            delta_info.old_size = symbol_size
            delta_info.new_size = size2
            if symbol_size != size2:
              # Symbol has changed size in bucket.
              changed.append(delta_info)
            else:
              # Symbol is unchanged.
              unchanged.append(delta_info)
          else:
            # Complex comparison for when a symbol exists multiple times
            # in the same file (where file can be "unknown file").
            symbol_size_counter = collections.Counter(symbol_size_list)
            delta_counter = collections.Counter(symbol_size_list)
            delta_counter.subtract(size_list2)
            for delta_counter_key in sorted(delta_counter.keys()):
              delta = delta_counter[delta_counter_key]
              unchanged_count = symbol_size_counter[delta_counter_key]
              (symbol_size, shared) = delta_counter_key
              if delta > 0:
                unchanged_count -= delta
              for _ in range(unchanged_count):
                delta_info = DeltaInfo(file_path, symbol_type,
                                       symbol_name, shared)
                delta_info.old_size = symbol_size
                delta_info.new_size = symbol_size
                unchanged.append(delta_info)
              if delta > 0:  # Used to be more of these than there is now.
                for _ in range(delta):
                  delta_info = DeltaInfo(file_path, symbol_type,
                                         symbol_name, shared)
                  delta_info.old_size = symbol_size
                  removed.append(delta_info)
              elif delta < 0:  # More of this (symbol,size) now.
                for _ in range(-delta):
                  delta_info = DeltaInfo(file_path, symbol_type,
                                         symbol_name, shared)
                  delta_info.new_size = symbol_size
                  added.append(delta_info)

      if len(bucket2) == 0:
        # Entire bucket was matched; delete it from cache2 so the "new
        # symbols" sweep below skips it. (Previously this deleted from
        # cache1 while iterating it, contradicting its own comment.)
        del cache2[key]

  # We have now analyzed all symbols that are in cache1 and removed all of
  # the encountered symbols from cache2. What's left in cache2 is the new
  # symbols.
  for key, bucket2 in cache2.items():
    file_path, symbol_type = key
    for symbol_name, symbol_size_list in bucket2.items():
      for (symbol_size, shared) in symbol_size_list:
        delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
        delta_info.new_size = symbol_size
        added.append(delta_info)
  return (added, removed, changed, unchanged)
+
+
def DeltaStr(number):
  """Return number as a string, prefixed with '+' when it is positive.

  Negative numbers keep their natural '-' prefix; zero gets no prefix.
  """
  return ('+' + str(number)) if number > 0 else str(number)
+
+
def SharedInfoStr(symbol_info):
  """Return a space-prefixed note that sizes were adjusted for memory
  sharing, or an empty string when the symbol shares no space."""
  if not symbol_info.shares_space_with_other_symbols:
    return ""
  return " (adjusted sizes because of memory sharing)"
+
class CrunchStatsData(object):
  """Aggregated statistics for one category of symbols (e.g. added)."""

  def __init__(self, symbols):
    self.symbols = symbols      # the DeltaInfo objects in this category
    self.sources = set()        # file paths touched by this category
    self.before_size = 0        # summed old sizes
    self.after_size = 0         # summed new sizes
    self.symbols_by_path = {}   # path -> [(name, type, SymbolDelta)]
+
+
def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
  """Outputs to stdout a summary of changes based on the symbol lists.

  Args:
    added, removed, changed, unchanged: lists of DeltaInfo objects as
        returned by Compare().
    showsources: if True, also print per-source statistics.
    showsymbols: if True, also print per-symbol details per source.
  """
  # Split changed into grown and shrunk because that is easier to
  # discuss.
  grown = []
  shrunk = []
  for item in changed:
    if item.old_size < item.new_size:
      grown.append(item)
    else:
      shrunk.append(item)

  new_symbols = CrunchStatsData(added)
  removed_symbols = CrunchStatsData(removed)
  grown_symbols = CrunchStatsData(grown)
  shrunk_symbols = CrunchStatsData(shrunk)
  sections = [new_symbols, removed_symbols, grown_symbols, shrunk_symbols]
  # Accumulate per-section totals and per-path symbol lists.
  for section in sections:
    for item in section.symbols:
      section.sources.add(item.file_path)
      if item.old_size is not None:
        section.before_size += item.old_size
      if item.new_size is not None:
        section.after_size += item.new_size
      bucket = section.symbols_by_path.setdefault(item.file_path, [])
      bucket.append((item.symbol_name, item.symbol_type,
                     item.ExtractSymbolDelta()))

  total_change = sum(s.after_size - s.before_size for s in sections)
  summary = 'Total change: %s bytes' % DeltaStr(total_change)
  print(summary)
  print('=' * len(summary))
  for section in sections:
    if not section.symbols:
      continue
    if section.before_size == 0:
      description = ('added, totalling %s bytes' % DeltaStr(section.after_size))
    elif section.after_size == 0:
      description = ('removed, totalling %s bytes' %
                     DeltaStr(-section.before_size))
    else:
      if section.after_size > section.before_size:
        type_str = 'grown'
      else:
        type_str = 'shrunk'
      description = ('%s, for a net change of %s bytes '
                     '(%d bytes before, %d bytes after)' %
                     (type_str,
                      DeltaStr(section.after_size - section.before_size),
                      section.before_size, section.after_size))
    print(' %d %s across %d sources' %
          (len(section.symbols), description, len(section.sources)))

  maybe_unchanged_sources = set()
  unchanged_symbols_size = 0
  for item in unchanged:
    maybe_unchanged_sources.add(item.file_path)
    unchanged_symbols_size += item.old_size  # == item.new_size
  print(' %d unchanged, totalling %d bytes' %
        (len(unchanged), unchanged_symbols_size))

  # High level analysis, always output.
  unchanged_sources = maybe_unchanged_sources
  for section in sections:
    unchanged_sources = unchanged_sources - section.sources
  new_sources = (new_symbols.sources -
                 maybe_unchanged_sources -
                 removed_symbols.sources)
  removed_sources = (removed_symbols.sources -
                     maybe_unchanged_sources -
                     new_symbols.sources)
  partially_changed_sources = (grown_symbols.sources |
                               shrunk_symbols.sources | new_symbols.sources |
                               removed_symbols.sources) - \
                              removed_sources - new_sources
  all_files = set()
  for section in sections:
    all_files = all_files | section.sources
  all_files = all_files | maybe_unchanged_sources
  # print() call form is used throughout for consistency (and Python 3
  # forward compatibility); output is identical under Python 2.
  print('Source stats:')
  print(' %d sources encountered.' % len(all_files))
  print(' %d completely new.' % len(new_sources))
  print(' %d removed completely.' % len(removed_sources))
  print(' %d partially changed.' % len(partially_changed_sources))
  print(' %d completely unchanged.' % len(unchanged_sources))
  remainder = (all_files - new_sources - removed_sources -
               partially_changed_sources - unchanged_sources)
  assert len(remainder) == 0

  if not showsources:
    return  # Per-source analysis, only if requested
  print('Per-source Analysis:')
  delta_by_path = {}
  for section in sections:
    for path in section.symbols_by_path:
      entry = delta_by_path.get(path)
      if not entry:
        entry = {'plus': 0, 'minus': 0}
        delta_by_path[path] = entry
      for symbol_name, symbol_type, symbol_delta in \
          section.symbols_by_path[path]:
        if symbol_delta.old_size is None:
          delta = symbol_delta.new_size
        elif symbol_delta.new_size is None:
          delta = -symbol_delta.old_size
        else:
          delta = symbol_delta.new_size - symbol_delta.old_size

        if delta > 0:
          entry['plus'] += delta
        else:
          entry['minus'] += (-1 * delta)

  def delta_sort_key(item):
    # Sort paths by net growth, biggest growth first.
    _path, size_data = item
    growth = size_data['plus'] - size_data['minus']
    return growth

  for path, size_data in sorted(delta_by_path.items(), key=delta_sort_key,
                                reverse=True):
    gain = size_data['plus']
    loss = size_data['minus']
    delta = size_data['plus'] - size_data['minus']
    header = ' %s - Source: %s - (gained %d, lost %d)' % (DeltaStr(delta),
                                                          path, gain, loss)
    divider = '-' * len(header)
    print('')
    print(divider)
    print(header)
    print(divider)
    if showsymbols:
      def ExtractNewSize(tup):
        # tup is (symbol_name, symbol_type, SymbolDelta).
        symbol_delta = tup[2]
        return symbol_delta.new_size
      def ExtractOldSize(tup):
        symbol_delta = tup[2]
        return symbol_delta.old_size
      if path in new_symbols.symbols_by_path:
        print(' New symbols:')
        for symbol_name, symbol_type, symbol_delta in \
            sorted(new_symbols.symbols_by_path[path],
                   key=ExtractNewSize,
                   reverse=True):
          print(' %8s: %s type=%s, size=%d bytes%s' %
                (DeltaStr(symbol_delta.new_size), symbol_name, symbol_type,
                 symbol_delta.new_size, SharedInfoStr(symbol_delta)))
      if path in removed_symbols.symbols_by_path:
        print(' Removed symbols:')
        for symbol_name, symbol_type, symbol_delta in \
            sorted(removed_symbols.symbols_by_path[path],
                   key=ExtractOldSize):
          print(' %8s: %s type=%s, size=%d bytes%s' %
                (DeltaStr(-symbol_delta.old_size), symbol_name, symbol_type,
                 symbol_delta.old_size,
                 SharedInfoStr(symbol_delta)))
      for (changed_symbols_by_path, type_str) in [
          (grown_symbols.symbols_by_path, "Grown"),
          (shrunk_symbols.symbols_by_path, "Shrunk")]:
        if path in changed_symbols_by_path:
          print(' %s symbols:' % type_str)
          def changed_symbol_sortkey(item):
            # Largest shrink first; ties broken alphabetically by name.
            symbol_name, _symbol_type, symbol_delta = item
            return (symbol_delta.old_size - symbol_delta.new_size, symbol_name)
          for symbol_name, symbol_type, symbol_delta in \
              sorted(changed_symbols_by_path[path], key=changed_symbol_sortkey):
            print(' %8s: %s type=%s, (was %d bytes, now %d bytes)%s'
                  % (DeltaStr(symbol_delta.new_size - symbol_delta.old_size),
                     symbol_name, symbol_type,
                     symbol_delta.old_size, symbol_delta.new_size,
                     SharedInfoStr(symbol_delta)))
+
+
def main():
  """Parse command-line flags, compare the two nm dumps, print stats.

  Returns:
    None on success (parser.error() exits with status 2 on bad flags).
  """
  usage = """%prog [options]

  Analyzes the symbolic differences between two binary files
  (typically, not necessarily, two different builds of the same
  library) and produces a detailed description of symbols that have
  been added, removed, or whose size has changed.

  Example:
       explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump

  Options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm1', metavar='PATH',
                    help='the nm dump of the first library')
  parser.add_option('--nm2', metavar='PATH',
                    help='the nm dump of the second library')
  parser.add_option('--showsources', action='store_true', default=False,
                    help='show per-source statistics')
  parser.add_option('--showsymbols', action='store_true', default=False,
                    help='show all symbol information; implies --showsources')
  parser.add_option('--verbose', action='store_true', default=False,
                    help='output internal debugging stuff')
  opts, _args = parser.parse_args()

  if not opts.nm1:
    parser.error('--nm1 is required')
  if not opts.nm2:
    parser.error('--nm2 is required')
  symbols = []
  for path in [opts.nm1, opts.nm2]:
    # open() instead of the Python-2-only file() builtin.
    with open(path, 'r') as nm_input:
      if opts.verbose:
        print('parsing ' + path + '...')
      symbols.append(list(binary_size_utils.ParseNm(nm_input)))
  (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
  # Logical 'or' rather than bitwise '|': --showsymbols implies
  # --showsources. Result is identical for booleans; intent is clearer.
  CrunchStats(added, removed, changed, unchanged,
              opts.showsources or opts.showsymbols, opts.showsymbols)


if __name__ == '__main__':
  sys.exit(main())
« no previous file with comments | « runtime/third_party/binary_size/src/elf_symbolizer.py ('k') | runtime/third_party/binary_size/src/run_binary_size_analysis.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698