Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(250)

Unified Diff: runtime/third_party/binary_size/src/explain_binary_size_delta.py

Issue 1754443002: - Add binary size analysis tool from Chromium. (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: runtime/third_party/binary_size/src/explain_binary_size_delta.py
diff --git a/runtime/third_party/binary_size/src/explain_binary_size_delta.py b/runtime/third_party/binary_size/src/explain_binary_size_delta.py
new file mode 100755
index 0000000000000000000000000000000000000000..45c1236271f1b9ca843df6cc154a850f26ca5317
--- /dev/null
+++ b/runtime/third_party/binary_size/src/explain_binary_size_delta.py
@@ -0,0 +1,484 @@
+#!/usr/bin/env python
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Describe the size difference of two binaries.
+
+Generates a description of the size difference of two binaries based
+on the difference of the size of various symbols.
+
+This tool needs "nm" dumps of each binary with full symbol
+information. You can obtain the necessary dumps by running the
+run_binary_size_analysis.py script upon each binary, with the
+"--nm-out" parameter set to the location in which you want to save the
+dumps. Example:
+
+ # obtain symbol data from first binary in /tmp/nm1.dump
+ cd $CHECKOUT1_SRC
+ ninja -C out/Release binary_size_tool
+ tools/binary_size/run_binary_size_analysis \
+ --library <path_to_library>
+ --destdir /tmp/throwaway
+ --nm-out /tmp/nm1.dump
+
+ # obtain symbol data from second binary in /tmp/nm2.dump
+ cd $CHECKOUT2_SRC
+ ninja -C out/Release binary_size_tool
+ tools/binary_size/run_binary_size_analysis \
+ --library <path_to_library>
+ --destdir /tmp/throwaway
+ --nm-out /tmp/nm2.dump
+
+ # cleanup useless files
+ rm -r /tmp/throwaway
+
+ # run this tool
+ explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
+"""
+
+import collections
+from collections import Counter
+from math import ceil
+import operator
+import optparse
+import os
+import sys
+
+import binary_size_utils
+
+
def CalculateSharedAddresses(symbols):
  """Count how many symbols occupy each address.

  Returns a Counter where result[address] is the number of symbols
  that were found at that address; a count above one means those
  symbols share the same machine code/data.
  """
  return Counter(address for _, _, _, _, address in symbols)
+
+
def CalculateEffectiveSize(share_count, address, symbol_size):
  """Return the portion of symbol_size attributable to one symbol.

  When several symbols share the same address (i.e. the same machine
  code or data), blaming the full raw size on each of them would
  overestimate the true cost of that block, so the raw size is split
  evenly among the sharers, rounding up.
  """
  sharers = share_count[address]
  if sharers > 1:
    return int(ceil(symbol_size / float(sharers)))
  assert sharers == 1
  return symbol_size
+
class SymbolDelta(object):
  """Holds the before/after sizes of one symbol plus sharing metadata."""

  def __init__(self, shared):
    # None means "absent on that side": a brand-new symbol has
    # old_size None, a removed symbol has new_size None.
    self.old_size = None
    self.new_size = None
    # True when the recorded sizes were adjusted because the symbol
    # shares an address with other symbols.
    self.shares_space_with_other_symbols = shared

  def __eq__(self, other):
    return ((self.old_size, self.new_size,
             self.shares_space_with_other_symbols) ==
            (other.old_size, other.new_size,
             other.shares_space_with_other_symbols))

  def __ne__(self, other):
    return not self.__eq__(other)

  def copy_symbol_delta(self):
    """Return a fresh SymbolDelta with identical field values."""
    clone = SymbolDelta(self.shares_space_with_other_symbols)
    clone.old_size = self.old_size
    clone.new_size = self.new_size
    return clone
+
class DeltaInfo(SymbolDelta):
  """Summary of the change for one named symbol between two binaries."""

  def __init__(self, file_path, symbol_type, symbol_name, shared):
    SymbolDelta.__init__(self, shared)
    self.file_path = file_path
    self.symbol_type = symbol_type
    self.symbol_name = symbol_name

  def __eq__(self, other):
    identity_matches = (self.file_path == other.file_path and
                        self.symbol_type == other.symbol_type and
                        self.symbol_name == other.symbol_name)
    return identity_matches and SymbolDelta.__eq__(self, other)

  def __ne__(self, other):
    return not self.__eq__(other)

  def ExtractSymbolDelta(self):
    """Return a plain SymbolDelta copy (drops path/type/name)."""
    return SymbolDelta.copy_symbol_delta(self)
+
def Compare(symbols1, symbols2):
  """Executes a comparison of the symbols in symbols1 and symbols2.

  Args:
    symbols1: iterable of (name, type, size, path, address) tuples
        describing the first ("old") binary.
    symbols2: the same, for the second ("new") binary.

  Returns:
    tuple of lists: (added_symbols, removed_symbols, changed_symbols,
    unchanged_symbols) where each list contains DeltaInfo objects.
  """
  added = []      # DeltaInfo objects
  removed = []    # DeltaInfo objects
  changed = []    # DeltaInfo objects
  unchanged = []  # DeltaInfo objects

  cache1 = {}
  cache2 = {}
  # Make a map of (file, symbol_type) -> {symbol_name: [(size, shared)]}.
  share_count1 = CalculateSharedAddresses(symbols1)
  share_count2 = CalculateSharedAddresses(symbols2)
  for cache, symbols, share_count in ((cache1, symbols1, share_count1),
                                      (cache2, symbols2, share_count2)):
    for symbol_name, symbol_type, symbol_size, file_path, address in symbols:
      if 'vtable for ' in symbol_name:
        symbol_type = '@'  # hack to categorize these separately
      if file_path:
        file_path = os.path.normpath(file_path)
        if sys.platform.startswith('win'):
          file_path = file_path.replace('\\', '/')
      else:
        file_path = '(No Path)'
      # Take into consideration that multiple symbols might share the same
      # block of code.
      effective_symbol_size = CalculateEffectiveSize(share_count, address,
                                                     symbol_size)
      key = (file_path, symbol_type)
      bucket = cache.setdefault(key, {})
      size_list = bucket.setdefault(symbol_name, [])
      size_list.append((effective_symbol_size,
                        effective_symbol_size != symbol_size))

  # Now diff them. We iterate over the elements in cache1. For each symbol
  # that we find in cache2, we record whether it was deleted, changed, or
  # unchanged. We then remove it from cache2; all the symbols that remain
  # in cache2 at the end of the iteration over cache1 are the 'new' symbols.
  for key, bucket1 in cache1.items():
    bucket2 = cache2.get(key)
    file_path, symbol_type = key
    if not bucket2:
      # A file was removed. Everything in bucket1 is dead.
      for symbol_name, symbol_size_list in bucket1.items():
        for (symbol_size, shared) in symbol_size_list:
          delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
          delta_info.old_size = symbol_size
          removed.append(delta_info)
    else:
      # File still exists, look for changes within.
      for symbol_name, symbol_size_list in bucket1.items():
        size_list2 = bucket2.get(symbol_name)
        if size_list2 is None:
          # Symbol no longer exists in bucket2.
          for (symbol_size, shared) in symbol_size_list:
            delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
            delta_info.old_size = symbol_size
            removed.append(delta_info)
        else:
          del bucket2[symbol_name]  # Symbol is not new, delete from cache2.
          if len(symbol_size_list) == 1 and len(size_list2) == 1:
            # Common case: symbol appears exactly once on each side.
            symbol_size, shared1 = symbol_size_list[0]
            size2, shared2 = size_list2[0]
            delta_info = DeltaInfo(file_path, symbol_type, symbol_name,
                                   shared1 or shared2)
            delta_info.old_size = symbol_size
            delta_info.new_size = size2
            if symbol_size != size2:
              # Symbol has changed size in bucket.
              changed.append(delta_info)
            else:
              # Symbol is unchanged.
              unchanged.append(delta_info)
          else:
            # Complex comparison for when a symbol exists multiple times
            # in the same file (where file can be "unknown file").
            symbol_size_counter = collections.Counter(symbol_size_list)
            delta_counter = collections.Counter(symbol_size_list)
            delta_counter.subtract(size_list2)
            for delta_counter_key in sorted(delta_counter.keys()):
              delta = delta_counter[delta_counter_key]
              unchanged_count = symbol_size_counter[delta_counter_key]
              (symbol_size, shared) = delta_counter_key
              if delta > 0:
                unchanged_count -= delta
              for _ in range(unchanged_count):
                delta_info = DeltaInfo(file_path, symbol_type,
                                       symbol_name, shared)
                delta_info.old_size = symbol_size
                delta_info.new_size = symbol_size
                unchanged.append(delta_info)
              if delta > 0:  # Used to be more of these than there is now.
                for _ in range(delta):
                  delta_info = DeltaInfo(file_path, symbol_type,
                                         symbol_name, shared)
                  delta_info.old_size = symbol_size
                  removed.append(delta_info)
              elif delta < 0:  # More of this (symbol,size) now.
                for _ in range(-delta):
                  delta_info = DeltaInfo(file_path, symbol_type,
                                         symbol_name, shared)
                  delta_info.new_size = symbol_size
                  added.append(delta_info)

      if len(bucket2) == 0:
        # Entire bucket was matched; delete it from cache2 so the "new
        # symbols" sweep below skips it. (Previously this deleted from
        # cache1 while iterating it, contradicting its own comment.)
        del cache2[key]

  # We have now analyzed all symbols that are in cache1 and removed all of
  # the encountered symbols from cache2. What's left in cache2 is the new
  # symbols.
  for key, bucket2 in cache2.items():
    file_path, symbol_type = key
    for symbol_name, symbol_size_list in bucket2.items():
      for (symbol_size, shared) in symbol_size_list:
        delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
        delta_info.new_size = symbol_size
        added.append(delta_info)
  return (added, removed, changed, unchanged)
+
+
def DeltaStr(number):
  """Return number as a string, prefixed with '+' when it is positive.

  Negative numbers keep their natural '-' prefix; zero gets no prefix.
  """
  return ('+' + str(number)) if number > 0 else str(number)
+
+
def SharedInfoStr(symbol_info):
  """Return a space-prefixed note that sizes were adjusted for memory
  sharing, or an empty string when the symbol shares no space."""
  if not symbol_info.shares_space_with_other_symbols:
    return ""
  return " (adjusted sizes because of memory sharing)"
+
class CrunchStatsData(object):
  """Aggregated statistics for one category of symbols (e.g. added)."""

  def __init__(self, symbols):
    self.symbols = symbols      # the DeltaInfo objects in this category
    self.sources = set()        # file paths touched by this category
    self.before_size = 0        # summed old sizes
    self.after_size = 0         # summed new sizes
    self.symbols_by_path = {}   # path -> [(name, type, SymbolDelta)]
+
+
def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
  """Outputs to stdout a summary of changes based on the symbol lists.

  Args:
    added, removed, changed, unchanged: lists of DeltaInfo objects as
        returned by Compare().
    showsources: if True, also print per-source statistics.
    showsymbols: if True, also print per-symbol details per source.
  """
  # Split changed into grown and shrunk because that is easier to
  # discuss.
  grown = []
  shrunk = []
  for item in changed:
    if item.old_size < item.new_size:
      grown.append(item)
    else:
      shrunk.append(item)

  new_symbols = CrunchStatsData(added)
  removed_symbols = CrunchStatsData(removed)
  grown_symbols = CrunchStatsData(grown)
  shrunk_symbols = CrunchStatsData(shrunk)
  sections = [new_symbols, removed_symbols, grown_symbols, shrunk_symbols]
  # Accumulate per-section totals and per-path symbol lists.
  for section in sections:
    for item in section.symbols:
      section.sources.add(item.file_path)
      if item.old_size is not None:
        section.before_size += item.old_size
      if item.new_size is not None:
        section.after_size += item.new_size
      bucket = section.symbols_by_path.setdefault(item.file_path, [])
      bucket.append((item.symbol_name, item.symbol_type,
                     item.ExtractSymbolDelta()))

  total_change = sum(s.after_size - s.before_size for s in sections)
  summary = 'Total change: %s bytes' % DeltaStr(total_change)
  print(summary)
  print('=' * len(summary))
  for section in sections:
    if not section.symbols:
      continue
    if section.before_size == 0:
      description = ('added, totalling %s bytes' % DeltaStr(section.after_size))
    elif section.after_size == 0:
      description = ('removed, totalling %s bytes' %
                     DeltaStr(-section.before_size))
    else:
      if section.after_size > section.before_size:
        type_str = 'grown'
      else:
        type_str = 'shrunk'
      description = ('%s, for a net change of %s bytes '
                     '(%d bytes before, %d bytes after)' %
                     (type_str,
                      DeltaStr(section.after_size - section.before_size),
                      section.before_size, section.after_size))
    print(' %d %s across %d sources' %
          (len(section.symbols), description, len(section.sources)))

  maybe_unchanged_sources = set()
  unchanged_symbols_size = 0
  for item in unchanged:
    maybe_unchanged_sources.add(item.file_path)
    unchanged_symbols_size += item.old_size  # == item.new_size
  print(' %d unchanged, totalling %d bytes' %
        (len(unchanged), unchanged_symbols_size))

  # High level analysis, always output.
  unchanged_sources = maybe_unchanged_sources
  for section in sections:
    unchanged_sources = unchanged_sources - section.sources
  new_sources = (new_symbols.sources -
                 maybe_unchanged_sources -
                 removed_symbols.sources)
  removed_sources = (removed_symbols.sources -
                     maybe_unchanged_sources -
                     new_symbols.sources)
  partially_changed_sources = (grown_symbols.sources |
                               shrunk_symbols.sources | new_symbols.sources |
                               removed_symbols.sources) - \
                              removed_sources - new_sources
  all_files = set()
  for section in sections:
    all_files = all_files | section.sources
  all_files = all_files | maybe_unchanged_sources
  # print() call form is used throughout for consistency (and Python 3
  # forward compatibility); output is identical under Python 2.
  print('Source stats:')
  print(' %d sources encountered.' % len(all_files))
  print(' %d completely new.' % len(new_sources))
  print(' %d removed completely.' % len(removed_sources))
  print(' %d partially changed.' % len(partially_changed_sources))
  print(' %d completely unchanged.' % len(unchanged_sources))
  remainder = (all_files - new_sources - removed_sources -
               partially_changed_sources - unchanged_sources)
  assert len(remainder) == 0

  if not showsources:
    return  # Per-source analysis, only if requested
  print('Per-source Analysis:')
  delta_by_path = {}
  for section in sections:
    for path in section.symbols_by_path:
      entry = delta_by_path.get(path)
      if not entry:
        entry = {'plus': 0, 'minus': 0}
        delta_by_path[path] = entry
      for symbol_name, symbol_type, symbol_delta in \
          section.symbols_by_path[path]:
        if symbol_delta.old_size is None:
          delta = symbol_delta.new_size
        elif symbol_delta.new_size is None:
          delta = -symbol_delta.old_size
        else:
          delta = symbol_delta.new_size - symbol_delta.old_size

        if delta > 0:
          entry['plus'] += delta
        else:
          entry['minus'] += (-1 * delta)

  def delta_sort_key(item):
    # Sort paths by net growth, biggest growth first.
    _path, size_data = item
    growth = size_data['plus'] - size_data['minus']
    return growth

  for path, size_data in sorted(delta_by_path.items(), key=delta_sort_key,
                                reverse=True):
    gain = size_data['plus']
    loss = size_data['minus']
    delta = size_data['plus'] - size_data['minus']
    header = ' %s - Source: %s - (gained %d, lost %d)' % (DeltaStr(delta),
                                                          path, gain, loss)
    divider = '-' * len(header)
    print('')
    print(divider)
    print(header)
    print(divider)
    if showsymbols:
      def ExtractNewSize(tup):
        # tup is (symbol_name, symbol_type, SymbolDelta).
        symbol_delta = tup[2]
        return symbol_delta.new_size
      def ExtractOldSize(tup):
        symbol_delta = tup[2]
        return symbol_delta.old_size
      if path in new_symbols.symbols_by_path:
        print(' New symbols:')
        for symbol_name, symbol_type, symbol_delta in \
            sorted(new_symbols.symbols_by_path[path],
                   key=ExtractNewSize,
                   reverse=True):
          print(' %8s: %s type=%s, size=%d bytes%s' %
                (DeltaStr(symbol_delta.new_size), symbol_name, symbol_type,
                 symbol_delta.new_size, SharedInfoStr(symbol_delta)))
      if path in removed_symbols.symbols_by_path:
        print(' Removed symbols:')
        for symbol_name, symbol_type, symbol_delta in \
            sorted(removed_symbols.symbols_by_path[path],
                   key=ExtractOldSize):
          print(' %8s: %s type=%s, size=%d bytes%s' %
                (DeltaStr(-symbol_delta.old_size), symbol_name, symbol_type,
                 symbol_delta.old_size,
                 SharedInfoStr(symbol_delta)))
      for (changed_symbols_by_path, type_str) in [
          (grown_symbols.symbols_by_path, "Grown"),
          (shrunk_symbols.symbols_by_path, "Shrunk")]:
        if path in changed_symbols_by_path:
          print(' %s symbols:' % type_str)
          def changed_symbol_sortkey(item):
            # Largest shrink first; ties broken alphabetically by name.
            symbol_name, _symbol_type, symbol_delta = item
            return (symbol_delta.old_size - symbol_delta.new_size, symbol_name)
          for symbol_name, symbol_type, symbol_delta in \
              sorted(changed_symbols_by_path[path], key=changed_symbol_sortkey):
            print(' %8s: %s type=%s, (was %d bytes, now %d bytes)%s'
                  % (DeltaStr(symbol_delta.new_size - symbol_delta.old_size),
                     symbol_name, symbol_type,
                     symbol_delta.old_size, symbol_delta.new_size,
                     SharedInfoStr(symbol_delta)))
+
+
def main():
  """Parse command-line flags, compare the two nm dumps, print stats.

  Returns:
    None on success (parser.error() exits with status 2 on bad flags).
  """
  usage = """%prog [options]

  Analyzes the symbolic differences between two binary files
  (typically, not necessarily, two different builds of the same
  library) and produces a detailed description of symbols that have
  been added, removed, or whose size has changed.

  Example:
       explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump

  Options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm1', metavar='PATH',
                    help='the nm dump of the first library')
  parser.add_option('--nm2', metavar='PATH',
                    help='the nm dump of the second library')
  parser.add_option('--showsources', action='store_true', default=False,
                    help='show per-source statistics')
  parser.add_option('--showsymbols', action='store_true', default=False,
                    help='show all symbol information; implies --showsources')
  parser.add_option('--verbose', action='store_true', default=False,
                    help='output internal debugging stuff')
  opts, _args = parser.parse_args()

  if not opts.nm1:
    parser.error('--nm1 is required')
  if not opts.nm2:
    parser.error('--nm2 is required')
  symbols = []
  for path in [opts.nm1, opts.nm2]:
    # open() instead of the Python-2-only file() builtin.
    with open(path, 'r') as nm_input:
      if opts.verbose:
        print('parsing ' + path + '...')
      symbols.append(list(binary_size_utils.ParseNm(nm_input)))
  (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
  # Logical 'or' rather than bitwise '|': --showsymbols implies
  # --showsources. Result is identical for booleans; intent is clearer.
  CrunchStats(added, removed, changed, unchanged,
              opts.showsources or opts.showsymbols, opts.showsymbols)


if __name__ == '__main__':
  sys.exit(main())
« no previous file with comments | « runtime/third_party/binary_size/src/elf_symbolizer.py ('k') | runtime/third_party/binary_size/src/run_binary_size_analysis.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698