Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(265)

Unified Diff: tools/binary_size/explain_binary_size_delta.py

Issue 2785483002: Reland of V2 of //tools/binary_size rewrite (diffs). (Closed)
Patch Set: add missing name= Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/binary_size/describe.py ('k') | tools/binary_size/explain_binary_size_delta_unittest.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/binary_size/explain_binary_size_delta.py
diff --git a/tools/binary_size/explain_binary_size_delta.py b/tools/binary_size/explain_binary_size_delta.py
deleted file mode 100755
index 45c1236271f1b9ca843df6cc154a850f26ca5317..0000000000000000000000000000000000000000
--- a/tools/binary_size/explain_binary_size_delta.py
+++ /dev/null
@@ -1,484 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2014 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Describe the size difference of two binaries.
-
-Generates a description of the size difference of two binaries based
-on the difference of the size of various symbols.
-
-This tool needs "nm" dumps of each binary with full symbol
-information. You can obtain the necessary dumps by running the
-run_binary_size_analysis.py script upon each binary, with the
-"--nm-out" parameter set to the location in which you want to save the
-dumps. Example:
-
- # obtain symbol data from first binary in /tmp/nm1.dump
- cd $CHECKOUT1_SRC
- ninja -C out/Release binary_size_tool
- tools/binary_size/run_binary_size_analysis \
- --library <path_to_library>
- --destdir /tmp/throwaway
- --nm-out /tmp/nm1.dump
-
- # obtain symbol data from second binary in /tmp/nm2.dump
- cd $CHECKOUT2_SRC
- ninja -C out/Release binary_size_tool
- tools/binary_size/run_binary_size_analysis \
- --library <path_to_library>
- --destdir /tmp/throwaway
- --nm-out /tmp/nm2.dump
-
- # cleanup useless files
- rm -r /tmp/throwaway
-
- # run this tool
- explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
-"""
-
-import collections
-from collections import Counter
-from math import ceil
-import operator
-import optparse
-import os
-import sys
-
-import binary_size_utils
-
-
-def CalculateSharedAddresses(symbols):
- """Checks how many symbols share the same memory space. This returns a
-Counter result where result[address] will tell you how many times address was
-used by symbols."""
- count = Counter()
- for _, _, _, _, address in symbols:
- count[address] += 1
-
- return count
-
-
-def CalculateEffectiveSize(share_count, address, symbol_size):
- """Given a raw symbol_size and an address, this method returns the
- size we should blame on this symbol considering it might share the
- machine code/data with other symbols. Using the raw symbol_size for
- each symbol would in those cases over estimate the true cost of that
- block.
-
- """
- shared_count = share_count[address]
- if shared_count == 1:
- return symbol_size
-
- assert shared_count > 1
- return int(ceil(symbol_size / float(shared_count)))
-
-class SymbolDelta(object):
- """Stores old size, new size and some metadata."""
- def __init__(self, shared):
- self.old_size = None
- self.new_size = None
- self.shares_space_with_other_symbols = shared
-
- def __eq__(self, other):
- return (self.old_size == other.old_size and
- self.new_size == other.new_size and
- self.shares_space_with_other_symbols ==
- other.shares_space_with_other_symbols)
-
- def __ne__(self, other):
- return not self.__eq__(other)
-
- def copy_symbol_delta(self):
- symbol_delta = SymbolDelta(self.shares_space_with_other_symbols)
- symbol_delta.old_size = self.old_size
- symbol_delta.new_size = self.new_size
- return symbol_delta
-
-class DeltaInfo(SymbolDelta):
- """Summary of a the change for one symbol between two instances."""
- def __init__(self, file_path, symbol_type, symbol_name, shared):
- SymbolDelta.__init__(self, shared)
- self.file_path = file_path
- self.symbol_type = symbol_type
- self.symbol_name = symbol_name
-
- def __eq__(self, other):
- return (self.file_path == other.file_path and
- self.symbol_type == other.symbol_type and
- self.symbol_name == other.symbol_name and
- SymbolDelta.__eq__(self, other))
-
- def __ne__(self, other):
- return not self.__eq__(other)
-
- def ExtractSymbolDelta(self):
- """Returns a copy of the SymbolDelta for this DeltaInfo."""
- return SymbolDelta.copy_symbol_delta(self)
-
-def Compare(symbols1, symbols2):
- """Executes a comparison of the symbols in symbols1 and symbols2.
-
- Returns:
- tuple of lists: (added_symbols, removed_symbols, changed_symbols, others)
- where each list contains DeltaInfo objects.
- """
- added = [] # tuples
- removed = [] # tuples
- changed = [] # tuples
- unchanged = [] # tuples
-
- cache1 = {}
- cache2 = {}
- # Make a map of (file, symbol_type) : (symbol_name, effective_symbol_size)
- share_count1 = CalculateSharedAddresses(symbols1)
- share_count2 = CalculateSharedAddresses(symbols2)
- for cache, symbols, share_count in ((cache1, symbols1, share_count1),
- (cache2, symbols2, share_count2)):
- for symbol_name, symbol_type, symbol_size, file_path, address in symbols:
- if 'vtable for ' in symbol_name:
- symbol_type = '@' # hack to categorize these separately
- if file_path:
- file_path = os.path.normpath(file_path)
- if sys.platform.startswith('win'):
- file_path = file_path.replace('\\', '/')
- else:
- file_path = '(No Path)'
- # Take into consideration that multiple symbols might share the same
- # block of code.
- effective_symbol_size = CalculateEffectiveSize(share_count, address,
- symbol_size)
- key = (file_path, symbol_type)
- bucket = cache.setdefault(key, {})
- size_list = bucket.setdefault(symbol_name, [])
- size_list.append((effective_symbol_size,
- effective_symbol_size != symbol_size))
-
- # Now diff them. We iterate over the elements in cache1. For each symbol
- # that we find in cache2, we record whether it was deleted, changed, or
- # unchanged. We then remove it from cache2; all the symbols that remain
- # in cache2 at the end of the iteration over cache1 are the 'new' symbols.
- for key, bucket1 in cache1.items():
- bucket2 = cache2.get(key)
- file_path, symbol_type = key;
- if not bucket2:
- # A file was removed. Everything in bucket1 is dead.
- for symbol_name, symbol_size_list in bucket1.items():
- for (symbol_size, shared) in symbol_size_list:
- delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
- delta_info.old_size = symbol_size
- removed.append(delta_info)
- else:
- # File still exists, look for changes within.
- for symbol_name, symbol_size_list in bucket1.items():
- size_list2 = bucket2.get(symbol_name)
- if size_list2 is None:
- # Symbol no longer exists in bucket2.
- for (symbol_size, shared) in symbol_size_list:
- delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
- delta_info.old_size = symbol_size
- removed.append(delta_info)
- else:
- del bucket2[symbol_name] # Symbol is not new, delete from cache2.
- if len(symbol_size_list) == 1 and len(size_list2) == 1:
- symbol_size, shared1 = symbol_size_list[0]
- size2, shared2 = size_list2[0]
- delta_info = DeltaInfo(file_path, symbol_type, symbol_name,
- shared1 or shared2)
- delta_info.old_size = symbol_size
- delta_info.new_size = size2
- if symbol_size != size2:
- # Symbol has change size in bucket.
- changed.append(delta_info)
- else:
- # Symbol is unchanged.
- unchanged.append(delta_info)
- else:
- # Complex comparison for when a symbol exists multiple times
- # in the same file (where file can be "unknown file").
- symbol_size_counter = collections.Counter(symbol_size_list)
- delta_counter = collections.Counter(symbol_size_list)
- delta_counter.subtract(size_list2)
- for delta_counter_key in sorted(delta_counter.keys()):
- delta = delta_counter[delta_counter_key]
- unchanged_count = symbol_size_counter[delta_counter_key]
- (symbol_size, shared) = delta_counter_key
- if delta > 0:
- unchanged_count -= delta
- for _ in range(unchanged_count):
- delta_info = DeltaInfo(file_path, symbol_type,
- symbol_name, shared)
- delta_info.old_size = symbol_size
- delta_info.new_size = symbol_size
- unchanged.append(delta_info)
- if delta > 0: # Used to be more of these than there is now.
- for _ in range(delta):
- delta_info = DeltaInfo(file_path, symbol_type,
- symbol_name, shared)
- delta_info.old_size = symbol_size
- removed.append(delta_info)
- elif delta < 0: # More of this (symbol,size) now.
- for _ in range(-delta):
- delta_info = DeltaInfo(file_path, symbol_type,
- symbol_name, shared)
- delta_info.new_size = symbol_size
- added.append(delta_info)
-
- if len(bucket2) == 0:
- del cache1[key] # Entire bucket is empty, delete from cache2
-
- # We have now analyzed all symbols that are in cache1 and removed all of
- # the encountered symbols from cache2. What's left in cache2 is the new
- # symbols.
- for key, bucket2 in cache2.iteritems():
- file_path, symbol_type = key;
- for symbol_name, symbol_size_list in bucket2.items():
- for (symbol_size, shared) in symbol_size_list:
- delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
- delta_info.new_size = symbol_size
- added.append(delta_info)
- return (added, removed, changed, unchanged)
-
-
-def DeltaStr(number):
- """Returns the number as a string with a '+' prefix if it's > 0 and
- a '-' prefix if it's < 0."""
- result = str(number)
- if number > 0:
- result = '+' + result
- return result
-
-
-def SharedInfoStr(symbol_info):
- """Returns a string (prefixed by space) explaining that numbers are
- adjusted because of shared space between symbols, or an empty string
- if space had not been shared."""
-
- if symbol_info.shares_space_with_other_symbols:
- return " (adjusted sizes because of memory sharing)"
-
- return ""
-
-class CrunchStatsData(object):
- """Stores a summary of data of a certain kind."""
- def __init__(self, symbols):
- self.symbols = symbols
- self.sources = set()
- self.before_size = 0
- self.after_size = 0
- self.symbols_by_path = {}
-
-
-def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
- """Outputs to stdout a summary of changes based on the symbol lists."""
- # Split changed into grown and shrunk because that is easier to
- # discuss.
- grown = []
- shrunk = []
- for item in changed:
- if item.old_size < item.new_size:
- grown.append(item)
- else:
- shrunk.append(item)
-
- new_symbols = CrunchStatsData(added)
- removed_symbols = CrunchStatsData(removed)
- grown_symbols = CrunchStatsData(grown)
- shrunk_symbols = CrunchStatsData(shrunk)
- sections = [new_symbols, removed_symbols, grown_symbols, shrunk_symbols]
- for section in sections:
- for item in section.symbols:
- section.sources.add(item.file_path)
- if item.old_size is not None:
- section.before_size += item.old_size
- if item.new_size is not None:
- section.after_size += item.new_size
- bucket = section.symbols_by_path.setdefault(item.file_path, [])
- bucket.append((item.symbol_name, item.symbol_type,
- item.ExtractSymbolDelta()))
-
- total_change = sum(s.after_size - s.before_size for s in sections)
- summary = 'Total change: %s bytes' % DeltaStr(total_change)
- print(summary)
- print('=' * len(summary))
- for section in sections:
- if not section.symbols:
- continue
- if section.before_size == 0:
- description = ('added, totalling %s bytes' % DeltaStr(section.after_size))
- elif section.after_size == 0:
- description = ('removed, totalling %s bytes' %
- DeltaStr(-section.before_size))
- else:
- if section.after_size > section.before_size:
- type_str = 'grown'
- else:
- type_str = 'shrunk'
- description = ('%s, for a net change of %s bytes '
- '(%d bytes before, %d bytes after)' %
- (type_str, DeltaStr(section.after_size - section.before_size),
- section.before_size, section.after_size))
- print(' %d %s across %d sources' %
- (len(section.symbols), description, len(section.sources)))
-
- maybe_unchanged_sources = set()
- unchanged_symbols_size = 0
- for item in unchanged:
- maybe_unchanged_sources.add(item.file_path)
- unchanged_symbols_size += item.old_size # == item.new_size
- print(' %d unchanged, totalling %d bytes' %
- (len(unchanged), unchanged_symbols_size))
-
- # High level analysis, always output.
- unchanged_sources = maybe_unchanged_sources
- for section in sections:
- unchanged_sources = unchanged_sources - section.sources
- new_sources = (new_symbols.sources -
- maybe_unchanged_sources -
- removed_symbols.sources)
- removed_sources = (removed_symbols.sources -
- maybe_unchanged_sources -
- new_symbols.sources)
- partially_changed_sources = (grown_symbols.sources |
- shrunk_symbols.sources | new_symbols.sources |
- removed_symbols.sources) - removed_sources - new_sources
- allFiles = set()
- for section in sections:
- allFiles = allFiles | section.sources
- allFiles = allFiles | maybe_unchanged_sources
- print 'Source stats:'
- print(' %d sources encountered.' % len(allFiles))
- print(' %d completely new.' % len(new_sources))
- print(' %d removed completely.' % len(removed_sources))
- print(' %d partially changed.' % len(partially_changed_sources))
- print(' %d completely unchanged.' % len(unchanged_sources))
- remainder = (allFiles - new_sources - removed_sources -
- partially_changed_sources - unchanged_sources)
- assert len(remainder) == 0
-
- if not showsources:
- return # Per-source analysis, only if requested
- print 'Per-source Analysis:'
- delta_by_path = {}
- for section in sections:
- for path in section.symbols_by_path:
- entry = delta_by_path.get(path)
- if not entry:
- entry = {'plus': 0, 'minus': 0}
- delta_by_path[path] = entry
- for symbol_name, symbol_type, symbol_delta in \
- section.symbols_by_path[path]:
- if symbol_delta.old_size is None:
- delta = symbol_delta.new_size
- elif symbol_delta.new_size is None:
- delta = -symbol_delta.old_size
- else:
- delta = symbol_delta.new_size - symbol_delta.old_size
-
- if delta > 0:
- entry['plus'] += delta
- else:
- entry['minus'] += (-1 * delta)
-
- def delta_sort_key(item):
- _path, size_data = item
- growth = size_data['plus'] - size_data['minus']
- return growth
-
- for path, size_data in sorted(delta_by_path.iteritems(), key=delta_sort_key,
- reverse=True):
- gain = size_data['plus']
- loss = size_data['minus']
- delta = size_data['plus'] - size_data['minus']
- header = ' %s - Source: %s - (gained %d, lost %d)' % (DeltaStr(delta),
- path, gain, loss)
- divider = '-' * len(header)
- print ''
- print divider
- print header
- print divider
- if showsymbols:
- def ExtractNewSize(tup):
- symbol_delta = tup[2]
- return symbol_delta.new_size
- def ExtractOldSize(tup):
- symbol_delta = tup[2]
- return symbol_delta.old_size
- if path in new_symbols.symbols_by_path:
- print ' New symbols:'
- for symbol_name, symbol_type, symbol_delta in \
- sorted(new_symbols.symbols_by_path[path],
- key=ExtractNewSize,
- reverse=True):
- print (' %8s: %s type=%s, size=%d bytes%s' %
- (DeltaStr(symbol_delta.new_size), symbol_name, symbol_type,
- symbol_delta.new_size, SharedInfoStr(symbol_delta)))
- if path in removed_symbols.symbols_by_path:
- print ' Removed symbols:'
- for symbol_name, symbol_type, symbol_delta in \
- sorted(removed_symbols.symbols_by_path[path],
- key=ExtractOldSize):
- print (' %8s: %s type=%s, size=%d bytes%s' %
- (DeltaStr(-symbol_delta.old_size), symbol_name, symbol_type,
- symbol_delta.old_size,
- SharedInfoStr(symbol_delta)))
- for (changed_symbols_by_path, type_str) in [
- (grown_symbols.symbols_by_path, "Grown"),
- (shrunk_symbols.symbols_by_path, "Shrunk")]:
- if path in changed_symbols_by_path:
- print ' %s symbols:' % type_str
- def changed_symbol_sortkey(item):
- symbol_name, _symbol_type, symbol_delta = item
- return (symbol_delta.old_size - symbol_delta.new_size, symbol_name)
- for symbol_name, symbol_type, symbol_delta in \
- sorted(changed_symbols_by_path[path], key=changed_symbol_sortkey):
- print (' %8s: %s type=%s, (was %d bytes, now %d bytes)%s'
- % (DeltaStr(symbol_delta.new_size - symbol_delta.old_size),
- symbol_name, symbol_type,
- symbol_delta.old_size, symbol_delta.new_size,
- SharedInfoStr(symbol_delta)))
-
-
-def main():
- usage = """%prog [options]
-
- Analyzes the symbolic differences between two binary files
- (typically, not necessarily, two different builds of the same
- library) and produces a detailed description of symbols that have
- been added, removed, or whose size has changed.
-
- Example:
- explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
-
- Options are available via '--help'.
- """
- parser = optparse.OptionParser(usage=usage)
- parser.add_option('--nm1', metavar='PATH',
- help='the nm dump of the first library')
- parser.add_option('--nm2', metavar='PATH',
- help='the nm dump of the second library')
- parser.add_option('--showsources', action='store_true', default=False,
- help='show per-source statistics')
- parser.add_option('--showsymbols', action='store_true', default=False,
- help='show all symbol information; implies --showsources')
- parser.add_option('--verbose', action='store_true', default=False,
- help='output internal debugging stuff')
- opts, _args = parser.parse_args()
-
- if not opts.nm1:
- parser.error('--nm1 is required')
- if not opts.nm2:
- parser.error('--nm2 is required')
- symbols = []
- for path in [opts.nm1, opts.nm2]:
- with file(path, 'r') as nm_input:
- if opts.verbose:
- print 'parsing ' + path + '...'
- symbols.append(list(binary_size_utils.ParseNm(nm_input)))
- (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
- CrunchStats(added, removed, changed, unchanged,
- opts.showsources | opts.showsymbols, opts.showsymbols)
-
-if __name__ == '__main__':
- sys.exit(main())
« no previous file with comments | « tools/binary_size/describe.py ('k') | tools/binary_size/explain_binary_size_delta_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698