| OLD | NEW |
| (Empty) | |
| 1 #!/usr/bin/env python |
| 2 # Copyright 2014 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """Describe the size difference of two binaries. |
| 7 |
| 8 Generates a description of the size difference of two binaries based |
| 9 on the difference of the size of various symbols. |
| 10 |
| 11 This tool needs "nm" dumps of each binary with full symbol |
| 12 information. You can obtain the necessary dumps by running the |
| 13 run_binary_size_analysis.py script upon each binary, with the |
| 14 "--nm-out" parameter set to the location in which you want to save the |
| 15 dumps. Example: |
| 16 |
| 17 # obtain symbol data from first binary in /tmp/nm1.dump |
| 18 cd $CHECKOUT1_SRC |
| 19 ninja -C out/Release binary_size_tool |
| 20 tools/binary_size/run_binary_size_analysis \ |
| 21 --library <path_to_library> |
| 22 --destdir /tmp/throwaway |
| 23 --nm-out /tmp/nm1.dump |
| 24 |
| 25 # obtain symbol data from second binary in /tmp/nm2.dump |
| 26 cd $CHECKOUT2_SRC |
| 27 ninja -C out/Release binary_size_tool |
| 28 tools/binary_size/run_binary_size_analysis \ |
| 29 --library <path_to_library> |
| 30 --destdir /tmp/throwaway |
| 31 --nm-out /tmp/nm2.dump |
| 32 |
| 33 # cleanup useless files |
| 34 rm -r /tmp/throwaway |
| 35 |
| 36 # run this tool |
| 37 explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump |
| 38 """ |
| 39 |
| 40 import collections |
| 41 from collections import Counter |
| 42 from math import ceil |
| 43 import operator |
| 44 import optparse |
| 45 import os |
| 46 import sys |
| 47 |
| 48 import binary_size_utils |
| 49 |
| 50 |
| 51 def CalculateSharedAddresses(symbols): |
| 52 """Checks how many symbols share the same memory space. This returns a |
| 53 Counter result where result[address] will tell you how many times address was |
| 54 used by symbols.""" |
| 55 count = Counter() |
| 56 for _, _, _, _, address in symbols: |
| 57 count[address] += 1 |
| 58 |
| 59 return count |
| 60 |
| 61 |
| 62 def CalculateEffectiveSize(share_count, address, symbol_size): |
| 63 """Given a raw symbol_size and an address, this method returns the |
| 64 size we should blame on this symbol considering it might share the |
| 65 machine code/data with other symbols. Using the raw symbol_size for |
| 66 each symbol would in those cases over estimate the true cost of that |
| 67 block. |
| 68 |
| 69 """ |
| 70 shared_count = share_count[address] |
| 71 if shared_count == 1: |
| 72 return symbol_size |
| 73 |
| 74 assert shared_count > 1 |
| 75 return int(ceil(symbol_size / float(shared_count))) |
| 76 |
| 77 class SymbolDelta(object): |
| 78 """Stores old size, new size and some metadata.""" |
| 79 def __init__(self, shared): |
| 80 self.old_size = None |
| 81 self.new_size = None |
| 82 self.shares_space_with_other_symbols = shared |
| 83 |
| 84 def __eq__(self, other): |
| 85 return (self.old_size == other.old_size and |
| 86 self.new_size == other.new_size and |
| 87 self.shares_space_with_other_symbols == |
| 88 other.shares_space_with_other_symbols) |
| 89 |
| 90 def __ne__(self, other): |
| 91 return not self.__eq__(other) |
| 92 |
| 93 def copy_symbol_delta(self): |
| 94 symbol_delta = SymbolDelta(self.shares_space_with_other_symbols) |
| 95 symbol_delta.old_size = self.old_size |
| 96 symbol_delta.new_size = self.new_size |
| 97 return symbol_delta |
| 98 |
| 99 class DeltaInfo(SymbolDelta): |
| 100 """Summary of a the change for one symbol between two instances.""" |
| 101 def __init__(self, file_path, symbol_type, symbol_name, shared): |
| 102 SymbolDelta.__init__(self, shared) |
| 103 self.file_path = file_path |
| 104 self.symbol_type = symbol_type |
| 105 self.symbol_name = symbol_name |
| 106 |
| 107 def __eq__(self, other): |
| 108 return (self.file_path == other.file_path and |
| 109 self.symbol_type == other.symbol_type and |
| 110 self.symbol_name == other.symbol_name and |
| 111 SymbolDelta.__eq__(self, other)) |
| 112 |
| 113 def __ne__(self, other): |
| 114 return not self.__eq__(other) |
| 115 |
| 116 def ExtractSymbolDelta(self): |
| 117 """Returns a copy of the SymbolDelta for this DeltaInfo.""" |
| 118 return SymbolDelta.copy_symbol_delta(self) |
| 119 |
| 120 def Compare(symbols1, symbols2): |
| 121 """Executes a comparison of the symbols in symbols1 and symbols2. |
| 122 |
| 123 Returns: |
| 124 tuple of lists: (added_symbols, removed_symbols, changed_symbols, others) |
| 125 where each list contains DeltaInfo objects. |
| 126 """ |
| 127 added = [] # tuples |
| 128 removed = [] # tuples |
| 129 changed = [] # tuples |
| 130 unchanged = [] # tuples |
| 131 |
| 132 cache1 = {} |
| 133 cache2 = {} |
| 134 # Make a map of (file, symbol_type) : (symbol_name, effective_symbol_size) |
| 135 share_count1 = CalculateSharedAddresses(symbols1) |
| 136 share_count2 = CalculateSharedAddresses(symbols2) |
| 137 for cache, symbols, share_count in ((cache1, symbols1, share_count1), |
| 138 (cache2, symbols2, share_count2)): |
| 139 for symbol_name, symbol_type, symbol_size, file_path, address in symbols: |
| 140 if 'vtable for ' in symbol_name: |
| 141 symbol_type = '@' # hack to categorize these separately |
| 142 if file_path: |
| 143 file_path = os.path.normpath(file_path) |
| 144 if sys.platform.startswith('win'): |
| 145 file_path = file_path.replace('\\', '/') |
| 146 else: |
| 147 file_path = '(No Path)' |
| 148 # Take into consideration that multiple symbols might share the same |
| 149 # block of code. |
| 150 effective_symbol_size = CalculateEffectiveSize(share_count, address, |
| 151 symbol_size) |
| 152 key = (file_path, symbol_type) |
| 153 bucket = cache.setdefault(key, {}) |
| 154 size_list = bucket.setdefault(symbol_name, []) |
| 155 size_list.append((effective_symbol_size, |
| 156 effective_symbol_size != symbol_size)) |
| 157 |
| 158 # Now diff them. We iterate over the elements in cache1. For each symbol |
| 159 # that we find in cache2, we record whether it was deleted, changed, or |
| 160 # unchanged. We then remove it from cache2; all the symbols that remain |
| 161 # in cache2 at the end of the iteration over cache1 are the 'new' symbols. |
| 162 for key, bucket1 in cache1.items(): |
| 163 bucket2 = cache2.get(key) |
| 164 file_path, symbol_type = key; |
| 165 if not bucket2: |
| 166 # A file was removed. Everything in bucket1 is dead. |
| 167 for symbol_name, symbol_size_list in bucket1.items(): |
| 168 for (symbol_size, shared) in symbol_size_list: |
| 169 delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared) |
| 170 delta_info.old_size = symbol_size |
| 171 removed.append(delta_info) |
| 172 else: |
| 173 # File still exists, look for changes within. |
| 174 for symbol_name, symbol_size_list in bucket1.items(): |
| 175 size_list2 = bucket2.get(symbol_name) |
| 176 if size_list2 is None: |
| 177 # Symbol no longer exists in bucket2. |
| 178 for (symbol_size, shared) in symbol_size_list: |
| 179 delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared) |
| 180 delta_info.old_size = symbol_size |
| 181 removed.append(delta_info) |
| 182 else: |
| 183 del bucket2[symbol_name] # Symbol is not new, delete from cache2. |
| 184 if len(symbol_size_list) == 1 and len(size_list2) == 1: |
| 185 symbol_size, shared1 = symbol_size_list[0] |
| 186 size2, shared2 = size_list2[0] |
| 187 delta_info = DeltaInfo(file_path, symbol_type, symbol_name, |
| 188 shared1 or shared2) |
| 189 delta_info.old_size = symbol_size |
| 190 delta_info.new_size = size2 |
| 191 if symbol_size != size2: |
| 192 # Symbol has change size in bucket. |
| 193 changed.append(delta_info) |
| 194 else: |
| 195 # Symbol is unchanged. |
| 196 unchanged.append(delta_info) |
| 197 else: |
| 198 # Complex comparison for when a symbol exists multiple times |
| 199 # in the same file (where file can be "unknown file"). |
| 200 symbol_size_counter = collections.Counter(symbol_size_list) |
| 201 delta_counter = collections.Counter(symbol_size_list) |
| 202 delta_counter.subtract(size_list2) |
| 203 for delta_counter_key in sorted(delta_counter.keys()): |
| 204 delta = delta_counter[delta_counter_key] |
| 205 unchanged_count = symbol_size_counter[delta_counter_key] |
| 206 (symbol_size, shared) = delta_counter_key |
| 207 if delta > 0: |
| 208 unchanged_count -= delta |
| 209 for _ in range(unchanged_count): |
| 210 delta_info = DeltaInfo(file_path, symbol_type, |
| 211 symbol_name, shared) |
| 212 delta_info.old_size = symbol_size |
| 213 delta_info.new_size = symbol_size |
| 214 unchanged.append(delta_info) |
| 215 if delta > 0: # Used to be more of these than there is now. |
| 216 for _ in range(delta): |
| 217 delta_info = DeltaInfo(file_path, symbol_type, |
| 218 symbol_name, shared) |
| 219 delta_info.old_size = symbol_size |
| 220 removed.append(delta_info) |
| 221 elif delta < 0: # More of this (symbol,size) now. |
| 222 for _ in range(-delta): |
| 223 delta_info = DeltaInfo(file_path, symbol_type, |
| 224 symbol_name, shared) |
| 225 delta_info.new_size = symbol_size |
| 226 added.append(delta_info) |
| 227 |
| 228 if len(bucket2) == 0: |
| 229 del cache1[key] # Entire bucket is empty, delete from cache2 |
| 230 |
| 231 # We have now analyzed all symbols that are in cache1 and removed all of |
| 232 # the encountered symbols from cache2. What's left in cache2 is the new |
| 233 # symbols. |
| 234 for key, bucket2 in cache2.iteritems(): |
| 235 file_path, symbol_type = key; |
| 236 for symbol_name, symbol_size_list in bucket2.items(): |
| 237 for (symbol_size, shared) in symbol_size_list: |
| 238 delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared) |
| 239 delta_info.new_size = symbol_size |
| 240 added.append(delta_info) |
| 241 return (added, removed, changed, unchanged) |
| 242 |
| 243 |
| 244 def DeltaStr(number): |
| 245 """Returns the number as a string with a '+' prefix if it's > 0 and |
| 246 a '-' prefix if it's < 0.""" |
| 247 result = str(number) |
| 248 if number > 0: |
| 249 result = '+' + result |
| 250 return result |
| 251 |
| 252 |
| 253 def SharedInfoStr(symbol_info): |
| 254 """Returns a string (prefixed by space) explaining that numbers are |
| 255 adjusted because of shared space between symbols, or an empty string |
| 256 if space had not been shared.""" |
| 257 |
| 258 if symbol_info.shares_space_with_other_symbols: |
| 259 return " (adjusted sizes because of memory sharing)" |
| 260 |
| 261 return "" |
| 262 |
| 263 class CrunchStatsData(object): |
| 264 """Stores a summary of data of a certain kind.""" |
| 265 def __init__(self, symbols): |
| 266 self.symbols = symbols |
| 267 self.sources = set() |
| 268 self.before_size = 0 |
| 269 self.after_size = 0 |
| 270 self.symbols_by_path = {} |
| 271 |
| 272 |
| 273 def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols): |
| 274 """Outputs to stdout a summary of changes based on the symbol lists.""" |
| 275 # Split changed into grown and shrunk because that is easier to |
| 276 # discuss. |
| 277 grown = [] |
| 278 shrunk = [] |
| 279 for item in changed: |
| 280 if item.old_size < item.new_size: |
| 281 grown.append(item) |
| 282 else: |
| 283 shrunk.append(item) |
| 284 |
| 285 new_symbols = CrunchStatsData(added) |
| 286 removed_symbols = CrunchStatsData(removed) |
| 287 grown_symbols = CrunchStatsData(grown) |
| 288 shrunk_symbols = CrunchStatsData(shrunk) |
| 289 sections = [new_symbols, removed_symbols, grown_symbols, shrunk_symbols] |
| 290 for section in sections: |
| 291 for item in section.symbols: |
| 292 section.sources.add(item.file_path) |
| 293 if item.old_size is not None: |
| 294 section.before_size += item.old_size |
| 295 if item.new_size is not None: |
| 296 section.after_size += item.new_size |
| 297 bucket = section.symbols_by_path.setdefault(item.file_path, []) |
| 298 bucket.append((item.symbol_name, item.symbol_type, |
| 299 item.ExtractSymbolDelta())) |
| 300 |
| 301 total_change = sum(s.after_size - s.before_size for s in sections) |
| 302 summary = 'Total change: %s bytes' % DeltaStr(total_change) |
| 303 print(summary) |
| 304 print('=' * len(summary)) |
| 305 for section in sections: |
| 306 if not section.symbols: |
| 307 continue |
| 308 if section.before_size == 0: |
| 309 description = ('added, totalling %s bytes' % DeltaStr(section.after_size)) |
| 310 elif section.after_size == 0: |
| 311 description = ('removed, totalling %s bytes' % |
| 312 DeltaStr(-section.before_size)) |
| 313 else: |
| 314 if section.after_size > section.before_size: |
| 315 type_str = 'grown' |
| 316 else: |
| 317 type_str = 'shrunk' |
| 318 description = ('%s, for a net change of %s bytes ' |
| 319 '(%d bytes before, %d bytes after)' % |
| 320 (type_str, DeltaStr(section.after_size - section.before_size), |
| 321 section.before_size, section.after_size)) |
| 322 print(' %d %s across %d sources' % |
| 323 (len(section.symbols), description, len(section.sources))) |
| 324 |
| 325 maybe_unchanged_sources = set() |
| 326 unchanged_symbols_size = 0 |
| 327 for item in unchanged: |
| 328 maybe_unchanged_sources.add(item.file_path) |
| 329 unchanged_symbols_size += item.old_size # == item.new_size |
| 330 print(' %d unchanged, totalling %d bytes' % |
| 331 (len(unchanged), unchanged_symbols_size)) |
| 332 |
| 333 # High level analysis, always output. |
| 334 unchanged_sources = maybe_unchanged_sources |
| 335 for section in sections: |
| 336 unchanged_sources = unchanged_sources - section.sources |
| 337 new_sources = (new_symbols.sources - |
| 338 maybe_unchanged_sources - |
| 339 removed_symbols.sources) |
| 340 removed_sources = (removed_symbols.sources - |
| 341 maybe_unchanged_sources - |
| 342 new_symbols.sources) |
| 343 partially_changed_sources = (grown_symbols.sources | |
| 344 shrunk_symbols.sources | new_symbols.sources | |
| 345 removed_symbols.sources) - removed_sources - new_sources |
| 346 allFiles = set() |
| 347 for section in sections: |
| 348 allFiles = allFiles | section.sources |
| 349 allFiles = allFiles | maybe_unchanged_sources |
| 350 print 'Source stats:' |
| 351 print(' %d sources encountered.' % len(allFiles)) |
| 352 print(' %d completely new.' % len(new_sources)) |
| 353 print(' %d removed completely.' % len(removed_sources)) |
| 354 print(' %d partially changed.' % len(partially_changed_sources)) |
| 355 print(' %d completely unchanged.' % len(unchanged_sources)) |
| 356 remainder = (allFiles - new_sources - removed_sources - |
| 357 partially_changed_sources - unchanged_sources) |
| 358 assert len(remainder) == 0 |
| 359 |
| 360 if not showsources: |
| 361 return # Per-source analysis, only if requested |
| 362 print 'Per-source Analysis:' |
| 363 delta_by_path = {} |
| 364 for section in sections: |
| 365 for path in section.symbols_by_path: |
| 366 entry = delta_by_path.get(path) |
| 367 if not entry: |
| 368 entry = {'plus': 0, 'minus': 0} |
| 369 delta_by_path[path] = entry |
| 370 for symbol_name, symbol_type, symbol_delta in \ |
| 371 section.symbols_by_path[path]: |
| 372 if symbol_delta.old_size is None: |
| 373 delta = symbol_delta.new_size |
| 374 elif symbol_delta.new_size is None: |
| 375 delta = -symbol_delta.old_size |
| 376 else: |
| 377 delta = symbol_delta.new_size - symbol_delta.old_size |
| 378 |
| 379 if delta > 0: |
| 380 entry['plus'] += delta |
| 381 else: |
| 382 entry['minus'] += (-1 * delta) |
| 383 |
| 384 def delta_sort_key(item): |
| 385 _path, size_data = item |
| 386 growth = size_data['plus'] - size_data['minus'] |
| 387 return growth |
| 388 |
| 389 for path, size_data in sorted(delta_by_path.iteritems(), key=delta_sort_key, |
| 390 reverse=True): |
| 391 gain = size_data['plus'] |
| 392 loss = size_data['minus'] |
| 393 delta = size_data['plus'] - size_data['minus'] |
| 394 header = ' %s - Source: %s - (gained %d, lost %d)' % (DeltaStr(delta), |
| 395 path, gain, loss) |
| 396 divider = '-' * len(header) |
| 397 print '' |
| 398 print divider |
| 399 print header |
| 400 print divider |
| 401 if showsymbols: |
| 402 def ExtractNewSize(tup): |
| 403 symbol_delta = tup[2] |
| 404 return symbol_delta.new_size |
| 405 def ExtractOldSize(tup): |
| 406 symbol_delta = tup[2] |
| 407 return symbol_delta.old_size |
| 408 if path in new_symbols.symbols_by_path: |
| 409 print ' New symbols:' |
| 410 for symbol_name, symbol_type, symbol_delta in \ |
| 411 sorted(new_symbols.symbols_by_path[path], |
| 412 key=ExtractNewSize, |
| 413 reverse=True): |
| 414 print (' %8s: %s type=%s, size=%d bytes%s' % |
| 415 (DeltaStr(symbol_delta.new_size), symbol_name, symbol_type, |
| 416 symbol_delta.new_size, SharedInfoStr(symbol_delta))) |
| 417 if path in removed_symbols.symbols_by_path: |
| 418 print ' Removed symbols:' |
| 419 for symbol_name, symbol_type, symbol_delta in \ |
| 420 sorted(removed_symbols.symbols_by_path[path], |
| 421 key=ExtractOldSize): |
| 422 print (' %8s: %s type=%s, size=%d bytes%s' % |
| 423 (DeltaStr(-symbol_delta.old_size), symbol_name, symbol_type, |
| 424 symbol_delta.old_size, |
| 425 SharedInfoStr(symbol_delta))) |
| 426 for (changed_symbols_by_path, type_str) in [ |
| 427 (grown_symbols.symbols_by_path, "Grown"), |
| 428 (shrunk_symbols.symbols_by_path, "Shrunk")]: |
| 429 if path in changed_symbols_by_path: |
| 430 print ' %s symbols:' % type_str |
| 431 def changed_symbol_sortkey(item): |
| 432 symbol_name, _symbol_type, symbol_delta = item |
| 433 return (symbol_delta.old_size - symbol_delta.new_size, symbol_name) |
| 434 for symbol_name, symbol_type, symbol_delta in \ |
| 435 sorted(changed_symbols_by_path[path], key=changed_symbol_sortkey): |
| 436 print (' %8s: %s type=%s, (was %d bytes, now %d bytes)%s' |
| 437 % (DeltaStr(symbol_delta.new_size - symbol_delta.old_size), |
| 438 symbol_name, symbol_type, |
| 439 symbol_delta.old_size, symbol_delta.new_size, |
| 440 SharedInfoStr(symbol_delta))) |
| 441 |
| 442 |
| 443 def main(): |
| 444 usage = """%prog [options] |
| 445 |
| 446 Analyzes the symbolic differences between two binary files |
| 447 (typically, not necessarily, two different builds of the same |
| 448 library) and produces a detailed description of symbols that have |
| 449 been added, removed, or whose size has changed. |
| 450 |
| 451 Example: |
| 452 explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump |
| 453 |
| 454 Options are available via '--help'. |
| 455 """ |
| 456 parser = optparse.OptionParser(usage=usage) |
| 457 parser.add_option('--nm1', metavar='PATH', |
| 458 help='the nm dump of the first library') |
| 459 parser.add_option('--nm2', metavar='PATH', |
| 460 help='the nm dump of the second library') |
| 461 parser.add_option('--showsources', action='store_true', default=False, |
| 462 help='show per-source statistics') |
| 463 parser.add_option('--showsymbols', action='store_true', default=False, |
| 464 help='show all symbol information; implies --showsources') |
| 465 parser.add_option('--verbose', action='store_true', default=False, |
| 466 help='output internal debugging stuff') |
| 467 opts, _args = parser.parse_args() |
| 468 |
| 469 if not opts.nm1: |
| 470 parser.error('--nm1 is required') |
| 471 if not opts.nm2: |
| 472 parser.error('--nm2 is required') |
| 473 symbols = [] |
| 474 for path in [opts.nm1, opts.nm2]: |
| 475 with file(path, 'r') as nm_input: |
| 476 if opts.verbose: |
| 477 print 'parsing ' + path + '...' |
| 478 symbols.append(list(binary_size_utils.ParseNm(nm_input))) |
| 479 (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1]) |
| 480 CrunchStats(added, removed, changed, unchanged, |
| 481 opts.showsources | opts.showsymbols, opts.showsymbols) |
| 482 |
| 483 if __name__ == '__main__': |
| 484 sys.exit(main()) |
| OLD | NEW |