OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 # Copyright 2014 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """Describe the size difference of two binaries. |
| 7 |
| 8 Generates a description of the size difference of two binaries based |
| 9 on the difference of the size of various symbols. |
| 10 |
| 11 This tool needs "nm" dumps of each binary with full symbol |
| 12 information. You can obtain the necessary dumps by running the |
| 13 run_binary_size_analysis.py script upon each binary, with the |
| 14 "--nm-out" parameter set to the location in which you want to save the |
| 15 dumps. Example: |
| 16 |
| 17 # obtain symbol data from first binary in /tmp/nm1.dump |
| 18 cd $CHECKOUT1_SRC |
| 19 ninja -C out/Release binary_size_tool |
| 20 tools/binary_size/run_binary_size_analysis.py \ |
| 21 --library <path_to_library> \ |
| 22 --destdir /tmp/throwaway \ |
| 23 --nm-out /tmp/nm1.dump |
| 24 |
| 25 # obtain symbol data from second binary in /tmp/nm2.dump |
| 26 cd $CHECKOUT2_SRC |
| 27 ninja -C out/Release binary_size_tool |
| 28 tools/binary_size/run_binary_size_analysis.py \ |
| 29 --library <path_to_library> \ |
| 30 --destdir /tmp/throwaway \ |
| 31 --nm-out /tmp/nm2.dump |
| 32 |
| 33 # cleanup useless files |
| 34 rm -r /tmp/throwaway |
| 35 |
| 36 # run this tool |
| 37 explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump |
| 38 """ |
| 39 |
| 40 import optparse |
| 41 import os |
| 42 import sys |
| 43 |
| 44 import binary_size_utils |
| 45 |
| 46 |
def Compare(symbols1, symbols2):
    """Executes a comparison of the symbols in symbols1 and symbols2.

    Symbols are matched on (normalized file path, symbol type, symbol name).
    Any symbol whose name contains 'vtable for ' is given the pseudo-type '@'
    so that it lands in a bucket of its own, and entries with a falsy file
    path are grouped under '(No Path)'.

    Args:
      symbols1: iterable of (symbol_name, symbol_type, symbol_size, file_path)
          tuples for the first binary.
      symbols2: iterable of the same shape for the second binary.

    Returns:
      tuple of lists: (added, removed, changed, unchanged). Every list entry
      is a tuple (file_path, symbol_type, symbol_name, size1, size2), where
      size1 is None for added symbols and size2 is None for removed symbols.
    """
    added = []
    removed = []
    changed = []
    unchanged = []

    cache1 = {}
    cache2 = {}
    # Build a map of (file_path, symbol_type) -> {symbol_name: symbol_size}
    # for each of the two inputs.
    for cache, symbols in ((cache1, symbols1), (cache2, symbols2)):
        for symbol_name, symbol_type, symbol_size, file_path in symbols:
            if 'vtable for ' in symbol_name:
                symbol_type = '@'  # hack to categorize these separately
            if file_path:
                file_path = os.path.normpath(file_path)
            else:
                file_path = '(No Path)'
            bucket = cache.setdefault((file_path, symbol_type), {})
            bucket[symbol_name] = symbol_size

    # Walk cache1. Every symbol also present in cache2 is classified as
    # changed/unchanged and then removed from cache2, so that whatever is
    # left in cache2 afterwards is exactly the set of new symbols.
    for key, bucket1 in cache1.items():
        file_path, symbol_type = key
        bucket2 = cache2.get(key)
        if not bucket2:
            # The whole bucket is gone. Everything in bucket1 is dead.
            for symbol_name, size1 in bucket1.items():
                removed.append(
                    (file_path, symbol_type, symbol_name, size1, None))
            continue

        for symbol_name, size1 in bucket1.items():
            size2 = bucket2.get(symbol_name)
            if size2 is None:
                # Symbol no longer exists in bucket2.
                removed.append(
                    (file_path, symbol_type, symbol_name, size1, None))
                continue
            del bucket2[symbol_name]  # Symbol is not new, drop it from cache2.
            entry = (file_path, symbol_type, symbol_name, size1, size2)
            if size1 != size2:
                changed.append(entry)
            else:
                unchanged.append(entry)

        if not bucket2:
            # Every symbol of this bucket was matched: drop the empty bucket
            # from cache2. Only cache2 is touched here; cache1 is the dict
            # being iterated and must not change size during the loop.
            del cache2[key]

    # What is left in cache2 was never seen in cache1: the new symbols.
    for (file_path, symbol_type), bucket2 in cache2.items():
        for symbol_name, size2 in bucket2.items():
            added.append((file_path, symbol_type, symbol_name, None, size2))
    return (added, removed, changed, unchanged)
| 108 |
| 109 |
def TestCompare():
    """Self-test: diffs two hand-built symbol lists and checks the result.

    Feeds Compare() a pair of fixtures covering single-symbol files, pairs of
    symbols in the same and in different buckets, and a file with one symbol
    of every kind, asserts that each expected entry shows up in the matching
    result list, and finally runs CrunchStats() over the result with
    per-source and per-symbol output enabled.
    """
    # Fixture rows have the form:
    #   (symbol_name, symbol_type, symbol_size, file_path)
    before = (
        # File with one symbol, left as-is.
        ('unchanged', 't', 1000, '/file_unchanged'),
        # File with one symbol, changed.
        ('changed', 't', 1000, '/file_all_changed'),
        # File with one symbol, deleted.
        ('removed', 't', 1000, '/file_all_deleted'),
        # File with two symbols, one unchanged, one changed, same bucket.
        ('unchanged', 't', 1000, '/file_pair_unchanged_changed'),
        ('changed', 't', 1000, '/file_pair_unchanged_changed'),
        # File with two symbols, one unchanged, one deleted, same bucket.
        ('unchanged', 't', 1000, '/file_pair_unchanged_removed'),
        ('removed', 't', 1000, '/file_pair_unchanged_removed'),
        # File with two symbols, one unchanged, one added, same bucket.
        ('unchanged', 't', 1000, '/file_pair_unchanged_added'),
        # File with two symbols, one unchanged, one changed, different bucket.
        ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_changed'),
        ('changed', '@', 1000, '/file_pair_unchanged_diffbuck_changed'),
        # File with two symbols, one unchanged, one deleted, different bucket.
        ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_removed'),
        ('removed', '@', 1000, '/file_pair_unchanged_diffbuck_removed'),
        # File with two symbols, one unchanged, one added, different bucket.
        ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_added'),
        # File with four symbols: added, removed, changed and unchanged.
        ('size_changed', 't', 1000, '/file_tetra'),
        ('removed', 't', 1000, '/file_tetra'),
        ('unchanged', 't', 1000, '/file_tetra'),
    )

    after = (
        # File with one symbol, left as-is.
        ('unchanged', 't', 1000, '/file_unchanged'),
        # File with one symbol, changed.
        ('changed', 't', 2000, '/file_all_changed'),
        # File with two symbols, one unchanged, one changed, same bucket.
        ('unchanged', 't', 1000, '/file_pair_unchanged_changed'),
        ('changed', 't', 2000, '/file_pair_unchanged_changed'),
        # File with two symbols, one unchanged, one deleted, same bucket.
        ('unchanged', 't', 1000, '/file_pair_unchanged_removed'),
        # File with two symbols, one unchanged, one added, same bucket.
        ('unchanged', 't', 1000, '/file_pair_unchanged_added'),
        ('added', 't', 1000, '/file_pair_unchanged_added'),
        # File with two symbols, one unchanged, one changed, different bucket.
        ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_changed'),
        ('changed', '@', 2000, '/file_pair_unchanged_diffbuck_changed'),
        # File with two symbols, one unchanged, one deleted, different bucket.
        ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_removed'),
        # File with two symbols, one unchanged, one added, different bucket.
        ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_added'),
        ('added', '@', 1000, '/file_pair_unchanged_diffbuck_added'),
        # File with four symbols: added, removed, changed and unchanged.
        ('size_changed', 't', 2000, '/file_tetra'),
        ('unchanged', 't', 1000, '/file_tetra'),
        ('added', 't', 1000, '/file_tetra'),
        # New file with one symbol added.
        ('added', 't', 1000, '/file_new'),
    )

    # Here we go.
    added, removed, changed, unchanged = Compare(before, after)

    # Expected rows have the form:
    #   (file_path, symbol_type, symbol_name, size1, size2)
    expected_unchanged = (
        ('/file_unchanged', 't', 'unchanged', 1000, 1000),
        ('/file_pair_unchanged_changed', 't', 'unchanged', 1000, 1000),
        ('/file_pair_unchanged_removed', 't', 'unchanged', 1000, 1000),
        ('/file_pair_unchanged_added', 't', 'unchanged', 1000, 1000),
        ('/file_pair_unchanged_diffbuck_changed', 't', 'unchanged',
         1000, 1000),
        ('/file_pair_unchanged_diffbuck_removed', 't', 'unchanged',
         1000, 1000),
        ('/file_pair_unchanged_diffbuck_added', 't', 'unchanged',
         1000, 1000),
        ('/file_tetra', 't', 'unchanged', 1000, 1000),
    )
    expected_changed = (
        ('/file_all_changed', 't', 'changed', 1000, 2000),
        ('/file_pair_unchanged_changed', 't', 'changed', 1000, 2000),
        ('/file_pair_unchanged_diffbuck_changed', '@', 'changed', 1000, 2000),
        ('/file_tetra', 't', 'size_changed', 1000, 2000),
    )
    expected_removed = (
        ('/file_all_deleted', 't', 'removed', 1000, None),
        ('/file_pair_unchanged_removed', 't', 'removed', 1000, None),
        ('/file_pair_unchanged_diffbuck_removed', '@', 'removed', 1000, None),
        ('/file_tetra', 't', 'removed', 1000, None),
    )
    expected_added = (
        ('/file_new', 't', 'added', None, 1000),
        ('/file_pair_unchanged_added', 't', 'added', None, 1000),
        ('/file_pair_unchanged_diffbuck_added', '@', 'added', None, 1000),
        ('/file_tetra', 't', 'added', None, 1000),
    )

    for expected_rows, actual_rows in ((expected_unchanged, unchanged),
                                       (expected_changed, changed),
                                       (expected_removed, removed),
                                       (expected_added, added)):
        for row in expected_rows:
            assert row in actual_rows

    # Now check final stats.
    CrunchStats(added, removed, changed, unchanged, True, True)
| 219 |
| 220 |
def _GroupSymbolsByPath(entries):
    """Groups (path, type, name, size1, size2) entries into {path: [symbol]}.

    Each grouped symbol is a (symbol_name, symbol_type, size1, size2) tuple.
    """
    by_path = {}
    for file_path, symbol_type, symbol_name, size1, size2 in entries:
        by_path.setdefault(file_path, []).append(
            (symbol_name, symbol_type, size1, size2))
    return by_path


def _ComputeDeltaByPath(new_by_path, removed_by_path, changed_by_path):
    """Returns {path: {'plus': bytes_gained, 'minus': bytes_lost}}."""
    delta_by_path = {}

    def entry_for(path):
        return delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})

    for path, symbols in new_by_path.items():
        entry = entry_for(path)
        for _name, _type, _size1, size2 in symbols:
            entry['plus'] += size2
    for path, symbols in removed_by_path.items():
        entry = entry_for(path)
        for _name, _type, size1, _size2 in symbols:
            entry['minus'] += size1
    for path, symbols in changed_by_path.items():
        entry = entry_for(path)
        for _name, _type, size1, size2 in symbols:
            delta = size2 - size1
            if delta > 0:
                entry['plus'] += delta
            else:
                entry['minus'] += -delta
    return delta_by_path


def _ChangedSymbolSortKey(item):
    """Orders changed symbols by largest growth first, then by name."""
    symbol_name, _symbol_type, size1, size2 = item
    return (size1 - size2, symbol_name)


def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
    """Outputs to stdout a summary of changes based on the symbol lists.

    Args:
      added: list of (file_path, symbol_type, symbol_name, size1, size2)
          tuples for new symbols; only size2 is read.
      removed: list of the same shape for removed symbols; only size1 is read.
      changed: list of the same shape for symbols whose size changed.
      unchanged: list of the same shape for symbols with identical size.
      showsources: if true, also print a per-source breakdown.
      showsymbols: if true (and showsources is true), also list the
          individual symbols of every source in the breakdown.
    """
    print('Symbol statistics:')
    new_symbols_by_path = _GroupSymbolsByPath(added)
    sources_with_new_symbols = set(new_symbols_by_path)
    new_symbols_size = sum(entry[4] for entry in added)
    print(' %d added, totalling %d bytes across %d sources' %
          (len(added), new_symbols_size, len(sources_with_new_symbols)))

    removed_symbols_by_path = _GroupSymbolsByPath(removed)
    sources_with_removed_symbols = set(removed_symbols_by_path)
    removed_symbols_size = sum(entry[3] for entry in removed)
    print(' %d removed, totalling %d bytes removed across %d sources' %
          (len(removed), removed_symbols_size,
           len(sources_with_removed_symbols)))

    changed_symbols_by_path = _GroupSymbolsByPath(changed)
    sources_with_changed_symbols = set(changed_symbols_by_path)
    before_size = sum(entry[3] for entry in changed)
    after_size = sum(entry[4] for entry in changed)
    print(' %d changed, resulting in a net change of %d bytes '
          '(%d bytes before, %d bytes after) across %d sources' %
          (len(changed), (after_size - before_size), before_size, after_size,
           len(sources_with_changed_symbols)))

    # A source with unchanged symbols may still have added/removed/changed
    # ones, hence "maybe".
    maybe_unchanged_sources = set(entry[0] for entry in unchanged)
    unchanged_symbols_size = sum(entry[3] for entry in unchanged)  # == size2
    print(' %d unchanged, totalling %d bytes' %
          (len(unchanged), unchanged_symbols_size))

    # High level analysis, always output.
    unchanged_sources = (maybe_unchanged_sources -
                         sources_with_changed_symbols -
                         sources_with_removed_symbols -
                         sources_with_new_symbols)
    new_sources = (sources_with_new_symbols -
                   maybe_unchanged_sources -
                   sources_with_removed_symbols)
    removed_sources = (sources_with_removed_symbols -
                       maybe_unchanged_sources -
                       sources_with_new_symbols)
    partially_changed_sources = (sources_with_changed_symbols |
                                 sources_with_new_symbols |
                                 sources_with_removed_symbols
                                ) - removed_sources - new_sources
    all_sources = (sources_with_new_symbols |
                   sources_with_removed_symbols |
                   sources_with_changed_symbols |
                   maybe_unchanged_sources)
    print('Source stats: ')
    print(' %d sources encountered.' % len(all_sources))
    print(' %d completely new.' % len(new_sources))
    print(' %d removed completely.' % len(removed_sources))
    print(' %d partially changed.' % len(partially_changed_sources))
    print(' %d completely unchanged.' % len(unchanged_sources))
    # Sanity check: the four categories must cover every source seen.
    remainder = (all_sources - new_sources - removed_sources -
                 partially_changed_sources - unchanged_sources)
    assert len(remainder) == 0

    if not showsources:
        return  # Per-source analysis, only if requested

    print('Per-source Analysis:')
    delta_by_path = _ComputeDeltaByPath(new_symbols_by_path,
                                        removed_symbols_by_path,
                                        changed_symbols_by_path)
    for path in sorted(delta_by_path):
        print(' Source: ' + path)
        gain = delta_by_path[path]['plus']
        loss = delta_by_path[path]['minus']
        print(' Change: %d bytes (gained %d, lost %d)' %
              (gain - loss, gain, loss))
        if not showsymbols:
            continue
        if path in new_symbols_by_path:
            print(' New symbols:')
            for symbol_name, symbol_type, _size1, size2 in \
                    new_symbols_by_path[path]:
                print(' %s type=%s, size=%d bytes' %
                      (symbol_name, symbol_type, size2))
        if path in removed_symbols_by_path:
            print(' Removed symbols:')
            for symbol_name, symbol_type, size1, _size2 in \
                    removed_symbols_by_path[path]:
                print(' %s type=%s, size=%d bytes' %
                      (symbol_name, symbol_type, size1))
        if path in changed_symbols_by_path:
            print(' Changed symbols:')
            for symbol_name, symbol_type, size1, size2 in sorted(
                    changed_symbols_by_path[path],
                    key=_ChangedSymbolSortKey):
                print(' %s type=%s, delta=%d bytes (was %d bytes, now %d bytes)'
                      % (symbol_name, symbol_type, (size2 - size1), size1,
                         size2))
| 356 |
| 357 |
def main():
    """Command-line entry point.

    Parses the options, then either runs the built-in self-test
    (--selftest) or parses the two nm dumps given by --nm1 and --nm2,
    compares them and prints the resulting statistics to stdout.

    Returns:
      None; the caller passes this to sys.exit(), i.e. exit status 0.
      Missing required options are reported through parser.error().
    """
    usage = """%prog [options]

    Analyzes the symbolic differences between two binary files
    (typically, not necessarily, two different builds of the same
    library) and produces a detailed description of symbols that have
    been added, removed, or whose size has changed.

    Example:
    explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump

    Options are available via '--help'.
    """
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--nm1', metavar='PATH',
                      help='the nm dump of the first library')
    parser.add_option('--nm2', metavar='PATH',
                      help='the nm dump of the second library')
    parser.add_option('--showsources', action='store_true', default=False,
                      help='show per-source statistics')
    parser.add_option('--showsymbols', action='store_true', default=False,
                      help='show all symbol information; '
                           'implies --showsources')
    parser.add_option('--verbose', action='store_true', default=False,
                      help='output internal debugging stuff')
    parser.add_option('--selftest', action='store_true', default=False,
                      help='run internal diagnosis')
    opts, _args = parser.parse_args()

    if opts.selftest:
        TestCompare()
        return

    if not opts.nm1:
        parser.error('--nm1 is required')
    if not opts.nm2:
        parser.error('--nm2 is required')

    symbols = []
    for path in [opts.nm1, opts.nm2]:
        # open() rather than the Python-2-only file() builtin.
        with open(path, 'r') as nm_input:
            if opts.verbose:
                print('parsing ' + path + '...')
            symbols.append(list(binary_size_utils.ParseNm(nm_input)))
    (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
    # --showsymbols implies --showsources.
    CrunchStats(added, removed, changed, unchanged,
                opts.showsources or opts.showsymbols, opts.showsymbols)


if __name__ == '__main__':
    sys.exit(main())
OLD | NEW |