Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(33)

Side by Side Diff: tools/binary_size/explain_binary_size_delta.py

Issue 258633003: Graphical version of the run_binary_size_analysis tool. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Made the code fully pylint clean. Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Describe the size difference of two binaries.
7
8 Generates a description of the size difference of two binaries based
9 on the difference of the size of various symbols.
10
11 This tool needs "nm" dumps of each binary with full symbol
12 information. You can obtain the necessary dumps by running the
13 run_binary_size_analysis.py script upon each binary, with the
14 "--nm-out" parameter set to the location in which you want to save the
15 dumps. Example:
16
17 # obtain symbol data from first binary in /tmp/nm1.dump
18 cd $CHECKOUT1_SRC
19 ninja -C out/Release binary_size_tool
20 tools/binary_size/run_binary_size_analysis.py \
21 --library <path_to_library> \
22 --destdir /tmp/throwaway \
23 --nm-out /tmp/nm1.dump
24
25 # obtain symbol data from second binary in /tmp/nm2.dump
26 cd $CHECKOUT2_SRC
27 ninja -C out/Release binary_size_tool
28 tools/binary_size/run_binary_size_analysis.py \
29 --library <path_to_library> \
30 --destdir /tmp/throwaway \
31 --nm-out /tmp/nm2.dump
32
33 # cleanup useless files
34 rm -r /tmp/throwaway
35
36 # run this tool
37 explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
38 """
39
40 import optparse
41 import os
42 import sys
43
44 import binary_size_utils
45
46
def Compare(symbols1, symbols2):
  """Executes a comparison of the symbols in symbols1 and symbols2.

  Symbols are grouped into buckets keyed by (file_path, symbol_type) and are
  matched by name within a bucket. Symbols whose name contains 'vtable for '
  are forced into the pseudo-type '@' so that they are bucketed separately,
  and symbols without a path are filed under '(No Path)'.

  Args:
    symbols1: iterable of (symbol_name, symbol_type, symbol_size, file_path)
        tuples for the first binary.
    symbols2: iterable of the same shape for the second binary.

  Returns:
    tuple of lists: (added_symbols, removed_symbols, changed_symbols, others)
    Every list entry is a tuple
    (file_path, symbol_type, symbol_name, size1, size2), where size1 is None
    for added symbols and size2 is None for removed symbols.
  """
  added = []  # tuples
  removed = []  # tuples
  changed = []  # tuples
  unchanged = []  # tuples

  cache1 = {}
  cache2 = {}
  # Make a map of (file, symbol_type) : {symbol_name: symbol_size}
  for cache, symbols in ((cache1, symbols1), (cache2, symbols2)):
    for symbol_name, symbol_type, symbol_size, file_path in symbols:
      if 'vtable for ' in symbol_name:
        symbol_type = '@'  # hack to categorize these separately
      if file_path:
        file_path = os.path.normpath(file_path)
      else:
        file_path = '(No Path)'
      bucket = cache.setdefault((file_path, symbol_type), {})
      bucket[symbol_name] = symbol_size

  # Now diff them. We iterate over the elements in cache1. For each symbol
  # that we find in cache2, we record whether it was changed or unchanged and
  # remove it from cache2; symbols missing from cache2 were removed. Whatever
  # remains in cache2 after the walk over cache1 are the 'new' symbols.
  # cache1 itself is never mutated here, so iterating over it is safe.
  for key, bucket1 in cache1.items():
    file_path, symbol_type = key
    # A missing bucket means the whole file/type pair is gone; an empty
    # stand-in makes every symbol of bucket1 fall through to 'removed'.
    bucket2 = cache2.get(key, {})
    for symbol_name, size1 in bucket1.items():
      size2 = bucket2.get(symbol_name)
      if size2 is None:
        # Symbol no longer exists in bucket2.
        removed.append((file_path, symbol_type, symbol_name, size1, None))
        continue
      del bucket2[symbol_name]  # Symbol is not new, delete from cache2.
      if size1 != size2:
        # Symbol has changed size in bucket.
        changed.append((file_path, symbol_type, symbol_name, size1, size2))
      else:
        # Symbol is unchanged.
        unchanged.append((file_path, symbol_type, symbol_name, size1, size2))
    if not bucket2:
      # Entire bucket has been consumed (or never existed): drop it from
      # cache2 so only buckets that still hold new symbols remain.
      cache2.pop(key, None)

  # We have now analyzed all symbols that are in cache1 and removed all of
  # the encountered symbols from cache2. What's left in cache2 is the new
  # symbols.
  for (file_path, symbol_type), bucket2 in cache2.items():
    for symbol_name, size2 in bucket2.items():
      added.append((file_path, symbol_type, symbol_name, None, size2))
  return (added, removed, changed, unchanged)
108
109
def TestCompare():
  """Self-test: diffs two synthetic symbol lists and checks the outcome.

  Feeds a hand-built "before" and "after" symbol list to Compare, asserts
  that each expected record landed in the expected result list, and finally
  passes the results to CrunchStats with both detail flags enabled.
  """
  # Every record has the form: symbol_name, symbol_type, symbol_size, file_path
  before = (
      # File with one symbol, left as-is.
      ('unchanged', 't', 1000, '/file_unchanged'),
      # File with one symbol, changed.
      ('changed', 't', 1000, '/file_all_changed'),
      # File with one symbol, deleted.
      ('removed', 't', 1000, '/file_all_deleted'),
      # File with two symbols, one unchanged, one changed, same bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_changed'),
      ('changed', 't', 1000, '/file_pair_unchanged_changed'),
      # File with two symbols, one unchanged, one deleted, same bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_removed'),
      ('removed', 't', 1000, '/file_pair_unchanged_removed'),
      # File with two symbols, one unchanged, one added, same bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_added'),
      # File with two symbols, one unchanged, one changed, different bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_changed'),
      ('changed', '@', 1000, '/file_pair_unchanged_diffbuck_changed'),
      # File with two symbols, one unchanged, one deleted, different bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_removed'),
      ('removed', '@', 1000, '/file_pair_unchanged_diffbuck_removed'),
      # File with two symbols, one unchanged, one added, different bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_added'),
      # File with four symbols: added, removed, changed and unchanged
      ('size_changed', 't', 1000, '/file_tetra'),
      ('removed', 't', 1000, '/file_tetra'),
      ('unchanged', 't', 1000, '/file_tetra'),
  )

  after = (
      # File with one symbol, left as-is.
      ('unchanged', 't', 1000, '/file_unchanged'),
      # File with one symbol, changed.
      ('changed', 't', 2000, '/file_all_changed'),
      # File with two symbols, one unchanged, one changed, same bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_changed'),
      ('changed', 't', 2000, '/file_pair_unchanged_changed'),
      # File with two symbols, one unchanged, one deleted, same bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_removed'),
      # File with two symbols, one unchanged, one added, same bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_added'),
      ('added', 't', 1000, '/file_pair_unchanged_added'),
      # File with two symbols, one unchanged, one changed, different bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_changed'),
      ('changed', '@', 2000, '/file_pair_unchanged_diffbuck_changed'),
      # File with two symbols, one unchanged, one deleted, different bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_removed'),
      # File with two symbols, one unchanged, one added, different bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_added'),
      ('added', '@', 1000, '/file_pair_unchanged_diffbuck_added'),
      # File with four symbols: added, removed, changed and unchanged
      ('size_changed', 't', 2000, '/file_tetra'),
      ('unchanged', 't', 1000, '/file_tetra'),
      ('added', 't', 1000, '/file_tetra'),
      # New file with one symbol added
      ('added', 't', 1000, '/file_new'),
  )

  # Here we go
  added, removed, changed, unchanged = Compare(before, after)

  # (result list, record that must be present in it), checked in order.
  expectations = (
      # File with one symbol, left as-is.
      (unchanged, ('/file_unchanged', 't', 'unchanged', 1000, 1000)),
      # File with one symbol, changed.
      (changed, ('/file_all_changed', 't', 'changed', 1000, 2000)),
      # File with one symbol, deleted.
      (removed, ('/file_all_deleted', 't', 'removed', 1000, None)),
      # New file with one symbol added
      (added, ('/file_new', 't', 'added', None, 1000)),
      # File with two symbols, one unchanged, one changed, same bucket
      (unchanged,
       ('/file_pair_unchanged_changed', 't', 'unchanged', 1000, 1000)),
      (changed,
       ('/file_pair_unchanged_changed', 't', 'changed', 1000, 2000)),
      # File with two symbols, one unchanged, one removed, same bucket
      (unchanged,
       ('/file_pair_unchanged_removed', 't', 'unchanged', 1000, 1000)),
      (removed,
       ('/file_pair_unchanged_removed', 't', 'removed', 1000, None)),
      # File with two symbols, one unchanged, one added, same bucket
      (unchanged,
       ('/file_pair_unchanged_added', 't', 'unchanged', 1000, 1000)),
      (added,
       ('/file_pair_unchanged_added', 't', 'added', None, 1000)),
      # File with two symbols, one unchanged, one changed, different bucket
      (unchanged,
       ('/file_pair_unchanged_diffbuck_changed',
        't', 'unchanged', 1000, 1000)),
      (changed,
       ('/file_pair_unchanged_diffbuck_changed',
        '@', 'changed', 1000, 2000)),
      # File with two symbols, one unchanged, one removed, different bucket
      (unchanged,
       ('/file_pair_unchanged_diffbuck_removed',
        't', 'unchanged', 1000, 1000)),
      (removed,
       ('/file_pair_unchanged_diffbuck_removed',
        '@', 'removed', 1000, None)),
      # File with two symbols, one unchanged, one added, different bucket
      (unchanged,
       ('/file_pair_unchanged_diffbuck_added',
        't', 'unchanged', 1000, 1000)),
      (added,
       ('/file_pair_unchanged_diffbuck_added',
        '@', 'added', None, 1000)),
      # File with four symbols: added, removed, changed and unchanged
      (changed, ('/file_tetra', 't', 'size_changed', 1000, 2000)),
      (unchanged, ('/file_tetra', 't', 'unchanged', 1000, 1000)),
      (added, ('/file_tetra', 't', 'added', None, 1000)),
      (removed, ('/file_tetra', 't', 'removed', 1000, None)),
  )
  for result_list, record in expectations:
    assert record in result_list

  # Now check final stats.
  CrunchStats(added, removed, changed, unchanged, True, True)
219
220
def _ChangedSymbolSortKey(item):
  """Sort key for changed symbols: largest growth first, then by name."""
  symbol_name, _symbol_type, size1, size2 = item
  return (size1 - size2, symbol_name)


def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
  """Outputs to stdout a summary of changes based on the symbol lists.

  Args:
    added: list of (file_path, symbol_type, symbol_name, size1, size2)
        tuples for new symbols; only size2 is read.
    removed: list of the same shape for deleted symbols; only size1 is read.
    changed: list of the same shape for symbols whose size differs.
    unchanged: list of the same shape for symbols of identical size.
    showsources: if true, also print a per-source breakdown of gains and
        losses.
    showsymbols: if true, list the individual symbols below each source.
        Only consulted when showsources is true.
  """
  # Every print call passes a single parenthesised argument so that the
  # statement produces the same text under Python 2 and Python 3.
  print('Symbol statistics:')
  sources_with_new_symbols = set()
  new_symbols_size = 0
  new_symbols_by_path = {}
  for file_path, symbol_type, symbol_name, _size1, size2 in added:
    sources_with_new_symbols.add(file_path)
    new_symbols_size += size2
    bucket = new_symbols_by_path.setdefault(file_path, [])
    bucket.append((symbol_name, symbol_type, None, size2))
  print(' %d added, totalling %d bytes across %d sources' %
        (len(added), new_symbols_size, len(sources_with_new_symbols)))

  sources_with_removed_symbols = set()
  removed_symbols_size = 0
  removed_symbols_by_path = {}
  for file_path, symbol_type, symbol_name, size1, _size2 in removed:
    sources_with_removed_symbols.add(file_path)
    removed_symbols_size += size1
    bucket = removed_symbols_by_path.setdefault(file_path, [])
    bucket.append((symbol_name, symbol_type, size1, None))
  print(' %d removed, totalling %d bytes removed across %d sources' %
        (len(removed), removed_symbols_size,
         len(sources_with_removed_symbols)))

  sources_with_changed_symbols = set()
  before_size = 0
  after_size = 0
  changed_symbols_by_path = {}
  for file_path, symbol_type, symbol_name, size1, size2 in changed:
    sources_with_changed_symbols.add(file_path)
    before_size += size1
    after_size += size2
    bucket = changed_symbols_by_path.setdefault(file_path, [])
    bucket.append((symbol_name, symbol_type, size1, size2))
  print(' %d changed, resulting in a net change of %d bytes '
        '(%d bytes before, %d bytes after) across %d sources' %
        (len(changed), (after_size - before_size), before_size, after_size,
         len(sources_with_changed_symbols)))

  # A source with unchanged symbols may still have other symbols that were
  # added, removed or changed, hence "maybe".
  maybe_unchanged_sources = set()
  unchanged_symbols_size = 0
  for file_path, _symbol_type, _symbol_name, size1, _size2 in unchanged:
    maybe_unchanged_sources.add(file_path)
    unchanged_symbols_size += size1  # == size2
  print(' %d unchanged, totalling %d bytes' %
        (len(unchanged), unchanged_symbols_size))

  # High level analysis, always output.
  unchanged_sources = (maybe_unchanged_sources -
                       sources_with_changed_symbols -
                       sources_with_removed_symbols -
                       sources_with_new_symbols)
  new_sources = (sources_with_new_symbols -
                 maybe_unchanged_sources -
                 sources_with_removed_symbols)
  removed_sources = (sources_with_removed_symbols -
                     maybe_unchanged_sources -
                     sources_with_new_symbols)
  partially_changed_sources = (sources_with_changed_symbols |
                               sources_with_new_symbols |
                               sources_with_removed_symbols
                              ) - removed_sources - new_sources
  all_files = (sources_with_new_symbols |
               sources_with_removed_symbols |
               sources_with_changed_symbols |
               maybe_unchanged_sources)
  print('Source stats: ')
  print(' %d sources encountered.' % len(all_files))
  print(' %d completely new.' % len(new_sources))
  print(' %d removed completely.' % len(removed_sources))
  print(' %d partially changed.' % len(partially_changed_sources))
  print(' %d completely unchanged.' % len(unchanged_sources))
  # Internal sanity check: the four categories must cover every source.
  remainder = (all_files - new_sources - removed_sources -
               partially_changed_sources - unchanged_sources)
  assert len(remainder) == 0

  if not showsources:
    return  # Per-source analysis, only if requested
  print('Per-source Analysis:')
  # path -> {'plus': bytes gained, 'minus': bytes lost}
  delta_by_path = {}
  for path, symbols in new_symbols_by_path.items():
    entry = delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})
    for _name, _type, _size1, size2 in symbols:
      entry['plus'] += size2
  for path, symbols in removed_symbols_by_path.items():
    entry = delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})
    for _name, _type, size1, _size2 in symbols:
      entry['minus'] += size1
  for path, symbols in changed_symbols_by_path.items():
    entry = delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})
    for _name, _type, size1, size2 in symbols:
      delta = size2 - size1
      if delta > 0:
        entry['plus'] += delta
      else:
        entry['minus'] += -delta

  for path in sorted(delta_by_path):
    print(' Source: ' + path)
    size_data = delta_by_path[path]
    gain = size_data['plus']
    loss = size_data['minus']
    delta = gain - loss
    print(' Change: %d bytes (gained %d, lost %d)' % (delta, gain, loss))
    if not showsymbols:
      continue
    if path in new_symbols_by_path:
      print(' New symbols:')
      for symbol_name, symbol_type, _size1, size2 in new_symbols_by_path[path]:
        print(' %s type=%s, size=%d bytes' %
              (symbol_name, symbol_type, size2))
    if path in removed_symbols_by_path:
      print(' Removed symbols:')
      for symbol_name, symbol_type, size1, _size2 in \
          removed_symbols_by_path[path]:
        print(' %s type=%s, size=%d bytes' %
              (symbol_name, symbol_type, size1))
    if path in changed_symbols_by_path:
      print(' Changed symbols:')
      for symbol_name, symbol_type, size1, size2 in \
          sorted(changed_symbols_by_path[path], key=_ChangedSymbolSortKey):
        print(' %s type=%s, delta=%d bytes (was %d bytes, now %d bytes)'
              % (symbol_name, symbol_type, (size2 - size1), size1, size2))
356
357
def main():
  """Command-line entry point.

  Parses the options, then either runs the built-in self-test (--selftest)
  or parses the two nm dumps given by --nm1 and --nm2, compares them and
  prints the resulting statistics. A missing --nm1 or --nm2 is reported
  through parser.error().
  """
  usage = """%prog [options]

Analyzes the symbolic differences between two binary files
(typically, not necessarily, two different builds of the same
library) and produces a detailed description of symbols that have
been added, removed, or whose size has changed.

Example:
explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump

Options are available via '--help'.
"""
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm1', metavar='PATH',
                    help='the nm dump of the first library')
  parser.add_option('--nm2', metavar='PATH',
                    help='the nm dump of the second library')
  parser.add_option('--showsources', action='store_true', default=False,
                    help='show per-source statistics')
  # The help text used to mention a non-existent '--showfiles' option.
  parser.add_option('--showsymbols', action='store_true', default=False,
                    help='show all symbol information; implies --showsources')
  parser.add_option('--verbose', action='store_true', default=False,
                    help='output internal debugging stuff')
  parser.add_option('--selftest', action='store_true', default=False,
                    help='run internal diagnosis')
  opts, _args = parser.parse_args()

  if opts.selftest:
    TestCompare()
    return

  if not opts.nm1:
    parser.error('--nm1 is required')
  if not opts.nm2:
    parser.error('--nm2 is required')
  symbols = []
  for path in [opts.nm1, opts.nm2]:
    # open() instead of the Python-2-only file() builtin.
    with open(path, 'r') as nm_input:
      if opts.verbose:
        print('parsing ' + path + '...')
      # Materialize the parsed symbols before the file is closed.
      symbols.append(list(binary_size_utils.ParseNm(nm_input)))
  (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
  # --showsymbols implies --showsources.
  CrunchStats(added, removed, changed, unchanged,
              opts.showsources or opts.showsymbols, opts.showsymbols)


if __name__ == '__main__':
  sys.exit(main())
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698