OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/python | |
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
#!/usr/bin/env python ?
The difference is that the
| |
2 # Copyright 2014 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """Generate a description of the size differences between two binaries | |
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
The first line of a docstring should fit in one li
| |
7 based on an analysis of symbols. | |
8 | |
9 This tool needs "nm" dumps of each binary with full symbol information. | |
10 In order to obtain such dumps you need full source checkouts of each | |
11 binary that you want to analyze. You can obtain the necessary dumps by | |
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
Out of curiosity, why you need the source?
Isn't t
Daniel Bratell
2014/05/21 08:42:13
I don't know for sure, but we've talked about comp
| |
12 running the run_binary_size_analysis.py script upon each binary, with | |
13 the "--nm-out" parameter set to the location in which you want to save | |
14 the dumps. Example: | |
15 | |
16 # obtain symbol data from first binary in /tmp/nm1.dump | |
17 cd $CHECKOUT1_SRC | |
18 ninja -C out/Release binary_size_tool | |
19 tools/binary_size/run_binary_size_analysis.py \ | |
20 --library <path_to_library> \ | |
21 --destdir /tmp/throwaway \ | |
22 --nm-out /tmp/nm1.dump | |
23 | |
24 # obtain symbol data from second binary in /tmp/nm2.dump | |
25 cd $CHECKOUT2_SRC | |
26 ninja -C out/Release binary_size_tool | |
27 tools/binary_size/run_binary_size_analysis.py \ | |
28 --library <path_to_library> \ | |
29 --destdir /tmp/throwaway \ | |
30 --nm-out /tmp/nm2.dump | |
31 | |
32 # cleanup useless files | |
33 rm -r /tmp/throwaway | |
34 | |
35 # run this tool | |
36 explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump | |
37 """ | |
38 | |
39 import collections | |
40 import fileinput | |
41 import json | |
42 import optparse | |
43 import os | |
44 import pprint | |
45 import sys | |
46 | |
47 import binary_size_utils | |
48 | |
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
Nit, add an extra line (two lines between top leve
| |
def compare(symbols1, symbols2):
  """Diff two symbol lists and classify every symbol.

  Symbols are matched on (normalized file path, symbol type, symbol name).
  Any symbol whose name contains 'vtable for ' is re-typed as '@' so that it
  lands in a separate bucket from the other symbols of the same file. A
  missing or empty file path is replaced with '(No Path)'.

  Args:
    symbols1: iterable of (symbol_name, symbol_type, symbol_size, file_path)
        tuples describing the first ("before") binary.
    symbols2: the same, for the second ("after") binary.

  Returns:
    A list [added, removed, changed, unchanged]. Each element is a list of
    (file_path, symbol_type, symbol_name, size1, size2) tuples, where size1
    is None for added symbols and size2 is None for removed symbols.
  """
  def build_cache(symbols):
    # Builds {(file_path, symbol_type): {symbol_name: symbol_size}}.
    cache = {}
    for symbol_name, symbol_type, symbol_size, file_path in symbols:
      if 'vtable for ' in symbol_name:
        symbol_type = '@'  # hack to categorize these separately
      if file_path:
        file_path = os.path.normpath(file_path)
      else:
        file_path = '(No Path)'
      # If a name repeats within one bucket, the last size seen wins.
      bucket = cache.setdefault((file_path, symbol_type), {})
      bucket[symbol_name] = symbol_size
    return cache

  cache1 = build_cache(symbols1)
  cache2 = build_cache(symbols2)

  added = []
  removed = []
  changed = []
  unchanged = []

  # Walk everything in cache1. Each symbol that also exists in cache2 is
  # popped from cache2, so whatever is left in cache2 afterwards is new.
  # cache1 itself is never modified while it is being iterated.
  for (file_path, symbol_type), bucket1 in cache1.items():
    # A missing bucket means every symbol of bucket1 was removed.
    bucket2 = cache2.get((file_path, symbol_type), {})
    for symbol_name, size1 in bucket1.items():
      # Test membership rather than truthiness so that zero-sized symbols
      # are not mistaken for missing ones.
      if symbol_name not in bucket2:
        removed.append((file_path, symbol_type, symbol_name, size1, None))
        continue
      size2 = bucket2.pop(symbol_name)
      if size1 != size2:
        changed.append((file_path, symbol_type, symbol_name, size1, size2))
      else:
        unchanged.append((file_path, symbol_type, symbol_name, size1, size2))

  # Buckets emptied above contribute nothing here; only new symbols remain.
  for (file_path, symbol_type), bucket2 in cache2.items():
    for symbol_name, size2 in bucket2.items():
      added.append((file_path, symbol_type, symbol_name, None, size2))
  return [added, removed, changed, unchanged]
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
Shouldn't this be a tuple rather than a list?
You
| |
108 | |
109 | |
def test_compare():
  """Self-test for compare(), followed by a smoke run of crunchStats().

  Builds a "before" and an "after" symbol list covering unchanged, changed,
  removed and added symbols, both within one (file, type) bucket and across
  different buckets, then asserts that compare() puts each symbol into the
  expected category. Finally feeds the result through crunchStats() so that
  its internal consistency assertion is exercised too.
  """
  # List entries have form: symbol_name, symbol_type, symbol_size, file_path
  symbol_list1 = (
      # File with one symbol, left as-is.
      ('unchanged', 't', 1000, '/file_unchanged'),
      # File with one symbol, changed.
      ('changed', 't', 1000, '/file_all_changed'),
      # File with one symbol, deleted.
      ('removed', 't', 1000, '/file_all_deleted'),
      # File with two symbols, one unchanged, one changed, same bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_changed'),
      ('changed', 't', 1000, '/file_pair_unchanged_changed'),
      # File with two symbols, one unchanged, one deleted, same bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_removed'),
      ('removed', 't', 1000, '/file_pair_unchanged_removed'),
      # File with two symbols, one unchanged, one added, same bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_added'),
      # File with two symbols, one unchanged, one changed, different bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_changed'),
      ('changed', '@', 1000, '/file_pair_unchanged_diffbuck_changed'),
      # File with two symbols, one unchanged, one deleted, different bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_removed'),
      ('removed', '@', 1000, '/file_pair_unchanged_diffbuck_removed'),
      # File with two symbols, one unchanged, one added, different bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_added'),
      # File with four symbols, one added, one removed, one changed,
      # one unchanged
      ('size_changed', 't', 1000, '/file_tetra'),
      ('removed', 't', 1000, '/file_tetra'),
      ('unchanged', 't', 1000, '/file_tetra'),
  )

  symbol_list2 = (
      # File with one symbol, left as-is.
      ('unchanged', 't', 1000, '/file_unchanged'),
      # File with one symbol, changed.
      ('changed', 't', 2000, '/file_all_changed'),
      # File with two symbols, one unchanged, one changed, same bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_changed'),
      ('changed', 't', 2000, '/file_pair_unchanged_changed'),
      # File with two symbols, one unchanged, one deleted, same bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_removed'),
      # File with two symbols, one unchanged, one added, same bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_added'),
      ('added', 't', 1000, '/file_pair_unchanged_added'),
      # File with two symbols, one unchanged, one changed, different bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_changed'),
      ('changed', '@', 2000, '/file_pair_unchanged_diffbuck_changed'),
      # File with two symbols, one unchanged, one deleted, different bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_removed'),
      # File with two symbols, one unchanged, one added, different bucket
      ('unchanged', 't', 1000, '/file_pair_unchanged_diffbuck_added'),
      ('added', '@', 1000, '/file_pair_unchanged_diffbuck_added'),
      # File with four symbols, one added, one removed, one changed,
      # one unchanged
      ('size_changed', 't', 2000, '/file_tetra'),
      ('unchanged', 't', 1000, '/file_tetra'),
      ('added', 't', 1000, '/file_tetra'),
      # New file with one symbol added
      ('added', 't', 1000, '/file_new'),
  )

  # Here we go
  (added, removed, changed, unchanged) = compare(symbol_list1, symbol_list2)

  # File with one symbol, left as-is.
  assert ('/file_unchanged', 't', 'unchanged', 1000, 1000) in unchanged
  # File with one symbol, changed.
  assert ('/file_all_changed', 't', 'changed', 1000, 2000) in changed
  # File with one symbol, deleted.
  assert ('/file_all_deleted', 't', 'removed', 1000, None) in removed
  # New file with one symbol added
  assert ('/file_new', 't', 'added', None, 1000) in added
  # File with two symbols, one unchanged, one changed, same bucket
  assert ('/file_pair_unchanged_changed',
          't', 'unchanged', 1000, 1000) in unchanged
  assert ('/file_pair_unchanged_changed',
          't', 'changed', 1000, 2000) in changed
  # File with two symbols, one unchanged, one removed, same bucket
  assert ('/file_pair_unchanged_removed',
          't', 'unchanged', 1000, 1000) in unchanged
  assert ('/file_pair_unchanged_removed',
          't', 'removed', 1000, None) in removed
  # File with two symbols, one unchanged, one added, same bucket
  assert ('/file_pair_unchanged_added',
          't', 'unchanged', 1000, 1000) in unchanged
  assert ('/file_pair_unchanged_added',
          't', 'added', None, 1000) in added
  # File with two symbols, one unchanged, one changed, different bucket
  assert ('/file_pair_unchanged_diffbuck_changed',
          't', 'unchanged', 1000, 1000) in unchanged
  assert ('/file_pair_unchanged_diffbuck_changed',
          '@', 'changed', 1000, 2000) in changed
  # File with two symbols, one unchanged, one removed, different bucket
  assert ('/file_pair_unchanged_diffbuck_removed',
          't', 'unchanged', 1000, 1000) in unchanged
  assert ('/file_pair_unchanged_diffbuck_removed',
          '@', 'removed', 1000, None) in removed
  # File with two symbols, one unchanged, one added, different bucket
  assert ('/file_pair_unchanged_diffbuck_added',
          't', 'unchanged', 1000, 1000) in unchanged
  assert ('/file_pair_unchanged_diffbuck_added',
          '@', 'added', None, 1000) in added
  # File with four symbols, one added, one removed, one changed, one unchanged
  assert ('/file_tetra', 't', 'size_changed', 1000, 2000) in changed
  assert ('/file_tetra', 't', 'unchanged', 1000, 1000) in unchanged
  assert ('/file_tetra', 't', 'added', None, 1000) in added
  assert ('/file_tetra', 't', 'removed', 1000, None) in removed

  # Now check final stats. crunchStats() takes both showsources and
  # showsymbols; enable both so the complete reporting path runs.
  crunchStats(added, removed, changed, unchanged, True, True)
219 | |
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
Nit: two lines between top levels
| |
def _bucket_by_path(entries):
  """Group (file_path, type, name, size1, size2) tuples by file_path.

  Returns a dict mapping each file path to the list of
  (symbol_name, symbol_type, size1, size2) tuples seen for it, in input order.
  """
  by_path = {}
  for file_path, symbol_type, symbol_name, size1, size2 in entries:
    by_path.setdefault(file_path, []).append(
        (symbol_name, symbol_type, size1, size2))
  return by_path


def crunchStats(added, removed, changed, unchanged, showsources, showsymbols):
  """Print size statistics for the four symbol categories to stdout.

  Always prints symbol-level totals and a source-level summary. The
  per-source breakdown is printed only when showsources is true, and the
  individual symbols of each source only when showsymbols is true as well.

  Args:
    added: list of (file_path, symbol_type, symbol_name, None, size2).
    removed: list of (file_path, symbol_type, symbol_name, size1, None).
    changed: list of (file_path, symbol_type, symbol_name, size1, size2).
    unchanged: list of (file_path, symbol_type, symbol_name, size1, size2).
    showsources: if true, print the gain/loss of every affected source.
    showsymbols: if true (and showsources is true), also list the new,
        removed and changed symbols of every affected source.

  Returns:
    None.
  """
  new_symbols_by_path = _bucket_by_path(added)
  removed_symbols_by_path = _bucket_by_path(removed)
  changed_symbols_by_path = _bucket_by_path(changed)

  sources_with_new_symbols = set(new_symbols_by_path)
  sources_with_removed_symbols = set(removed_symbols_by_path)
  sources_with_changed_symbols = set(changed_symbols_by_path)
  maybe_unchanged_sources = set(entry[0] for entry in unchanged)

  # Entries are (file_path, symbol_type, symbol_name, size1, size2).
  new_symbols_size = sum(entry[4] for entry in added)
  removed_symbols_size = sum(entry[3] for entry in removed)
  before_size = sum(entry[3] for entry in changed)
  after_size = sum(entry[4] for entry in changed)
  unchanged_symbols_size = sum(entry[3] for entry in unchanged)  # == size2

  print('Symbol statistics:')
  print(' %s added, totalling %s bytes across %s sources' % (
      len(added), new_symbols_size, len(sources_with_new_symbols)))
  print(' %s removed, totalling %s bytes removed across %s sources' % (
      len(removed), removed_symbols_size, len(sources_with_removed_symbols)))
  print(' %s changed, resulting in a net change of %s bytes '
        '(%s bytes before, %s bytes after) across %s sources' % (
            len(changed), after_size - before_size, before_size, after_size,
            len(sources_with_changed_symbols)))
  print(' %s unchanged, totalling %s bytes' % (
      len(unchanged), unchanged_symbols_size))

  # High level analysis, always output.
  unchanged_sources = (maybe_unchanged_sources -
                       sources_with_changed_symbols -
                       sources_with_removed_symbols -
                       sources_with_new_symbols)
  new_sources = (sources_with_new_symbols -
                 maybe_unchanged_sources -
                 sources_with_removed_symbols)
  removed_sources = (sources_with_removed_symbols -
                     maybe_unchanged_sources -
                     sources_with_new_symbols)
  partially_changed_sources = (sources_with_changed_symbols |
                               sources_with_new_symbols |
                               sources_with_removed_symbols)
  partially_changed_sources = (partially_changed_sources -
                               removed_sources - new_sources)
  all_files = (sources_with_new_symbols |
               sources_with_removed_symbols |
               sources_with_changed_symbols |
               maybe_unchanged_sources)
  print('Source stats: ')
  print(' %s sources encountered.' % len(all_files))
  print(' %s completely new.' % len(new_sources))
  print(' %s removed completely.' % len(removed_sources))
  print(' %s partially changed.' % len(partially_changed_sources))
  print(' %s completely unchanged.' % len(unchanged_sources))
  # Sanity check: the four categories above must cover every source.
  remainder = (all_files - new_sources - removed_sources -
               partially_changed_sources - unchanged_sources)
  assert len(remainder) == 0

  if not showsources:
    return  # Per-source analysis, only if requested

  print('Per-source Analysis:')
  # Maps path -> {'plus': bytes gained, 'minus': bytes lost}.
  delta_by_path = {}

  def entry_for(path):
    return delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})

  for path, symbols in new_symbols_by_path.items():
    entry_for(path)['plus'] += sum(size2 for _, _, _, size2 in symbols)
  for path, symbols in removed_symbols_by_path.items():
    entry_for(path)['minus'] += sum(size1 for _, _, size1, _ in symbols)
  for path, symbols in changed_symbols_by_path.items():
    entry = entry_for(path)
    for _, _, size1, size2 in symbols:
      delta = size2 - size1
      if delta > 0:
        entry['plus'] += delta
      else:
        entry['minus'] -= delta

  def growth_then_name(item):
    # Largest growth first; ties are broken alphabetically by symbol name.
    symbol_name, _, size1, size2 = item
    return (size1 - size2, symbol_name)

  for path in sorted(delta_by_path):
    gain = delta_by_path[path]['plus']
    loss = delta_by_path[path]['minus']
    print(' Source: ' + path)
    print(' Change: %s bytes (gained %s, lost %s)' % (gain - loss, gain, loss))
    if not showsymbols:
      continue
    if path in new_symbols_by_path:
      print(' New symbols:')
      for symbol_name, symbol_type, _, size2 in new_symbols_by_path[path]:
        print(' %s type=%s, size=%s bytes' % (
            symbol_name, symbol_type, size2))
    if path in removed_symbols_by_path:
      print(' Removed symbols:')
      for symbol_name, symbol_type, size1, _ in removed_symbols_by_path[path]:
        print(' %s type=%s, size=%s bytes' % (
            symbol_name, symbol_type, size1))
    if path in changed_symbols_by_path:
      print(' Changed symbols:')
      for symbol_name, symbol_type, size1, size2 in sorted(
          changed_symbols_by_path[path], key=growth_then_name):
        print(' %s type=%s, delta=%s bytes (was %s bytes, now %s bytes)' % (
            symbol_name, symbol_type, size2 - size1, size1, size2))
368 | |
def main():
  """Command-line entry point: diff two nm dumps and print the report.

  Parses --nm1/--nm2 (both required; a missing one is reported through
  parser.error()), parses each dump with binary_size_utils.ParseNm, runs
  compare() on the two symbol lists and prints the result via crunchStats().

  Returns:
    None, so that sys.exit(main()) exits with status 0 on success.
  """
  usage = """%prog --nm1 PATH --nm2 PATH [options]

Explains the size difference between two binaries by comparing the symbols
found in their "nm" dumps. Every symbol is classified as added, removed,
changed or unchanged, and summary statistics are printed.

The dumps can be obtained by running run_binary_size_analysis.py on each
binary with the "--nm-out" parameter, for example:

  %prog --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump

Other options are available via '--help'.
"""
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm1', metavar='PATH',
                    help='the nm dump of the first library')
  parser.add_option('--nm2', metavar='PATH',
                    help='the nm dump of the second library')
  parser.add_option('--showsources', action='store_true', default=False,
                    help='show per-source statistics')
  parser.add_option('--showsymbols', action='store_true', default=False,
                    help='show all symbol information; implies --showsources')
  parser.add_option('--verbose', action='store_true', default=False,
                    help='output internal debugging stuff')
  opts, _ = parser.parse_args()

  if not opts.nm1:
    parser.error('--nm1 is required')
  if not opts.nm2:
    parser.error('--nm2 is required')

  symbols = []
  for path in [opts.nm1, opts.nm2]:
    # open() rather than the Python-2-only file() builtin; the local name
    # avoids shadowing the input() builtin.
    with open(path, 'r') as nm_file:
      if opts.verbose:
        print('parsing ' + path + '...')
      symbols.append(list(binary_size_utils.ParseNm(nm_file)))
  (added, removed, changed, unchanged) = compare(symbols[0], symbols[1])
  # --showsymbols implies --showsources.
  crunchStats(added, removed, changed, unchanged,
              opts.showsources or opts.showsymbols, opts.showsymbols)


if __name__ == '__main__':
  sys.exit(main())
OLD | NEW |