Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1253)

Side by Side Diff: tools/binary_size/libsupersize/models.py

Issue 2832253004: supersize: nm in progress (Closed)
Patch Set: supersize: Track symbol aliases and shared symbols Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/binary_size/libsupersize/main.py ('k') | tools/binary_size/libsupersize/ninja_parser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2017 The Chromium Authors. All rights reserved. 1 # Copyright 2017 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 """Classes that comprise the data model for binary size analysis. 4 """Classes that comprise the data model for binary size analysis.
5 5
6 The primary classes are Symbol, and SymbolGroup. 6 The primary classes are Symbol, and SymbolGroup.
7 7
8 Description of common properties: 8 Description of common properties:
9 * address: The start address of the symbol. 9 * address: The start address of the symbol.
10 May be 0 (e.g. for .bss or for SymbolGroups). 10 May be 0 (e.g. for .bss or for SymbolGroups).
11 * size: The number of bytes this symbol takes up, including padding that comes 11 * size: The number of bytes this symbol takes up, including padding that comes
12 before |address|. 12 before |address|.
13 * num_aliases: The number of symbols with the same address (including self).
14 * pss: size / num_aliases.
13 * padding: The number of bytes of padding before |address| due to this symbol. 15 * padding: The number of bytes of padding before |address| due to this symbol.
14 * name: Symbol names with parameter list removed. 16 * name: Symbol names with parameter list removed.
15 Never None, but will be '' for anonymous symbols. 17 Never None, but will be '' for anonymous symbols.
16 * full_name: Symbol names with parameter list left in. 18 * full_name: Symbol names with parameter list left in.
17 Never None, but will be '' for anonymous symbols, and for symbols that do 19 Never None, but will be '' for anonymous symbols, and for symbols that do
18 not contain a parameter list. 20 not contain a parameter list.
19 * is_anonymous: True when the symbol exists in an anonymous namespace (which 21 * is_anonymous: True when the symbol exists in an anonymous namespace (which
20 are removed from both full_name and name during normalization). 22 are removed from both full_name and name during normalization).
21 * section_name: E.g. ".text", ".rodata", ".data.rel.local" 23 * section_name: E.g. ".text", ".rodata", ".data.rel.local"
22 * section: The second character of |section_name|. E.g. "t", "r", "d". 24 * section: The second character of |section_name|. E.g. "t", "r", "d".
23 """ 25 """
24 26
25 import collections 27 import collections
26 import copy 28 import logging
27 import os 29 import os
28 import re 30 import re
29 31
30 import match_util 32 import match_util
31 33
32 34
33 METADATA_GIT_REVISION = 'git_revision' 35 METADATA_GIT_REVISION = 'git_revision'
34 METADATA_APK_FILENAME = 'apk_file_name' # Path relative to output_directory. 36 METADATA_APK_FILENAME = 'apk_file_name' # Path relative to output_directory.
35 METADATA_MAP_FILENAME = 'map_file_name' # Path relative to output_directory. 37 METADATA_MAP_FILENAME = 'map_file_name' # Path relative to output_directory.
36 METADATA_ELF_ARCHITECTURE = 'elf_arch' # "Machine" field from readelf -h 38 METADATA_ELF_ARCHITECTURE = 'elf_arch' # "Machine" field from readelf -h
(...skipping 10 matching lines...) Expand all
47 't': '.text', 49 't': '.text',
48 } 50 }
49 51
50 FLAG_ANONYMOUS = 1 52 FLAG_ANONYMOUS = 1
51 FLAG_STARTUP = 2 53 FLAG_STARTUP = 2
52 FLAG_UNLIKELY = 4 54 FLAG_UNLIKELY = 4
53 FLAG_REL = 8 55 FLAG_REL = 8
54 FLAG_REL_LOCAL = 16 56 FLAG_REL_LOCAL = 16
55 57
56 58
59 def StripCloneSuffix(name):
60 clone_idx = name.find(' [clone ')
61 if clone_idx != -1:
62 return name[:clone_idx]
63 return name
64
65
57 class SizeInfo(object): 66 class SizeInfo(object):
58 """Represents all size information for a single binary. 67 """Represents all size information for a single binary.
59 68
60 Fields: 69 Fields:
61 section_sizes: A dict of section_name -> size. 70 section_sizes: A dict of section_name -> size.
62 raw_symbols: A flat list of all symbols. 71 raw_symbols: A flat list of all symbols.
63 symbols: A SymbolGroup containing raw_symbols, but with some Symbols grouped 72 symbols: A SymbolGroup containing raw_symbols, but with some Symbols grouped
64 into sub-SymbolGroups. 73 into sub-SymbolGroups.
65 metadata: A dict. 74 metadata: A dict.
66 """ 75 """
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
125 return self.size - self.padding 134 return self.size - self.padding
126 135
127 @property 136 @property
128 def end_address(self): 137 def end_address(self):
129 return self.address + self.size_without_padding 138 return self.address + self.size_without_padding
130 139
131 @property 140 @property
132 def is_anonymous(self): 141 def is_anonymous(self):
133 return bool(self.flags & FLAG_ANONYMOUS) 142 return bool(self.flags & FLAG_ANONYMOUS)
134 143
144 @property
145 def num_aliases(self):
146 return len(self.aliases) if self.aliases else 1
147
135 def FlagsString(self): 148 def FlagsString(self):
136 # Most flags are 0. 149 # Most flags are 0.
137 flags = self.flags 150 flags = self.flags
138 if not flags: 151 if not flags and not self.aliases:
139 return '{}' 152 return '{}'
140 parts = [] 153 parts = []
141 if flags & FLAG_ANONYMOUS: 154 if flags & FLAG_ANONYMOUS:
142 parts.append('anon') 155 parts.append('anon')
143 if flags & FLAG_STARTUP: 156 if flags & FLAG_STARTUP:
144 parts.append('startup') 157 parts.append('startup')
145 if flags & FLAG_UNLIKELY: 158 if flags & FLAG_UNLIKELY:
146 parts.append('unlikely') 159 parts.append('unlikely')
147 if flags & FLAG_REL: 160 if flags & FLAG_REL:
148 parts.append('rel') 161 parts.append('rel')
149 if flags & FLAG_REL_LOCAL: 162 if flags & FLAG_REL_LOCAL:
150 parts.append('rel.loc') 163 parts.append('rel.loc')
164 # Not actually a part of flags, but useful to show it here.
165 if self.aliases:
166 parts.append('{} aliases'.format(self.num_aliases))
151 return '{%s}' % ','.join(parts) 167 return '{%s}' % ','.join(parts)
152 168
153 def IsBss(self): 169 def IsBss(self):
154 return self.section_name == '.bss' 170 return self.section_name == '.bss'
155 171
156 def IsGroup(self): 172 def IsGroup(self):
157 return False 173 return False
158 174
159 def IsGenerated(self): 175 def IsGenerated(self):
160 # TODO(agrieve): Also match generated functions such as: 176 # TODO(agrieve): Also match generated functions such as:
161 # startup._GLOBAL__sub_I_page_allocator.cc 177 # startup._GLOBAL__sub_I_page_allocator.cc
162 return self.name.endswith(']') and not self.name.endswith('[]') 178 return self.name.endswith(']') and not self.name.endswith('[]')
163 179
164 def _Key(self): 180 def _Key(self):
165 """Returns a tuple that can be used to see if two Symbol are the same. 181 """Returns a tuple that can be used to see if two Symbol are the same.
166 182
167 Keys are not guaranteed to be unique within a SymbolGroup. For example, it 183 Keys are not guaranteed to be unique within a SymbolGroup. For example, it
168 is common to have multiple "** merge strings" symbols, which will have a 184 is common to have multiple "** merge strings" symbols, which will have a
169 common key.""" 185 common key."""
170 stripped_full_name = self.full_name 186 stripped_full_name = self.full_name
171 if stripped_full_name: 187 if stripped_full_name:
172 clone_idx = stripped_full_name.find(' [clone ') 188 stripped_full_name = StripCloneSuffix(stripped_full_name)
173 if clone_idx != -1:
174 stripped_full_name = stripped_full_name[:clone_idx]
175 return (self.section_name, stripped_full_name or self.name) 189 return (self.section_name, stripped_full_name or self.name)
176 190
177 191
178 class Symbol(BaseSymbol): 192 class Symbol(BaseSymbol):
179 """Represents a single symbol within a binary. 193 """Represents a single symbol within a binary.
180 194
181 Refer to module docs for field descriptions. 195 Refer to module docs for field descriptions.
182 """ 196 """
183 197
184 __slots__ = ( 198 __slots__ = (
185 'address', 199 'address',
186 'full_name', 200 'full_name',
187 'flags', 201 'flags',
188 'object_path', 202 'object_path',
189 'name', 203 'name',
204 'aliases',
190 'padding', 205 'padding',
191 'section_name', 206 'section_name',
192 'source_path', 207 'source_path',
193 'size', 208 'size',
194 ) 209 )
195 210
196 def __init__(self, section_name, size_without_padding, address=None, 211 def __init__(self, section_name, size_without_padding, address=None,
197 name=None, source_path=None, object_path=None, full_name=None, 212 name=None, source_path=None, object_path=None, full_name=None,
198 flags=0): 213 flags=0, aliases=None):
199 self.section_name = section_name 214 self.section_name = section_name
200 self.address = address or 0 215 self.address = address or 0
201 self.name = name or '' 216 self.name = name or ''
202 self.full_name = full_name or '' 217 self.full_name = full_name or ''
203 self.source_path = source_path or '' 218 self.source_path = source_path or ''
204 self.object_path = object_path or '' 219 self.object_path = object_path or ''
205 self.size = size_without_padding 220 self.size = size_without_padding
206 self.flags = flags 221 self.flags = flags
222 self.aliases = aliases
207 self.padding = 0 223 self.padding = 0
208 224
209 def __repr__(self): 225 def __repr__(self):
210 return ('%s@%x(size_without_padding=%d,padding=%d,name=%s,path=%s,flags=%s)' 226 return ('%s@%x(size_without_padding=%d,padding=%d,name=%s,object_path=%s,source_path=%s,flags=%s)'
211 % (self.section_name, self.address, self.size_without_padding, 227 % (self.section_name, self.address, self.size_without_padding,
212 self.padding, self.name, self.source_path or self.object_path, 228 self.padding, self.name, self.object_path, self.source_path,
213 self.FlagsString())) 229 self.FlagsString()))
214 230
231 @property
232 def pss(self):
233 return float(self.size) / self.num_aliases
234
235 @property
236 def pss_without_padding(self):
237 return float(self.size_without_padding) / self.num_aliases
238
215 239
216 class SymbolGroup(BaseSymbol): 240 class SymbolGroup(BaseSymbol):
217 """Represents a group of symbols using the same interface as Symbol. 241 """Represents a group of symbols using the same interface as Symbol.
218 242
219 SymbolGroups are immutable. All filtering / sorting will return new 243 SymbolGroups are immutable. All filtering / sorting will return new
220 SymbolGroups objects. 244 SymbolGroups objects.
221 245
222 Overrides many __functions__. E.g. the following are all valid: 246 Overrides many __functions__. E.g. the following are all valid:
223 * len(group) 247 * len(group)
224 * iter(group) 248 * iter(group)
225 * group[0] 249 * group[0]
226 * group['0x1234'] # By symbol address 250 * group['0x1234'] # By symbol address
227 * without_group2 = group1 - group2 251 * without_group2 = group1 - group2
228 * unioned = group1 + group2 252 * unioned = group1 + group2
229 """ 253 """
230 254
231 __slots__ = ( 255 __slots__ = (
232 '_padding', 256 '_padding',
233 '_size', 257 '_size',
258 '_pss',
234 '_symbols', 259 '_symbols',
235 '_filtered_symbols', 260 '_filtered_symbols',
236 'full_name', 261 'full_name',
237 'name', 262 'name',
238 'section_name', 263 'section_name',
239 'is_sorted', 264 'is_sorted',
240 ) 265 )
241 266
242 def __init__(self, symbols, filtered_symbols=None, name=None, 267 def __init__(self, symbols, filtered_symbols=None, name=None,
243 full_name=None, section_name=None, is_sorted=False): 268 full_name=None, section_name=None, is_sorted=False):
244 self._padding = None 269 self._padding = None
245 self._size = None 270 self._size = None
271 self._pss = None
246 self._symbols = symbols 272 self._symbols = symbols
247 self._filtered_symbols = filtered_symbols or [] 273 self._filtered_symbols = filtered_symbols or []
248 self.name = name or '' 274 self.name = name or ''
249 self.full_name = full_name 275 self.full_name = full_name
250 self.section_name = section_name or '.*' 276 self.section_name = section_name or '.*'
251 self.is_sorted = is_sorted 277 self.is_sorted = is_sorted
252 278
253 def __repr__(self): 279 def __repr__(self):
254 return 'Group(name=%s,count=%d,size=%d)' % ( 280 return 'Group(name=%s,count=%d,size=%d)' % (
255 self.name, len(self), self.size) 281 self.name, len(self), self.size)
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
295 return first if all(s.address == first for s in self._symbols) else 0 321 return first if all(s.address == first for s in self._symbols) else 0
296 322
297 @property 323 @property
298 def flags(self): 324 def flags(self):
299 first = self._symbols[0].flags 325 first = self._symbols[0].flags
300 return first if all(s.flags == first for s in self._symbols) else 0 326 return first if all(s.flags == first for s in self._symbols) else 0
301 327
302 @property 328 @property
303 def object_path(self): 329 def object_path(self):
304 first = self._symbols[0].object_path 330 first = self._symbols[0].object_path
305 return first if all(s.object_path == first for s in self._symbols) else None 331 return first if all(s.object_path == first for s in self._symbols) else ''
306 332
307 @property 333 @property
308 def source_path(self): 334 def source_path(self):
309 first = self._symbols[0].source_path 335 first = self._symbols[0].source_path
310 return first if all(s.source_path == first for s in self._symbols) else None 336 return first if all(s.source_path == first for s in self._symbols) else ''
337
338 def IterUniqueSymbols(self):
339 seen_aliases_lists = set()
340 for s in self:
341 if not s.aliases:
342 yield s
343 elif id(s.aliases) not in seen_aliases_lists:
344 seen_aliases_lists.add(id(s.aliases))
345 yield s
311 346
312 @property 347 @property
313 def size(self): 348 def size(self):
314 if self._size is None: 349 if self._size is None:
315 if self.IsBss(): 350 if self.IsBss():
316 self._size = sum(s.size for s in self) 351 self._size = sum(s.size for s in self)
317 self._size = sum(s.size for s in self if not s.IsBss()) 352 else:
353 self._size = sum(s.size for s in self.IterUniqueSymbols())
318 return self._size 354 return self._size
319 355
320 @property 356 @property
357 def pss(self):
358 if self._pss is None:
359 if self.IsBss():
360 self._pss = self.size
361 else:
362 self._pss = sum(s.pss for s in self)
363 return self._pss
364
365 @property
321 def padding(self): 366 def padding(self):
322 if self._padding is None: 367 if self._padding is None:
323 self._padding = sum(s.padding for s in self) 368 self._padding = sum(s.padding for s in self.IterUniqueSymbols())
324 return self._padding 369 return self._padding
325 370
371 @property
372 def aliases(self):
373 return None
374
326 def IsGroup(self): 375 def IsGroup(self):
327 return True 376 return True
328 377
329 def _CreateTransformed(self, symbols, filtered_symbols=None, name=None, 378 def _CreateTransformed(self, symbols, filtered_symbols=None, name=None,
330 section_name=None, is_sorted=None): 379 section_name=None, is_sorted=None):
331 if is_sorted is None: 380 if is_sorted is None:
332 is_sorted = self.is_sorted 381 is_sorted = self.is_sorted
333 return SymbolGroup(symbols, filtered_symbols=filtered_symbols, name=name, 382 return SymbolGroup(symbols, filtered_symbols=filtered_symbols, name=name,
334 section_name=section_name, is_sorted=is_sorted) 383 section_name=section_name, is_sorted=is_sorted)
335 384
(...skipping 13 matching lines...) Expand all
349 398
350 def SortedByAddress(self, reverse=False): 399 def SortedByAddress(self, reverse=False):
351 return self.Sorted(key=(lambda s:s.address), reverse=reverse) 400 return self.Sorted(key=(lambda s:s.address), reverse=reverse)
352 401
353 def SortedByCount(self, reverse=False): 402 def SortedByCount(self, reverse=False):
354 return self.Sorted(key=(lambda s:len(s) if s.IsGroup() else 1), 403 return self.Sorted(key=(lambda s:len(s) if s.IsGroup() else 1),
355 reverse=not reverse) 404 reverse=not reverse)
356 405
357 def Filter(self, func): 406 def Filter(self, func):
358 filtered_and_kept = ([], []) 407 filtered_and_kept = ([], [])
359 for symbol in self: 408 symbol = None
360 filtered_and_kept[int(bool(func(symbol)))].append(symbol) 409 try:
410 for symbol in self:
411 filtered_and_kept[int(bool(func(symbol)))].append(symbol)
412 except:
413 logging.warning('Filter failed on symbol %r', symbol)
414 raise
415
361 return self._CreateTransformed(filtered_and_kept[1], 416 return self._CreateTransformed(filtered_and_kept[1],
362 filtered_symbols=filtered_and_kept[0], 417 filtered_symbols=filtered_and_kept[0],
363 section_name=self.section_name) 418 section_name=self.section_name)
364 419
365 def WhereBiggerThan(self, min_size): 420 def WhereBiggerThan(self, min_size):
366 return self.Filter(lambda s: s.size >= min_size) 421 return self.Filter(lambda s: s.size >= min_size)
367 422
368 def WhereInSection(self, section): 423 def WhereInSection(self, section):
369 if len(section) == 1: 424 if len(section) == 1:
370 ret = self.Filter(lambda s: s.section == section) 425 ret = self.Filter(lambda s: s.section == section)
(...skipping 237 matching lines...) Expand 10 before | Expand all | Expand 10 after
608 key = id(sym) 663 key = id(sym)
609 return key not in self._added_ids and key not in self._removed_ids 664 return key not in self._added_ids and key not in self._removed_ids
610 665
611 def IsRemoved(self, sym): 666 def IsRemoved(self, sym):
612 return id(sym) in self._removed_ids 667 return id(sym) in self._removed_ids
613 668
614 def WhereNotUnchanged(self): 669 def WhereNotUnchanged(self):
615 return self.Filter(lambda s: not self.IsSimilar(s) or s.size) 670 return self.Filter(lambda s: not self.IsSimilar(s) or s.size)
616 671
617 672
618 def Diff(before, after):
619 """Diffs two SizeInfo or SymbolGroup objects.
620
621 When diffing SizeInfos, a SizeInfoDiff is returned.
622 When diffing SymbolGroups, a SymbolDiff is returned.
623
624 Returns:
625 Returns a SizeInfo when args are of type SizeInfo.
626 Returns a SymbolDiff when args are of type SymbolGroup.
627 """
628 if isinstance(after, SizeInfo):
629 assert isinstance(before, SizeInfo)
630 section_sizes = {k: after.section_sizes[k] - v
631 for k, v in before.section_sizes.iteritems()}
632 symbol_diff = _DiffSymbols(before.symbols, after.symbols)
633 return SizeInfoDiff(section_sizes, symbol_diff, before.metadata,
634 after.metadata)
635
636 assert isinstance(after, SymbolGroup) and isinstance(before, SymbolGroup)
637 return _DiffSymbols(before, after)
638
639
640 def _NegateAll(symbols):
641 ret = []
642 for symbol in symbols:
643 if symbol.IsGroup():
644 duped = SymbolDiff([], _NegateAll(symbol), [], name=symbol.name,
645 full_name=symbol.full_name,
646 section_name=symbol.section_name)
647 else:
648 duped = copy.copy(symbol)
649 duped.size = -duped.size
650 duped.padding = -duped.padding
651 ret.append(duped)
652 return ret
653
654
655 def _DiffSymbols(before, after):
656 symbols_by_key = collections.defaultdict(list)
657 for s in before:
658 symbols_by_key[s._Key()].append(s)
659
660 added = []
661 similar = []
662 # For similar symbols, padding is zeroed out. In order to not lose the
663 # information entirely, store it in aggregate.
664 padding_by_section_name = collections.defaultdict(int)
665 for after_sym in after:
666 matching_syms = symbols_by_key.get(after_sym._Key())
667 if matching_syms:
668 before_sym = matching_syms.pop(0)
669 if before_sym.IsGroup() and after_sym.IsGroup():
670 merged_sym = _DiffSymbols(before_sym, after_sym)
671 else:
672 size_diff = (after_sym.size_without_padding -
673 before_sym.size_without_padding)
674 merged_sym = Symbol(after_sym.section_name, size_diff,
675 address=after_sym.address, name=after_sym.name,
676 source_path=after_sym.source_path,
677 object_path=after_sym.object_path,
678 full_name=after_sym.full_name,
679 flags=after_sym.flags)
680
681 # Diffs are more stable when comparing size without padding, except when
682 # the symbol is a padding-only symbol.
683 if after_sym.size_without_padding == 0 and size_diff == 0:
684 merged_sym.padding = after_sym.padding - before_sym.padding
685 else:
686 padding_by_section_name[after_sym.section_name] += (
687 after_sym.padding - before_sym.padding)
688
689 similar.append(merged_sym)
690 else:
691 added.append(after_sym)
692
693 removed = []
694 for remaining_syms in symbols_by_key.itervalues():
695 if remaining_syms:
696 removed.extend(_NegateAll(remaining_syms))
697
698 for section_name, padding in padding_by_section_name.iteritems():
699 if padding != 0:
700 similar.append(Symbol(section_name, padding,
701 name="** aggregate padding of diff'ed symbols"))
702 return SymbolDiff(added, removed, similar, name=after.name,
703 full_name=after.full_name,
704 section_name=after.section_name)
705
706
707 def _ExtractPrefixBeforeSeparator(string, separator, count=1): 673 def _ExtractPrefixBeforeSeparator(string, separator, count=1):
708 idx = -len(separator) 674 idx = -len(separator)
709 prev_idx = None 675 prev_idx = None
710 for _ in xrange(count): 676 for _ in xrange(count):
711 idx = string.find(separator, idx + len(separator)) 677 idx = string.find(separator, idx + len(separator))
712 if idx < 0: 678 if idx < 0:
713 break 679 break
714 prev_idx = idx 680 prev_idx = idx
715 return string[:prev_idx] 681 return string[:prev_idx]
OLDNEW
« no previous file with comments | « tools/binary_size/libsupersize/main.py ('k') | tools/binary_size/libsupersize/ninja_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698