Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(140)

Side by Side Diff: tools/binary_size/libsupersize/models.py

Issue 2851473003: supersize: Track symbol aliases and shared symbols (Closed)
Patch Set: fix regression in calculate padding introduced in ps3 Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/binary_size/libsupersize/main.py ('k') | tools/binary_size/libsupersize/ninja_parser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2017 The Chromium Authors. All rights reserved. 1 # Copyright 2017 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 """Classes that comprise the data model for binary size analysis. 4 """Classes that comprise the data model for binary size analysis.
5 5
6 The primary classes are Symbol and SymbolGroup. 6 The primary classes are Symbol and SymbolGroup.
7 7
8 Description of common properties: 8 Description of common properties:
9 * address: The start address of the symbol. 9 * address: The start address of the symbol.
10 May be 0 (e.g. for .bss or for SymbolGroups). 10 May be 0 (e.g. for .bss or for SymbolGroups).
11 * size: The number of bytes this symbol takes up, including padding that comes 11 * size: The number of bytes this symbol takes up, including padding that comes
12 before |address|. 12 before |address|.
13 * num_aliases: The number of symbols with the same address (including self).
14 * pss: size / num_aliases.
13 * padding: The number of bytes of padding before |address| due to this symbol. 15 * padding: The number of bytes of padding before |address| due to this symbol.
14 * name: Symbol names with parameter list removed. 16 * name: Symbol names with parameter list removed.
15 Never None, but will be '' for anonymous symbols. 17 Never None, but will be '' for anonymous symbols.
16 * full_name: Symbol names with parameter list left in. 18 * full_name: Symbol names with parameter list left in.
17 Never None, but will be '' for anonymous symbols, and for symbols that do 19 Never None, but will be '' for anonymous symbols, and for symbols that do
18 not contain a parameter list. 20 not contain a parameter list.
19 * is_anonymous: True when the symbol exists in an anonymous namespace (which 21 * is_anonymous: True when the symbol exists in an anonymous namespace (which
20 are removed from both full_name and name during normalization). 22 are removed from both full_name and name during normalization).
21 * section_name: E.g. ".text", ".rodata", ".data.rel.local" 23 * section_name: E.g. ".text", ".rodata", ".data.rel.local"
22 * section: The second character of |section_name|. E.g. "t", "r", "d". 24 * section: The second character of |section_name|. E.g. "t", "r", "d".
23 """ 25 """
24 26
25 import collections 27 import collections
26 import copy 28 import logging
27 import os 29 import os
28 import re 30 import re
29 31
30 import match_util 32 import match_util
31 33
32 34
33 METADATA_GIT_REVISION = 'git_revision' 35 METADATA_GIT_REVISION = 'git_revision'
34 METADATA_APK_FILENAME = 'apk_file_name' # Path relative to output_directory. 36 METADATA_APK_FILENAME = 'apk_file_name' # Path relative to output_directory.
35 METADATA_MAP_FILENAME = 'map_file_name' # Path relative to output_directory. 37 METADATA_MAP_FILENAME = 'map_file_name' # Path relative to output_directory.
36 METADATA_ELF_ARCHITECTURE = 'elf_arch' # "Machine" field from readelf -h 38 METADATA_ELF_ARCHITECTURE = 'elf_arch' # "Machine" field from readelf -h
(...skipping 10 matching lines...) Expand all
47 't': '.text', 49 't': '.text',
48 } 50 }
49 51
50 FLAG_ANONYMOUS = 1 52 FLAG_ANONYMOUS = 1
51 FLAG_STARTUP = 2 53 FLAG_STARTUP = 2
52 FLAG_UNLIKELY = 4 54 FLAG_UNLIKELY = 4
53 FLAG_REL = 8 55 FLAG_REL = 8
54 FLAG_REL_LOCAL = 16 56 FLAG_REL_LOCAL = 16
55 57
56 58
59 def _StripCloneSuffix(name):
60 clone_idx = name.find(' [clone ')
61 if clone_idx != -1:
62 return name[:clone_idx]
63 return name
64
65
57 class SizeInfo(object): 66 class SizeInfo(object):
58 """Represents all size information for a single binary. 67 """Represents all size information for a single binary.
59 68
60 Fields: 69 Fields:
61 section_sizes: A dict of section_name -> size. 70 section_sizes: A dict of section_name -> size.
62 raw_symbols: A flat list of all symbols. 71 raw_symbols: A flat list of all symbols.
63 symbols: A SymbolGroup containing raw_symbols, but with some Symbols grouped 72 symbols: A SymbolGroup containing raw_symbols, but with some Symbols grouped
64 into sub-SymbolGroups. 73 into sub-SymbolGroups.
65 metadata: A dict. 74 metadata: A dict.
66 """ 75 """
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
125 return self.size - self.padding 134 return self.size - self.padding
126 135
127 @property 136 @property
128 def end_address(self): 137 def end_address(self):
129 return self.address + self.size_without_padding 138 return self.address + self.size_without_padding
130 139
131 @property 140 @property
132 def is_anonymous(self): 141 def is_anonymous(self):
133 return bool(self.flags & FLAG_ANONYMOUS) 142 return bool(self.flags & FLAG_ANONYMOUS)
134 143
144 @property
145 def num_aliases(self):
146 return len(self.aliases) if self.aliases else 1
147
135 def FlagsString(self): 148 def FlagsString(self):
136 # Most flags are 0. 149 # Most flags are 0.
137 flags = self.flags 150 flags = self.flags
138 if not flags: 151 if not flags and not self.aliases:
139 return '{}' 152 return '{}'
140 parts = [] 153 parts = []
141 if flags & FLAG_ANONYMOUS: 154 if flags & FLAG_ANONYMOUS:
142 parts.append('anon') 155 parts.append('anon')
143 if flags & FLAG_STARTUP: 156 if flags & FLAG_STARTUP:
144 parts.append('startup') 157 parts.append('startup')
145 if flags & FLAG_UNLIKELY: 158 if flags & FLAG_UNLIKELY:
146 parts.append('unlikely') 159 parts.append('unlikely')
147 if flags & FLAG_REL: 160 if flags & FLAG_REL:
148 parts.append('rel') 161 parts.append('rel')
149 if flags & FLAG_REL_LOCAL: 162 if flags & FLAG_REL_LOCAL:
150 parts.append('rel.loc') 163 parts.append('rel.loc')
164 # Not actually a part of flags, but useful to show it here.
165 if self.aliases:
166 parts.append('{} aliases'.format(self.num_aliases))
151 return '{%s}' % ','.join(parts) 167 return '{%s}' % ','.join(parts)
152 168
153 def IsBss(self): 169 def IsBss(self):
154 return self.section_name == '.bss' 170 return self.section_name == '.bss'
155 171
156 def IsGroup(self): 172 def IsGroup(self):
157 return False 173 return False
158 174
159 def IsGenerated(self): 175 def IsGenerated(self):
160 # TODO(agrieve): Also match generated functions such as: 176 # TODO(agrieve): Also match generated functions such as:
161 # startup._GLOBAL__sub_I_page_allocator.cc 177 # startup._GLOBAL__sub_I_page_allocator.cc
162 return self.name.endswith(']') and not self.name.endswith('[]') 178 return self.name.endswith(']') and not self.name.endswith('[]')
163 179
164 def _Key(self): 180 def _Key(self):
165 """Returns a tuple that can be used to see if two Symbol are the same. 181 """Returns a tuple that can be used to see if two Symbol are the same.
166 182
167 Keys are not guaranteed to be unique within a SymbolGroup. For example, it 183 Keys are not guaranteed to be unique within a SymbolGroup. For example, it
168 is common to have multiple "** merge strings" symbols, which will have a 184 is common to have multiple "** merge strings" symbols, which will have a
169 common key.""" 185 common key."""
170 stripped_full_name = self.full_name 186 stripped_full_name = self.full_name
171 if stripped_full_name: 187 if stripped_full_name:
172 clone_idx = stripped_full_name.find(' [clone ') 188 stripped_full_name = _StripCloneSuffix(stripped_full_name)
173 if clone_idx != -1:
174 stripped_full_name = stripped_full_name[:clone_idx]
175 return (self.section_name, stripped_full_name or self.name) 189 return (self.section_name, stripped_full_name or self.name)
176 190
177 191
178 class Symbol(BaseSymbol): 192 class Symbol(BaseSymbol):
179 """Represents a single symbol within a binary. 193 """Represents a single symbol within a binary.
180 194
181 Refer to module docs for field descriptions. 195 Refer to module docs for field descriptions.
182 """ 196 """
183 197
184 __slots__ = ( 198 __slots__ = (
185 'address', 199 'address',
186 'full_name', 200 'full_name',
187 'flags', 201 'flags',
188 'object_path', 202 'object_path',
189 'name', 203 'name',
204 'aliases',
190 'padding', 205 'padding',
191 'section_name', 206 'section_name',
192 'source_path', 207 'source_path',
193 'size', 208 'size',
194 ) 209 )
195 210
196 def __init__(self, section_name, size_without_padding, address=None, 211 def __init__(self, section_name, size_without_padding, address=None,
197 name=None, source_path=None, object_path=None, full_name=None, 212 name=None, source_path=None, object_path=None, full_name=None,
198 flags=0): 213 flags=0, aliases=None):
199 self.section_name = section_name 214 self.section_name = section_name
200 self.address = address or 0 215 self.address = address or 0
201 self.name = name or '' 216 self.name = name or ''
202 self.full_name = full_name or '' 217 self.full_name = full_name or ''
203 self.source_path = source_path or '' 218 self.source_path = source_path or ''
204 self.object_path = object_path or '' 219 self.object_path = object_path or ''
205 self.size = size_without_padding 220 self.size = size_without_padding
206 self.flags = flags 221 self.flags = flags
222 self.aliases = aliases
207 self.padding = 0 223 self.padding = 0
208 224
209 def __repr__(self): 225 def __repr__(self):
210 return ('%s@%x(size_without_padding=%d,padding=%d,name=%s,path=%s,flags=%s)' 226 template = ('{}@{:x}(size_without_padding={},padding={},name={},'
211 % (self.section_name, self.address, self.size_without_padding, 227 'object_path={},source_path={},flags={})')
212 self.padding, self.name, self.source_path or self.object_path, 228 return template.format(
213 self.FlagsString())) 229 self.section_name, self.address, self.size_without_padding,
230 self.padding, self.name, self.object_path, self.source_path,
231 self.FlagsString())
232
233 @property
234 def pss(self):
235 return float(self.size) / self.num_aliases
236
237 @property
238 def pss_without_padding(self):
239 return float(self.size_without_padding) / self.num_aliases
214 240
215 241
216 class SymbolGroup(BaseSymbol): 242 class SymbolGroup(BaseSymbol):
217 """Represents a group of symbols using the same interface as Symbol. 243 """Represents a group of symbols using the same interface as Symbol.
218 244
219 SymbolGroups are immutable. All filtering / sorting will return new 245 SymbolGroups are immutable. All filtering / sorting will return new
220 SymbolGroups objects. 246 SymbolGroups objects.
221 247
222 Overrides many __functions__. E.g. the following are all valid: 248 Overrides many __functions__. E.g. the following are all valid:
223 * len(group) 249 * len(group)
224 * iter(group) 250 * iter(group)
225 * group[0] 251 * group[0]
226 * group['0x1234'] # By symbol address 252 * group['0x1234'] # By symbol address
227 * without_group2 = group1 - group2 253 * without_group2 = group1 - group2
228 * unioned = group1 + group2 254 * unioned = group1 + group2
229 """ 255 """
230 256
231 __slots__ = ( 257 __slots__ = (
232 '_padding', 258 '_padding',
233 '_size', 259 '_size',
260 '_pss',
234 '_symbols', 261 '_symbols',
235 '_filtered_symbols', 262 '_filtered_symbols',
236 'full_name', 263 'full_name',
237 'name', 264 'name',
238 'section_name', 265 'section_name',
239 'is_sorted', 266 'is_sorted',
240 ) 267 )
241 268
242 def __init__(self, symbols, filtered_symbols=None, name=None, 269 def __init__(self, symbols, filtered_symbols=None, name=None,
243 full_name=None, section_name=None, is_sorted=False): 270 full_name=None, section_name=None, is_sorted=False):
244 self._padding = None 271 self._padding = None
245 self._size = None 272 self._size = None
273 self._pss = None
246 self._symbols = symbols 274 self._symbols = symbols
247 self._filtered_symbols = filtered_symbols or [] 275 self._filtered_symbols = filtered_symbols or []
248 self.name = name or '' 276 self.name = name or ''
249 self.full_name = full_name 277 self.full_name = full_name
250 self.section_name = section_name or '.*' 278 self.section_name = section_name or '.*'
251 self.is_sorted = is_sorted 279 self.is_sorted = is_sorted
252 280
253 def __repr__(self): 281 def __repr__(self):
254 return 'Group(name=%s,count=%d,size=%d)' % ( 282 return 'Group(name=%s,count=%d,size=%d)' % (
255 self.name, len(self), self.size) 283 self.name, len(self), self.size)
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
295 return first if all(s.address == first for s in self._symbols) else 0 323 return first if all(s.address == first for s in self._symbols) else 0
296 324
297 @property 325 @property
298 def flags(self): 326 def flags(self):
299 first = self._symbols[0].flags 327 first = self._symbols[0].flags
300 return first if all(s.flags == first for s in self._symbols) else 0 328 return first if all(s.flags == first for s in self._symbols) else 0
301 329
302 @property 330 @property
303 def object_path(self): 331 def object_path(self):
304 first = self._symbols[0].object_path 332 first = self._symbols[0].object_path
305 return first if all(s.object_path == first for s in self._symbols) else None 333 return first if all(s.object_path == first for s in self._symbols) else ''
306 334
307 @property 335 @property
308 def source_path(self): 336 def source_path(self):
309 first = self._symbols[0].source_path 337 first = self._symbols[0].source_path
310 return first if all(s.source_path == first for s in self._symbols) else None 338 return first if all(s.source_path == first for s in self._symbols) else ''
339
340 def IterUniqueSymbols(self):
341 seen_aliases_lists = set()
342 for s in self:
343 if not s.aliases:
344 yield s
345 elif id(s.aliases) not in seen_aliases_lists:
346 seen_aliases_lists.add(id(s.aliases))
347 yield s
311 348
312 @property 349 @property
313 def size(self): 350 def size(self):
314 if self._size is None: 351 if self._size is None:
315 if self.IsBss(): 352 if self.IsBss():
316 self._size = sum(s.size for s in self) 353 self._size = sum(s.size for s in self)
317 self._size = sum(s.size for s in self if not s.IsBss()) 354 else:
355 self._size = sum(s.size for s in self.IterUniqueSymbols())
318 return self._size 356 return self._size
319 357
320 @property 358 @property
359 def pss(self):
360 if self._pss is None:
361 if self.IsBss():
362 self._pss = self.size
363 else:
364 self._pss = sum(s.pss for s in self)
365 return self._pss
366
367 @property
321 def padding(self): 368 def padding(self):
322 if self._padding is None: 369 if self._padding is None:
323 self._padding = sum(s.padding for s in self) 370 self._padding = sum(s.padding for s in self.IterUniqueSymbols())
324 return self._padding 371 return self._padding
325 372
373 @property
374 def aliases(self):
375 return None
376
326 def IsGroup(self): 377 def IsGroup(self):
327 return True 378 return True
328 379
329 def _CreateTransformed(self, symbols, filtered_symbols=None, name=None, 380 def _CreateTransformed(self, symbols, filtered_symbols=None, name=None,
330 section_name=None, is_sorted=None): 381 section_name=None, is_sorted=None):
331 if is_sorted is None: 382 if is_sorted is None:
332 is_sorted = self.is_sorted 383 is_sorted = self.is_sorted
333 return SymbolGroup(symbols, filtered_symbols=filtered_symbols, name=name, 384 return SymbolGroup(symbols, filtered_symbols=filtered_symbols, name=name,
334 section_name=section_name, is_sorted=is_sorted) 385 section_name=section_name, is_sorted=is_sorted)
335 386
(...skipping 13 matching lines...) Expand all
349 400
350 def SortedByAddress(self, reverse=False): 401 def SortedByAddress(self, reverse=False):
351 return self.Sorted(key=(lambda s:s.address), reverse=reverse) 402 return self.Sorted(key=(lambda s:s.address), reverse=reverse)
352 403
353 def SortedByCount(self, reverse=False): 404 def SortedByCount(self, reverse=False):
354 return self.Sorted(key=(lambda s:len(s) if s.IsGroup() else 1), 405 return self.Sorted(key=(lambda s:len(s) if s.IsGroup() else 1),
355 reverse=not reverse) 406 reverse=not reverse)
356 407
357 def Filter(self, func): 408 def Filter(self, func):
358 filtered_and_kept = ([], []) 409 filtered_and_kept = ([], [])
359 for symbol in self: 410 symbol = None
360 filtered_and_kept[int(bool(func(symbol)))].append(symbol) 411 try:
412 for symbol in self:
413 filtered_and_kept[int(bool(func(symbol)))].append(symbol)
414 except:
415 logging.warning('Filter failed on symbol %r', symbol)
416 raise
417
361 return self._CreateTransformed(filtered_and_kept[1], 418 return self._CreateTransformed(filtered_and_kept[1],
362 filtered_symbols=filtered_and_kept[0], 419 filtered_symbols=filtered_and_kept[0],
363 section_name=self.section_name) 420 section_name=self.section_name)
364 421
365 def WhereBiggerThan(self, min_size): 422 def WhereBiggerThan(self, min_size):
366 return self.Filter(lambda s: s.size >= min_size) 423 return self.Filter(lambda s: s.size >= min_size)
367 424
368 def WhereInSection(self, section): 425 def WhereInSection(self, section):
369 if len(section) == 1: 426 if len(section) == 1:
370 ret = self.Filter(lambda s: s.section == section) 427 ret = self.Filter(lambda s: s.section == section)
(...skipping 237 matching lines...) Expand 10 before | Expand all | Expand 10 after
608 key = id(sym) 665 key = id(sym)
609 return key not in self._added_ids and key not in self._removed_ids 666 return key not in self._added_ids and key not in self._removed_ids
610 667
611 def IsRemoved(self, sym): 668 def IsRemoved(self, sym):
612 return id(sym) in self._removed_ids 669 return id(sym) in self._removed_ids
613 670
614 def WhereNotUnchanged(self): 671 def WhereNotUnchanged(self):
615 return self.Filter(lambda s: not self.IsSimilar(s) or s.size) 672 return self.Filter(lambda s: not self.IsSimilar(s) or s.size)
616 673
617 674
618 def Diff(before, after):
619 """Diffs two SizeInfo or SymbolGroup objects.
620
621 When diffing SizeInfos, a SizeInfoDiff is returned.
622 When diffing SymbolGroups, a SymbolDiff is returned.
623
624 Returns:
625 Returns a SizeInfoDiff when args are of type SizeInfo.
626 Returns a SymbolDiff when args are of type SymbolGroup.
627 """
628 if isinstance(after, SizeInfo):
629 assert isinstance(before, SizeInfo)
630 section_sizes = {k: after.section_sizes[k] - v
631 for k, v in before.section_sizes.iteritems()}
632 symbol_diff = _DiffSymbols(before.symbols, after.symbols)
633 return SizeInfoDiff(section_sizes, symbol_diff, before.metadata,
634 after.metadata)
635
636 assert isinstance(after, SymbolGroup) and isinstance(before, SymbolGroup)
637 return _DiffSymbols(before, after)
638
639
640 def _NegateAll(symbols):
641 ret = []
642 for symbol in symbols:
643 if symbol.IsGroup():
644 duped = SymbolDiff([], _NegateAll(symbol), [], name=symbol.name,
645 full_name=symbol.full_name,
646 section_name=symbol.section_name)
647 else:
648 duped = copy.copy(symbol)
649 duped.size = -duped.size
650 duped.padding = -duped.padding
651 ret.append(duped)
652 return ret
653
654
655 def _DiffSymbols(before, after):
656 symbols_by_key = collections.defaultdict(list)
657 for s in before:
658 symbols_by_key[s._Key()].append(s)
659
660 added = []
661 similar = []
662 # For similar symbols, padding is zeroed out. In order to not lose the
663 # information entirely, store it in aggregate.
664 padding_by_section_name = collections.defaultdict(int)
665 for after_sym in after:
666 matching_syms = symbols_by_key.get(after_sym._Key())
667 if matching_syms:
668 before_sym = matching_syms.pop(0)
669 if before_sym.IsGroup() and after_sym.IsGroup():
670 merged_sym = _DiffSymbols(before_sym, after_sym)
671 else:
672 size_diff = (after_sym.size_without_padding -
673 before_sym.size_without_padding)
674 merged_sym = Symbol(after_sym.section_name, size_diff,
675 address=after_sym.address, name=after_sym.name,
676 source_path=after_sym.source_path,
677 object_path=after_sym.object_path,
678 full_name=after_sym.full_name,
679 flags=after_sym.flags)
680
681 # Diffs are more stable when comparing size without padding, except when
682 # the symbol is a padding-only symbol.
683 if after_sym.size_without_padding == 0 and size_diff == 0:
684 merged_sym.padding = after_sym.padding - before_sym.padding
685 else:
686 padding_by_section_name[after_sym.section_name] += (
687 after_sym.padding - before_sym.padding)
688
689 similar.append(merged_sym)
690 else:
691 added.append(after_sym)
692
693 removed = []
694 for remaining_syms in symbols_by_key.itervalues():
695 if remaining_syms:
696 removed.extend(_NegateAll(remaining_syms))
697
698 for section_name, padding in padding_by_section_name.iteritems():
699 if padding != 0:
700 similar.append(Symbol(section_name, padding,
701 name="** aggregate padding of diff'ed symbols"))
702 return SymbolDiff(added, removed, similar, name=after.name,
703 full_name=after.full_name,
704 section_name=after.section_name)
705
706
707 def _ExtractPrefixBeforeSeparator(string, separator, count=1): 675 def _ExtractPrefixBeforeSeparator(string, separator, count=1):
708 idx = -len(separator) 676 idx = -len(separator)
709 prev_idx = None 677 prev_idx = None
710 for _ in xrange(count): 678 for _ in xrange(count):
711 idx = string.find(separator, idx + len(separator)) 679 idx = string.find(separator, idx + len(separator))
712 if idx < 0: 680 if idx < 0:
713 break 681 break
714 prev_idx = idx 682 prev_idx = idx
715 return string[:prev_idx] 683 return string[:prev_idx]
OLDNEW
« no previous file with comments | « tools/binary_size/libsupersize/main.py ('k') | tools/binary_size/libsupersize/ninja_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698