Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(33)

Side by Side Diff: tools/binary_size/libsupersize/models.py

Issue 2885073002: supersize: Move [clone] from name -> flags & rewrite Cluster() (Closed)
Patch Set: review comment Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2017 The Chromium Authors. All rights reserved. 1 # Copyright 2017 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 """Classes that comprise the data model for binary size analysis. 4 """Classes that comprise the data model for binary size analysis.
5 5
6 The primary classes are Symbol and SymbolGroup. 6 The primary classes are Symbol and SymbolGroup.
7 7
8 Description of common properties: 8 Description of common properties:
9 * address: The start address of the symbol. 9 * address: The start address of the symbol.
10 May be 0 (e.g. for .bss or for SymbolGroups). 10 May be 0 (e.g. for .bss or for SymbolGroups).
(...skipping 12 matching lines...) Expand all
23 are removed from both full_name and name during normalization). 23 are removed from both full_name and name during normalization).
24 * section_name: E.g. ".text", ".rodata", ".data.rel.local" 24 * section_name: E.g. ".text", ".rodata", ".data.rel.local"
25 * section: The second character of |section_name|. E.g. "t", "r", "d". 25 * section: The second character of |section_name|. E.g. "t", "r", "d".
26 """ 26 """
27 27
28 import collections 28 import collections
29 import logging 29 import logging
30 import os 30 import os
31 import re 31 import re
32 32
33 import cluster_symbols
34 import match_util 33 import match_util
35 34
36 35
37 METADATA_GIT_REVISION = 'git_revision' 36 METADATA_GIT_REVISION = 'git_revision'
38 METADATA_APK_FILENAME = 'apk_file_name' # Path relative to output_directory. 37 METADATA_APK_FILENAME = 'apk_file_name' # Path relative to output_directory.
39 METADATA_MAP_FILENAME = 'map_file_name' # Path relative to output_directory. 38 METADATA_MAP_FILENAME = 'map_file_name' # Path relative to output_directory.
40 METADATA_ELF_ARCHITECTURE = 'elf_arch' # "Machine" field from readelf -h 39 METADATA_ELF_ARCHITECTURE = 'elf_arch' # "Machine" field from readelf -h
41 METADATA_ELF_FILENAME = 'elf_file_name' # Path relative to output_directory. 40 METADATA_ELF_FILENAME = 'elf_file_name' # Path relative to output_directory.
42 METADATA_ELF_MTIME = 'elf_mtime' # int timestamp in utc. 41 METADATA_ELF_MTIME = 'elf_mtime' # int timestamp in utc.
43 METADATA_ELF_BUILD_ID = 'elf_build_id' 42 METADATA_ELF_BUILD_ID = 'elf_build_id'
44 METADATA_GN_ARGS = 'gn_args' 43 METADATA_GN_ARGS = 'gn_args'
45 METADATA_TOOL_PREFIX = 'tool_prefix' # Path relative to SRC_ROOT. 44 METADATA_TOOL_PREFIX = 'tool_prefix' # Path relative to SRC_ROOT.
46 45
47 46
48 SECTION_TO_SECTION_NAME = { 47 SECTION_TO_SECTION_NAME = {
49 'b': '.bss', 48 'b': '.bss',
50 'd': '.data', 49 'd': '.data',
51 'r': '.rodata', 50 'r': '.rodata',
52 't': '.text', 51 't': '.text',
53 } 52 }
54 53
55 FLAG_ANONYMOUS = 1 54 FLAG_ANONYMOUS = 1
56 FLAG_STARTUP = 2 55 FLAG_STARTUP = 2
57 FLAG_UNLIKELY = 4 56 FLAG_UNLIKELY = 4
58 FLAG_REL = 8 57 FLAG_REL = 8
59 FLAG_REL_LOCAL = 16 58 FLAG_REL_LOCAL = 16
60 FLAG_GENERATED_SOURCE = 32 59 FLAG_GENERATED_SOURCE = 32
60 FLAG_CLONE = 64
61 61
62 DIFF_STATUS_UNCHANGED = 0 62 DIFF_STATUS_UNCHANGED = 0
63 DIFF_STATUS_CHANGED = 1 63 DIFF_STATUS_CHANGED = 1
64 DIFF_STATUS_ADDED = 2 64 DIFF_STATUS_ADDED = 2
65 DIFF_STATUS_REMOVED = 3 65 DIFF_STATUS_REMOVED = 3
66 66
67 67
68 class SizeInfo(object): 68 class SizeInfo(object):
69 """Represents all size information for a single binary. 69 """Represents all size information for a single binary.
70 70
71 Fields: 71 Fields:
72 section_sizes: A dict of section_name -> size. 72 section_sizes: A dict of section_name -> size.
73 raw_symbols: A list of all symbols, sorted by address. 73 raw_symbols: A SymbolGroup containing all top-level symbols (no groups).
74 symbols: A SymbolGroup containing all symbols. By default, these are the 74 symbols: A SymbolGroup where symbols have been grouped by full_name (where
75 same as raw_symbols, but may contain custom groupings when it is 75 applicable). May be re-assigned when it is desirable to show custom
76 desirable to convey the result of a query along with section_sizes and 76 groupings while still printing metadata and section_sizes.
77 metadata.
78 metadata: A dict. 77 metadata: A dict.
79 """ 78 """
80 __slots__ = ( 79 __slots__ = (
81 'section_sizes', 80 'section_sizes',
82 'raw_symbols', 81 'raw_symbols',
83 'symbols', 82 '_symbols',
84 'metadata', 83 'metadata',
85 ) 84 )
86 85
87 """Root size information.""" 86 """Root size information."""
88 def __init__(self, section_sizes, raw_symbols, metadata=None, symbols=None): 87 def __init__(self, section_sizes, raw_symbols, metadata=None, symbols=None):
88 if isinstance(raw_symbols, list):
89 raw_symbols = SymbolGroup(raw_symbols)
89 self.section_sizes = section_sizes # E.g. {'.text': 0} 90 self.section_sizes = section_sizes # E.g. {'.text': 0}
90 self.raw_symbols = raw_symbols 91 self.raw_symbols = raw_symbols
91 self.symbols = symbols or SymbolGroup(raw_symbols) 92 self._symbols = symbols
92 self.metadata = metadata or {} 93 self.metadata = metadata or {}
93 94
94 def Clustered(self): 95 @property
95 """Returns a new SizeInfo with some symbols moved into subgroups. 96 def symbols(self):
97 if self._symbols is None:
98 self._symbols = self.raw_symbols._Clustered()
99 return self._symbols
96 100
97 See SymbolGroup.Clustered() for more details. 101 @symbols.setter
98 """ 102 def symbols(self, value):
99 return SizeInfo(self.section_sizes, self.raw_symbols, self.metadata, 103 self._symbols = value
100 symbols=self.symbols.Clustered())
101 104
102 105
103 class SizeInfoDiff(object): 106 class SizeInfoDiff(object):
104 """What you get when you Diff() two SizeInfo objects. 107 """What you get when you Diff() two SizeInfo objects.
105 108
106 Fields: 109 Fields:
107 section_sizes: A dict of section_name -> size delta. 110 section_sizes: A dict of section_name -> size delta.
108 symbols: A SymbolDiff with all symbols in it. 111 raw_symbols: A SymbolDiff with all top-level symbols in it (no groups).
112 symbols: A SymbolDiff where symbols have been grouped by full_name (where
113 applicable). May be re-assigned when it is desirable to show custom
114 groupings while still printing metadata and section_sizes.
109 before_metadata: metadata of the "before" SizeInfo. 115 before_metadata: metadata of the "before" SizeInfo.
110 after_metadata: metadata of the "after" SizeInfo. 116 after_metadata: metadata of the "after" SizeInfo.
111 """ 117 """
112 __slots__ = ( 118 __slots__ = (
113 'section_sizes', 119 'section_sizes',
114 'symbols', 120 'raw_symbols',
121 '_symbols',
115 'before_metadata', 122 'before_metadata',
116 'after_metadata', 123 'after_metadata',
117 ) 124 )
118 125
119 def __init__(self, section_sizes, symbols, before_metadata, after_metadata): 126 def __init__(self, section_sizes, raw_symbols, before_metadata,
127 after_metadata):
120 self.section_sizes = section_sizes 128 self.section_sizes = section_sizes
121 self.symbols = symbols 129 self.raw_symbols = raw_symbols
122 self.before_metadata = before_metadata 130 self.before_metadata = before_metadata
123 self.after_metadata = after_metadata 131 self.after_metadata = after_metadata
132 self._symbols = None
133
134 @property
135 def symbols(self):
136 if self._symbols is None:
137 self._symbols = self.raw_symbols._Clustered()
138 return self._symbols
139
140 @symbols.setter
141 def symbols(self, value):
142 self._symbols = value
124 143
125 144
126 class BaseSymbol(object): 145 class BaseSymbol(object):
127 """Base class for Symbol and SymbolGroup. 146 """Base class for Symbol and SymbolGroup.
128 147
129 Refer to module docs for field descriptions. 148 Refer to module docs for field descriptions.
130 """ 149 """
131 __slots__ = () 150 __slots__ = ()
132 151
133 @property 152 @property
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
176 if flags & FLAG_STARTUP: 195 if flags & FLAG_STARTUP:
177 parts.append('startup') 196 parts.append('startup')
178 if flags & FLAG_UNLIKELY: 197 if flags & FLAG_UNLIKELY:
179 parts.append('unlikely') 198 parts.append('unlikely')
180 if flags & FLAG_REL: 199 if flags & FLAG_REL:
181 parts.append('rel') 200 parts.append('rel')
182 if flags & FLAG_REL_LOCAL: 201 if flags & FLAG_REL_LOCAL:
183 parts.append('rel.loc') 202 parts.append('rel.loc')
184 if flags & FLAG_GENERATED_SOURCE: 203 if flags & FLAG_GENERATED_SOURCE:
185 parts.append('gen') 204 parts.append('gen')
205 if flags & FLAG_CLONE:
206 parts.append('clone')
186 # Not actually a part of flags, but useful to show it here. 207 # Not actually a part of flags, but useful to show it here.
187 if self.aliases: 208 if self.aliases:
188 parts.append('{} aliases'.format(self.num_aliases)) 209 parts.append('{} aliases'.format(self.num_aliases))
189 return '{%s}' % ','.join(parts) 210 return '{%s}' % ','.join(parts)
190 211
191 def IsBss(self): 212 def IsBss(self):
192 return self.section_name == '.bss' 213 return self.section_name == '.bss'
193 214
194 def IsGroup(self): 215 def IsGroup(self):
195 return False 216 return False
(...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after
408 def _CreateTransformed(self, symbols, filtered_symbols=None, full_name=None, 429 def _CreateTransformed(self, symbols, filtered_symbols=None, full_name=None,
409 template_name=None, name=None, section_name=None, 430 template_name=None, name=None, section_name=None,
410 is_sorted=None): 431 is_sorted=None):
411 if is_sorted is None: 432 if is_sorted is None:
412 is_sorted = self.is_sorted 433 is_sorted = self.is_sorted
413 return SymbolGroup(symbols, filtered_symbols=filtered_symbols, 434 return SymbolGroup(symbols, filtered_symbols=filtered_symbols,
414 full_name=full_name, template_name=template_name, 435 full_name=full_name, template_name=template_name,
415 name=name, section_name=section_name, 436 name=name, section_name=section_name,
416 is_sorted=is_sorted) 437 is_sorted=is_sorted)
417 438
418 def Clustered(self):
419 """Returns a new SymbolGroup with some symbols moved into subgroups.
420
421 Subgroups include:
422 * Symbols that have [clone] in their name (created during inlining).
423 * Star symbols (such as "** merge strings", and "** symbol gap")
424
425 To view created groups:
426 Print(clustered.Filter(lambda s: s.IsGroup()), recursive=True)
427 """
428 return self._CreateTransformed(cluster_symbols.ClusterSymbols(self))
429
430 def Sorted(self, cmp_func=None, key=None, reverse=False): 439 def Sorted(self, cmp_func=None, key=None, reverse=False):
431 if cmp_func is None and key is None: 440 if cmp_func is None and key is None:
432 cmp_func = lambda a, b: cmp((a.IsBss(), abs(b.pss), a.name), 441 cmp_func = lambda a, b: cmp((a.IsBss(), abs(b.pss), a.name),
433 (b.IsBss(), abs(a.pss), b.name)) 442 (b.IsBss(), abs(a.pss), b.name))
434 443
435 after_symbols = sorted(self._symbols, cmp_func, key, reverse) 444 after_symbols = sorted(self._symbols, cmp_func, key, reverse)
436 return self._CreateTransformed( 445 return self._CreateTransformed(
437 after_symbols, filtered_symbols=self._filtered_symbols, 446 after_symbols, filtered_symbols=self._filtered_symbols,
438 section_name=self.section_name, is_sorted=True) 447 section_name=self.section_name, is_sorted=True)
439 448
(...skipping 15 matching lines...) Expand all
455 for symbol in self: 464 for symbol in self:
456 filtered_and_kept[int(bool(func(symbol)))].append(symbol) 465 filtered_and_kept[int(bool(func(symbol)))].append(symbol)
457 except: 466 except:
458 logging.warning('Filter failed on symbol %r', symbol) 467 logging.warning('Filter failed on symbol %r', symbol)
459 raise 468 raise
460 469
461 return self._CreateTransformed(filtered_and_kept[1], 470 return self._CreateTransformed(filtered_and_kept[1],
462 filtered_symbols=filtered_and_kept[0], 471 filtered_symbols=filtered_and_kept[0],
463 section_name=self.section_name) 472 section_name=self.section_name)
464 473
474 def WhereIsGroup(self):
475 return self.Filter(lambda s: s.IsGroup())
476
465 def WhereSizeBiggerThan(self, min_size): 477 def WhereSizeBiggerThan(self, min_size):
466 return self.Filter(lambda s: s.size >= min_size) 478 return self.Filter(lambda s: s.size >= min_size)
467 479
468 def WherePssBiggerThan(self, min_pss): 480 def WherePssBiggerThan(self, min_pss):
469 return self.Filter(lambda s: s.pss >= min_pss) 481 return self.Filter(lambda s: s.pss >= min_pss)
470 482
471 def WhereInSection(self, section): 483 def WhereInSection(self, section):
472 if len(section) == 1: 484 if len(section) == 1:
473 ret = self.Filter(lambda s: s.section == section) 485 ret = self.Filter(lambda s: s.section == section)
474 ret.section_name = SECTION_TO_SECTION_NAME[section] 486 ret.section_name = SECTION_TO_SECTION_NAME[section]
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
545 557
546 Example: 558 Example:
547 # Symbols that do not have "third_party" in their path. 559 # Symbols that do not have "third_party" in their path.
548 symbols.WherePathMatches(r'third_party').Inverted() 560 symbols.WherePathMatches(r'third_party').Inverted()
549 # Symbols within third_party that do not contain the string "foo". 561 # Symbols within third_party that do not contain the string "foo".
550 symbols.WherePathMatches(r'third_party').WhereMatches('foo').Inverted() 562 symbols.WherePathMatches(r'third_party').WhereMatches('foo').Inverted()
551 """ 563 """
552 return self._CreateTransformed( 564 return self._CreateTransformed(
553 self._filtered_symbols, filtered_symbols=self._symbols, is_sorted=False) 565 self._filtered_symbols, filtered_symbols=self._symbols, is_sorted=False)
554 566
555 def GroupedBy(self, func, min_count=0): 567 def GroupedBy(self, func, min_count=0, group_factory=None):
556 """Returns a SymbolGroup of SymbolGroups, indexed by |func|. 568 """Returns a SymbolGroup of SymbolGroups, indexed by |func|.
557 569
558 Symbols within each subgroup maintain their relative ordering. 570 Symbols within each subgroup maintain their relative ordering.
559 571
560 Args: 572 Args:
561 func: Grouping function. Passed a symbol and returns a string for the 573 func: Grouping function. Passed a symbol and returns a string for the
562 name of the subgroup to put the symbol in. If None is returned, the 574 name of the subgroup to put the symbol in. If None is returned, the
563 symbol is omitted. 575 symbol is omitted.
564 min_count: Minimum number of symbols for a group. If fewer than this many 576 min_count: Minimum number of symbols for a group. If fewer than this many
565 symbols end up in a group, they will not be put within a group. 577 symbols end up in a group, they will not be put within a group.
566 Use a negative value to omit symbols entirely rather than 578 Use a negative value to omit symbols entirely rather than
567 include them outside of a group. 579 include them outside of a group.
580 group_factory: Function to create SymbolGroup from a list of Symbols.
568 """ 581 """
582 if group_factory is None:
583 group_factory = lambda token, symbols: self._CreateTransformed(
584 symbols, full_name=token, template_name=token, name=token,
585 section_name=self.section_name)
586
569 after_syms = [] 587 after_syms = []
570 filtered_symbols = [] 588 filtered_symbols = []
571 symbols_by_token = collections.defaultdict(list) 589 symbols_by_token = collections.OrderedDict()
572 # Index symbols by |func|. 590 # Index symbols by |func|.
573 for symbol in self: 591 for symbol in self:
574 token = func(symbol) 592 token = func(symbol)
575 if token is None: 593 if token is None:
576 filtered_symbols.append(symbol) 594 filtered_symbols.append(symbol)
577 symbols_by_token[token].append(symbol) 595 else:
596 # Optimization: Store a list only when >1 symbol.
597 # Saves 200-300ms for _Clustered().
598 prev = symbols_by_token.setdefault(token, symbol)
599 if prev is not symbol:
600 if prev.__class__ == list:
601 prev.append(symbol)
602 else:
603 symbols_by_token[token] = [prev, symbol]
578 # Create the subgroups. 604 # Create the subgroups.
579 include_singles = min_count >= 0 605 include_singles = min_count >= 0
580 min_count = abs(min_count) 606 min_count = abs(min_count)
581 for token, symbols in symbols_by_token.iteritems(): 607 for token, symbol_or_list in symbols_by_token.iteritems():
582 if len(symbols) >= min_count: 608 count = 1
583 after_syms.append(self._CreateTransformed( 609 if symbol_or_list.__class__ == list:
584 symbols, name=token, section_name=self.section_name, 610 count = len(symbol_or_list)
585 is_sorted=False)) 611
586 elif include_singles: 612 if count >= min_count:
587 after_syms.extend(symbols) 613 if count == 1:
614 symbol_or_list = [symbol_or_list]
615 after_syms.append(group_factory(token, symbol_or_list))
588 else: 616 else:
589 filtered_symbols.extend(symbols) 617 target_list = after_syms if include_singles else filtered_symbols
590 grouped = self._CreateTransformed( 618 if count == 1:
619 target_list.append(symbol_or_list)
620 else:
621 target_list.extend(symbol_or_list)
622
623 return self._CreateTransformed(
591 after_syms, filtered_symbols=filtered_symbols, 624 after_syms, filtered_symbols=filtered_symbols,
592 section_name=self.section_name, is_sorted=False) 625 section_name=self.section_name)
593 return grouped 626
627 def _Clustered(self):
628 """Returns a new SymbolGroup with some symbols moved into subgroups.
629
630 Method is private since it only ever makes sense to call it from
631 SizeInfo.symbols.
632
633 The main function of clustering is to put symbols that were broken into
634 multiple parts under a group so that they once again look like a single
635 symbol. It also groups together symbols like "** merge strings".
636
637 To view created groups:
638 Print(size_info.symbols.WhereIsGroup())
639 """
640 def cluster_func(symbol):
641 name = symbol.full_name
642 if not name:
643 # min_count=2 will ensure order is maintained while not being grouped.
644 # "&" to distinguish from real symbol names, id() to ensure uniqueness.
645 return '&' + hex(id(symbol))
646 if name.startswith('*'):
647 # "symbol gap 3" -> "symbol gaps"
648 name = re.sub(r'\s+\d+( \(.*\))?$', 's', name)
649 return name
650
651 # Use a custom factory to fill in name & template_name.
652 def group_factory(full_name, symbols):
653 sym = symbols[0]
654 if full_name.startswith('*'):
655 return self._CreateTransformed(
656 symbols, full_name=full_name, template_name=full_name,
657 name=full_name, section_name=sym.section_name)
658 return self._CreateTransformed(
659 symbols, full_name=full_name, template_name=sym.template_name,
660 name=sym.name, section_name=sym.section_name)
661
662 # A full second faster to cluster per-section. Plus, don't need to create
663 # (section_name, name) tuples in cluster_func.
664 ret = []
665 for section in self.GroupedBySectionName():
666 ret.extend(section.GroupedBy(
667 cluster_func, min_count=2, group_factory=group_factory))
668
669 return self._CreateTransformed(ret)
594 670
595 def GroupedBySectionName(self): 671 def GroupedBySectionName(self):
596 return self.GroupedBy(lambda s: s.section_name) 672 return self.GroupedBy(lambda s: s.section_name)
597 673
598 def GroupedByName(self, depth=0, min_count=0): 674 def GroupedByName(self, depth=0, min_count=0):
599 """Groups by symbol name, where |depth| controls how many ::s to include. 675 """Groups by symbol name, where |depth| controls how many ::s to include.
600 676
601 Does not differentiate between namespaces/classes/functions. 677 Does not differentiate between namespaces/classes/functions.
602 678
603 Args: 679 Args:
(...skipping 161 matching lines...) Expand 10 before | Expand all | Expand 10 after
765 841
766 842
767 def _ExtractSuffixAfterSeparator(string, separator, count): 843 def _ExtractSuffixAfterSeparator(string, separator, count):
768 prev_idx = len(string) + 1 844 prev_idx = len(string) + 1
769 for _ in xrange(count): 845 for _ in xrange(count):
770 idx = string.rfind(separator, 0, prev_idx - 1) 846 idx = string.rfind(separator, 0, prev_idx - 1)
771 if idx < 0: 847 if idx < 0:
772 break 848 break
773 prev_idx = idx 849 prev_idx = idx
774 return string[:prev_idx] 850 return string[:prev_idx]
OLDNEW
« no previous file with comments | « tools/binary_size/libsupersize/integration_test.py ('k') | tools/binary_size/libsupersize/testdata/Archive.golden » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698