OLD | NEW |
1 # Copyright 2017 The Chromium Authors. All rights reserved. | 1 # Copyright 2017 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 """Classes that comprise the data model for binary size analysis. | 4 """Classes that comprise the data model for binary size analysis. |
5 | 5 |
6 The primary classes are Symbol and SymbolGroup. | 6 The primary classes are Symbol and SymbolGroup. |
7 | 7 |
8 Description of common properties: | 8 Description of common properties: |
9 * address: The start address of the symbol. | 9 * address: The start address of the symbol. |
10 May be 0 (e.g. for .bss or for SymbolGroups). | 10 May be 0 (e.g. for .bss or for SymbolGroups). |
(...skipping 12 matching lines...) Expand all Loading... |
23 are removed from both full_name and name during normalization). | 23 are removed from both full_name and name during normalization). |
24 * section_name: E.g. ".text", ".rodata", ".data.rel.local" | 24 * section_name: E.g. ".text", ".rodata", ".data.rel.local" |
25 * section: The second character of |section_name|. E.g. "t", "r", "d". | 25 * section: The second character of |section_name|. E.g. "t", "r", "d". |
26 """ | 26 """ |
27 | 27 |
28 import collections | 28 import collections |
29 import logging | 29 import logging |
30 import os | 30 import os |
31 import re | 31 import re |
32 | 32 |
33 import cluster_symbols | |
34 import match_util | 33 import match_util |
35 | 34 |
36 | 35 |
# Metadata key names (presumably keys of the SizeInfo.metadata dict --
# confirm against the code that populates it).
METADATA_GIT_REVISION = 'git_revision'
METADATA_APK_FILENAME = 'apk_file_name'  # Path relative to output_directory.
METADATA_MAP_FILENAME = 'map_file_name'  # Path relative to output_directory.
METADATA_ELF_ARCHITECTURE = 'elf_arch'  # "Machine" field from readelf -h
METADATA_ELF_FILENAME = 'elf_file_name'  # Path relative to output_directory.
METADATA_ELF_MTIME = 'elf_mtime'  # int timestamp in utc.
METADATA_ELF_BUILD_ID = 'elf_build_id'
METADATA_GN_ARGS = 'gn_args'
METADATA_TOOL_PREFIX = 'tool_prefix'  # Path relative to SRC_ROOT.


# Maps the one-character |section| code to its full |section_name|.
SECTION_TO_SECTION_NAME = {
    'b': '.bss',
    'd': '.data',
    'r': '.rodata',
    't': '.text',
}

# Symbol flags. Each value is a distinct bit so that several flags can be
# stored in one integer and tested individually with "&".
FLAG_ANONYMOUS = 1
FLAG_STARTUP = 2
FLAG_UNLIKELY = 4
FLAG_REL = 8
FLAG_REL_LOCAL = 16
FLAG_GENERATED_SOURCE = 32
FLAG_CLONE = 64

# Status codes describing how a symbol changed in a diff.
DIFF_STATUS_UNCHANGED = 0
DIFF_STATUS_CHANGED = 1
DIFF_STATUS_ADDED = 2
DIFF_STATUS_REMOVED = 3
66 | 66 |
67 | 67 |
class SizeInfo(object):
  """Represents all size information for a single binary.

  Fields:
    section_sizes: A dict of section_name -> size.
    raw_symbols: A SymbolGroup containing all top-level symbols (no groups).
    symbols: A SymbolGroup where symbols have been grouped by full_name (where
        applicable). May be re-assigned when it is desirable to show custom
        groupings while still printing metadata and section_sizes.
    metadata: A dict.
  """
  __slots__ = (
      'section_sizes',
      'raw_symbols',
      '_symbols',
      'metadata',
  )

  def __init__(self, section_sizes, raw_symbols, metadata=None, symbols=None):
    """Root size information.

    Args:
      section_sizes: A dict of section_name -> size.
      raw_symbols: A SymbolGroup, or a plain list of symbols, which is wrapped
          in a SymbolGroup.
      metadata: Optional dict. A falsy value is replaced by a new empty dict.
      symbols: Optional pre-computed value for the |symbols| property. When
          None, it is computed on first access.
    """
    if isinstance(raw_symbols, list):
      raw_symbols = SymbolGroup(raw_symbols)
    self.section_sizes = section_sizes  # E.g. {'.text': 0}
    self.raw_symbols = raw_symbols
    self._symbols = symbols
    self.metadata = metadata or {}

  @property
  def symbols(self):
    """Grouped view of raw_symbols; computed on first access, then cached."""
    if self._symbols is None:
      self._symbols = self.raw_symbols._Clustered()
    return self._symbols

  @symbols.setter
  def symbols(self, value):
    self._symbols = value
101 | 104 |
102 | 105 |
class SizeInfoDiff(object):
  """What you get when you Diff() two SizeInfo objects.

  Fields:
    section_sizes: A dict of section_name -> size delta.
    raw_symbols: A SymbolDiff with all top-level symbols in it (no groups).
    symbols: A SymbolDiff where symbols have been grouped by full_name (where
        applicable). May be re-assigned when it is desirable to show custom
        groupings while still printing metadata and section_sizes.
    before_metadata: metadata of the "before" SizeInfo.
    after_metadata: metadata of the "after" SizeInfo.
  """
  __slots__ = (
      'section_sizes',
      'raw_symbols',
      '_symbols',
      'before_metadata',
      'after_metadata',
  )

  def __init__(self, section_sizes, raw_symbols, before_metadata,
               after_metadata):
    self.section_sizes = section_sizes
    self.raw_symbols = raw_symbols
    self.before_metadata = before_metadata
    self.after_metadata = after_metadata
    # Filled in by the |symbols| property on first access.
    self._symbols = None

  @property
  def symbols(self):
    """Grouped view of raw_symbols; computed on first access, then cached."""
    if self._symbols is None:
      self._symbols = self.raw_symbols._Clustered()
    return self._symbols

  @symbols.setter
  def symbols(self, value):
    self._symbols = value
124 | 143 |
125 | 144 |
126 class BaseSymbol(object): | 145 class BaseSymbol(object): |
127 """Base class for Symbol and SymbolGroup. | 146 """Base class for Symbol and SymbolGroup. |
128 | 147 |
129 Refer to module docs for field descriptions. | 148 Refer to module docs for field descriptions. |
130 """ | 149 """ |
131 __slots__ = () | 150 __slots__ = () |
132 | 151 |
133 @property | 152 @property |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
176 if flags & FLAG_STARTUP: | 195 if flags & FLAG_STARTUP: |
177 parts.append('startup') | 196 parts.append('startup') |
178 if flags & FLAG_UNLIKELY: | 197 if flags & FLAG_UNLIKELY: |
179 parts.append('unlikely') | 198 parts.append('unlikely') |
180 if flags & FLAG_REL: | 199 if flags & FLAG_REL: |
181 parts.append('rel') | 200 parts.append('rel') |
182 if flags & FLAG_REL_LOCAL: | 201 if flags & FLAG_REL_LOCAL: |
183 parts.append('rel.loc') | 202 parts.append('rel.loc') |
184 if flags & FLAG_GENERATED_SOURCE: | 203 if flags & FLAG_GENERATED_SOURCE: |
185 parts.append('gen') | 204 parts.append('gen') |
| 205 if flags & FLAG_CLONE: |
| 206 parts.append('clone') |
186 # Not actually a part of flags, but useful to show it here. | 207 # Not actually a part of flags, but useful to show it here. |
187 if self.aliases: | 208 if self.aliases: |
188 parts.append('{} aliases'.format(self.num_aliases)) | 209 parts.append('{} aliases'.format(self.num_aliases)) |
189 return '{%s}' % ','.join(parts) | 210 return '{%s}' % ','.join(parts) |
190 | 211 |
def IsBss(self):
  """Returns True when this symbol's section_name is exactly ".bss"."""
  return self.section_name == '.bss'
193 | 214 |
def IsGroup(self):
  """Returns False: this implementation does not represent a group.

  NOTE(review): group classes presumably override this to return True --
  confirm in the SymbolGroup definition.
  """
  return False
(...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
def _CreateTransformed(self, symbols, filtered_symbols=None, full_name=None,
                       template_name=None, name=None, section_name=None,
                       is_sorted=None):
  """Returns a new SymbolGroup wrapping |symbols|.

  Args:
    symbols: The symbols the new group should contain.
    filtered_symbols: Forwarded unchanged to the SymbolGroup constructor.
    full_name: Forwarded unchanged to the SymbolGroup constructor.
    template_name: Forwarded unchanged to the SymbolGroup constructor.
    name: Forwarded unchanged to the SymbolGroup constructor.
    section_name: Forwarded unchanged to the SymbolGroup constructor.
    is_sorted: Forwarded to the constructor. When None, this group's own
        is_sorted value is used instead.
  """
  if is_sorted is None:
    is_sorted = self.is_sorted
  return SymbolGroup(symbols, filtered_symbols=filtered_symbols,
                     full_name=full_name, template_name=template_name,
                     name=name, section_name=section_name,
                     is_sorted=is_sorted)
417 | 438 |
418 def Clustered(self): | |
419 """Returns a new SymbolGroup with some symbols moved into subgroups. | |
420 | |
421 Subgroups include: | |
422 * Symbols that have [clone] in their name (created during inlining). | |
423 * Star symbols (such as "** merge strings", and "** symbol gap") | |
424 | |
425 To view created groups: | |
426 Print(clustered.Filter(lambda s: s.IsGroup()), recursive=True) | |
427 """ | |
428 return self._CreateTransformed(cluster_symbols.ClusterSymbols(self)) | |
429 | |
def Sorted(self, cmp_func=None, key=None, reverse=False):
  """Returns a new group holding this group's symbols in sorted order.

  When neither |cmp_func| nor |key| is given, symbols are ordered with
  non-.bss symbols before .bss symbols, then by descending abs(pss), then by
  ascending name.

  Args:
    cmp_func: Optional two-argument comparison function.
    key: Optional one-argument key function.
    reverse: Whether to reverse the resulting order.
  """
  if cmp_func is None and key is None:
    # Swapping a/b in the pss slot is what makes that component descending.
    cmp_func = lambda a, b: cmp((a.IsBss(), abs(b.pss), a.name),
                                (b.IsBss(), abs(a.pss), b.name))

  # NOTE: relies on Python 2's cmp() builtin and on the positional
  # sorted(iterable, cmp, key, reverse) signature.
  after_symbols = sorted(self._symbols, cmp_func, key, reverse)
  return self._CreateTransformed(
      after_symbols, filtered_symbols=self._filtered_symbols,
      section_name=self.section_name, is_sorted=True)
439 | 448 |
(...skipping 15 matching lines...) Expand all Loading... |
455 for symbol in self: | 464 for symbol in self: |
456 filtered_and_kept[int(bool(func(symbol)))].append(symbol) | 465 filtered_and_kept[int(bool(func(symbol)))].append(symbol) |
457 except: | 466 except: |
458 logging.warning('Filter failed on symbol %r', symbol) | 467 logging.warning('Filter failed on symbol %r', symbol) |
459 raise | 468 raise |
460 | 469 |
461 return self._CreateTransformed(filtered_and_kept[1], | 470 return self._CreateTransformed(filtered_and_kept[1], |
462 filtered_symbols=filtered_and_kept[0], | 471 filtered_symbols=filtered_and_kept[0], |
463 section_name=self.section_name) | 472 section_name=self.section_name) |
464 | 473 |
def WhereIsGroup(self):
  """Returns the result of Filter() keeping symbols whose IsGroup() is true."""
  return self.Filter(lambda s: s.IsGroup())
| 476 |
def WhereSizeBiggerThan(self, min_size):
  """Returns the result of Filter() keeping symbols with size >= |min_size|.

  Note that the bound is inclusive: a symbol whose size equals |min_size| is
  kept.
  """
  return self.Filter(lambda s: s.size >= min_size)
467 | 479 |
def WherePssBiggerThan(self, min_pss):
  """Returns the result of Filter() keeping symbols with pss >= |min_pss|.

  Note that the bound is inclusive: a symbol whose pss equals |min_pss| is
  kept.
  """
  return self.Filter(lambda s: s.pss >= min_pss)
470 | 482 |
471 def WhereInSection(self, section): | 483 def WhereInSection(self, section): |
472 if len(section) == 1: | 484 if len(section) == 1: |
473 ret = self.Filter(lambda s: s.section == section) | 485 ret = self.Filter(lambda s: s.section == section) |
474 ret.section_name = SECTION_TO_SECTION_NAME[section] | 486 ret.section_name = SECTION_TO_SECTION_NAME[section] |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
545 | 557 |
546 Example: | 558 Example: |
547 # Symbols that do not have "third_party" in their path. | 559 # Symbols that do not have "third_party" in their path. |
548 symbols.WherePathMatches(r'third_party').Inverted() | 560 symbols.WherePathMatches(r'third_party').Inverted() |
549 # Symbols within third_party that do not contain the string "foo". | 561 # Symbols within third_party that do not contain the string "foo". |
550 symbols.WherePathMatches(r'third_party').WhereMatches('foo').Inverted() | 562 symbols.WherePathMatches(r'third_party').WhereMatches('foo').Inverted() |
551 """ | 563 """ |
552 return self._CreateTransformed( | 564 return self._CreateTransformed( |
553 self._filtered_symbols, filtered_symbols=self._symbols, is_sorted=False) | 565 self._filtered_symbols, filtered_symbols=self._symbols, is_sorted=False) |
554 | 566 |
def GroupedBy(self, func, min_count=0, group_factory=None):
  """Returns a SymbolGroup of SymbolGroups, indexed by |func|.

  Symbols within each subgroup maintain their relative ordering.

  Args:
    func: Grouping function. Passed a symbol and returns a string for the
        name of the subgroup to put the symbol in. If None is returned, the
        symbol is omitted.
    min_count: Minimum number of symbols for a group. If fewer than this many
        symbols end up in a group, they will not be put within a group.
        Use a negative value to omit symbols entirely rather than
        include them outside of a group.
    group_factory: Function to create SymbolGroup from a list of Symbols.
        Called as group_factory(token, symbols). Defaults to a factory that
        uses the token as the group's full_name, template_name and name.
  """
  if group_factory is None:
    group_factory = lambda token, symbols: self._CreateTransformed(
        symbols, full_name=token, template_name=token, name=token,
        section_name=self.section_name)

  after_syms = []
  filtered_symbols = []
  # OrderedDict so that groups are emitted in first-seen token order.
  symbols_by_token = collections.OrderedDict()
  # Index symbols by |func|.
  for symbol in self:
    token = func(symbol)
    if token is None:
      filtered_symbols.append(symbol)
    else:
      # Optimization: Store a list only when >1 symbol.
      # Saves 200-300ms for _Clustered().
      prev = symbols_by_token.setdefault(token, symbol)
      if prev is not symbol:
        if prev.__class__ == list:
          prev.append(symbol)
        else:
          symbols_by_token[token] = [prev, symbol]
  # Create the subgroups.
  include_singles = min_count >= 0
  min_count = abs(min_count)
  for token, symbol_or_list in symbols_by_token.iteritems():
    # A value is either a bare symbol (count 1) or a list of 2+ symbols.
    count = 1
    if symbol_or_list.__class__ == list:
      count = len(symbol_or_list)

    if count >= min_count:
      if count == 1:
        symbol_or_list = [symbol_or_list]
      after_syms.append(group_factory(token, symbol_or_list))
    else:
      # Too few symbols for a group: keep them ungrouped, or drop them into
      # the filtered list when |min_count| was negative.
      target_list = after_syms if include_singles else filtered_symbols
      if count == 1:
        target_list.append(symbol_or_list)
      else:
        target_list.extend(symbol_or_list)

  return self._CreateTransformed(
      after_syms, filtered_symbols=filtered_symbols,
      section_name=self.section_name)
def _Clustered(self):
  """Returns a new SymbolGroup with some symbols moved into subgroups.

  Method is private since it only ever makes sense to call it from
  SizeInfo.symbols.

  The main function of clustering is to put symbols that were broken into
  multiple parts under a group so that they once again look like a single
  symbol. It also groups together symbols like "** merge strings".

  To view created groups:
    Print(size_info.symbols.WhereIsGroup())
  """
  # Compiled once here rather than looked up for every star symbol.
  numbered_suffix = re.compile(r'\s+\d+( \(.*\))?$')

  def cluster_func(symbol):
    name = symbol.full_name
    if not name:
      # min_count=2 will ensure order is maintained while not being grouped.
      # "&" to distinguish from real symbol names, id() to ensure uniqueness.
      return '&' + hex(id(symbol))
    if name.startswith('*'):
      # "symbol gap 3" -> "symbol gaps"
      name = numbered_suffix.sub('s', name)
    return name

  # Use a custom factory to fill in name & template_name.
  def group_factory(full_name, symbols):
    sym = symbols[0]
    if full_name.startswith('*'):
      return self._CreateTransformed(
          symbols, full_name=full_name, template_name=full_name,
          name=full_name, section_name=sym.section_name)
    return self._CreateTransformed(
        symbols, full_name=full_name, template_name=sym.template_name,
        name=sym.name, section_name=sym.section_name)

  # A full second faster to cluster per-section. Plus, don't need create
  # (section_name, name) tuples in cluster_func.
  ret = []
  for section in self.GroupedBySectionName():
    ret.extend(section.GroupedBy(
        cluster_func, min_count=2, group_factory=group_factory))

  return self._CreateTransformed(ret)
594 | 670 |
def GroupedBySectionName(self):
  """Returns GroupedBy() keyed on each symbol's section_name."""
  return self.GroupedBy(lambda s: s.section_name)
597 | 673 |
598 def GroupedByName(self, depth=0, min_count=0): | 674 def GroupedByName(self, depth=0, min_count=0): |
599 """Groups by symbol name, where |depth| controls how many ::s to include. | 675 """Groups by symbol name, where |depth| controls how many ::s to include. |
600 | 676 |
601 Does not differentiate between namespaces/classes/functions. | 677 Does not differentiate between namespaces/classes/functions. |
602 | 678 |
603 Args: | 679 Args: |
(...skipping 161 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
765 | 841 |
766 | 842 |
767 def _ExtractSuffixAfterSeparator(string, separator, count): | 843 def _ExtractSuffixAfterSeparator(string, separator, count): |
768 prev_idx = len(string) + 1 | 844 prev_idx = len(string) + 1 |
769 for _ in xrange(count): | 845 for _ in xrange(count): |
770 idx = string.rfind(separator, 0, prev_idx - 1) | 846 idx = string.rfind(separator, 0, prev_idx - 1) |
771 if idx < 0: | 847 if idx < 0: |
772 break | 848 break |
773 prev_idx = idx | 849 prev_idx = idx |
774 return string[:prev_idx] | 850 return string[:prev_idx] |
OLD | NEW |