tools/binary_size/libsupersize/models.py - Issue 2885073002: supersize: Move [clone] from name -> flags & rewrite Cluster()

Side by Side Diff: tools/binary_size/libsupersize/models.py

Issue 2885073002: supersize: Move [clone] from name -> flags & rewrite Cluster() (Closed)

Patch Set: review comment Created 3 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 # Copyright 2017 The Chromium Authors. All rights reserved.	1 # Copyright 2017 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4 """Classes that comprise the data model for binary size analysis.	4 """Classes that comprise the data model for binary size analysis.

5	5

6 The primary classes are Symbol and SymbolGroup.	6 The primary classes are Symbol and SymbolGroup.

7	7

8 Description of common properties:	8 Description of common properties:

9 * address: The start address of the symbol.	9 * address: The start address of the symbol.

10 May be 0 (e.g. for .bss or for SymbolGroups).	10 May be 0 (e.g. for .bss or for SymbolGroups).

(...skipping 12 matching lines...) Expand all Loading...
23 are removed from both full_name and name during normalization).	23 are removed from both full_name and name during normalization).

24 * section_name: E.g. ".text", ".rodata", ".data.rel.local"	24 * section_name: E.g. ".text", ".rodata", ".data.rel.local"

25 * section: The second character of \|section_name\|. E.g. "t", "r", "d".	25 * section: The second character of \|section_name\|. E.g. "t", "r", "d".

26 """	26 """

27	27

28 import collections	28 import collections

29 import logging	29 import logging

30 import os	30 import os

31 import re	31 import re

32	32

33 import cluster_symbols

34 import match_util	33 import match_util

35	34

36	35

37 METADATA_GIT_REVISION = 'git_revision'	36 METADATA_GIT_REVISION = 'git_revision'

38 METADATA_APK_FILENAME = 'apk_file_name' # Path relative to output_directory.	37 METADATA_APK_FILENAME = 'apk_file_name' # Path relative to output_directory.

39 METADATA_MAP_FILENAME = 'map_file_name' # Path relative to output_directory.	38 METADATA_MAP_FILENAME = 'map_file_name' # Path relative to output_directory.

40 METADATA_ELF_ARCHITECTURE = 'elf_arch' # "Machine" field from readelf -h	39 METADATA_ELF_ARCHITECTURE = 'elf_arch' # "Machine" field from readelf -h

41 METADATA_ELF_FILENAME = 'elf_file_name' # Path relative to output_directory.	40 METADATA_ELF_FILENAME = 'elf_file_name' # Path relative to output_directory.

42 METADATA_ELF_MTIME = 'elf_mtime' # int timestamp in utc.	41 METADATA_ELF_MTIME = 'elf_mtime' # int timestamp in utc.

43 METADATA_ELF_BUILD_ID = 'elf_build_id'	42 METADATA_ELF_BUILD_ID = 'elf_build_id'

44 METADATA_GN_ARGS = 'gn_args'	43 METADATA_GN_ARGS = 'gn_args'

45 METADATA_TOOL_PREFIX = 'tool_prefix' # Path relative to SRC_ROOT.	44 METADATA_TOOL_PREFIX = 'tool_prefix' # Path relative to SRC_ROOT.

46	45

47	46

48 SECTION_TO_SECTION_NAME = {	47 SECTION_TO_SECTION_NAME = {

49 'b': '.bss',	48 'b': '.bss',

50 'd': '.data',	49 'd': '.data',

51 'r': '.rodata',	50 'r': '.rodata',

52 't': '.text',	51 't': '.text',

53 }	52 }

54	53

55 FLAG_ANONYMOUS = 1	54 FLAG_ANONYMOUS = 1

56 FLAG_STARTUP = 2	55 FLAG_STARTUP = 2

57 FLAG_UNLIKELY = 4	56 FLAG_UNLIKELY = 4

58 FLAG_REL = 8	57 FLAG_REL = 8

59 FLAG_REL_LOCAL = 16	58 FLAG_REL_LOCAL = 16

60 FLAG_GENERATED_SOURCE = 32	59 FLAG_GENERATED_SOURCE = 32

	60 FLAG_CLONE = 64

61	61

62 DIFF_STATUS_UNCHANGED = 0	62 DIFF_STATUS_UNCHANGED = 0

63 DIFF_STATUS_CHANGED = 1	63 DIFF_STATUS_CHANGED = 1

64 DIFF_STATUS_ADDED = 2	64 DIFF_STATUS_ADDED = 2

65 DIFF_STATUS_REMOVED = 3	65 DIFF_STATUS_REMOVED = 3

66	66

67	67

68 class SizeInfo(object):	68 class SizeInfo(object):

69 """Represents all size information for a single binary.	69 """Represents all size information for a single binary.

70	70

71 Fields:	71 Fields:

72 section_sizes: A dict of section_name -> size.	72 section_sizes: A dict of section_name -> size.

73 raw_symbols: A list of all symbols, sorted by address.	73 raw_symbols: A SymbolGroup containing all top-level symbols (no groups).

74 symbols: A SymbolGroup containing all symbols. By default, these are the	74 symbols: A SymbolGroup where symbols have been grouped by full_name (where

75 same as raw_symbols, but may contain custom groupings when it is	75 applicable). May be re-assigned when it is desirable to show custom

76 desirable to convey the result of a query along with section_sizes and	76 groupings while still printing metadata and section_sizes.

77 metadata.

78 metadata: A dict.	77 metadata: A dict.

79 """	78 """

80 __slots__ = (	79 __slots__ = (

81 'section_sizes',	80 'section_sizes',

82 'raw_symbols',	81 'raw_symbols',

83 'symbols',	82 '_symbols',

84 'metadata',	83 'metadata',

85 )	84 )

86	85

87 """Root size information."""	86 """Root size information."""

88 def __init__(self, section_sizes, raw_symbols, metadata=None, symbols=None):	87 def __init__(self, section_sizes, raw_symbols, metadata=None, symbols=None):

	88 if isinstance(raw_symbols, list):

	89 raw_symbols = SymbolGroup(raw_symbols)

89 self.section_sizes = section_sizes # E.g. {'.text': 0}	90 self.section_sizes = section_sizes # E.g. {'.text': 0}

90 self.raw_symbols = raw_symbols	91 self.raw_symbols = raw_symbols

91 self.symbols = symbols or SymbolGroup(raw_symbols)	92 self._symbols = symbols

92 self.metadata = metadata or {}	93 self.metadata = metadata or {}

93	94

94 def Clustered(self):	95 @property

95 """Returns a new SizeInfo with some symbols moved into subgroups.	96 def symbols(self):

	97 if self._symbols is None:

	98 self._symbols = self.raw_symbols._Clustered()

	99 return self._symbols

96	100

97 See SymbolGroup.Clustered() for more details.	101 @symbols.setter

98 """	102 def symbols(self, value):

99 return SizeInfo(self.section_sizes, self.raw_symbols, self.metadata,	103 self._symbols = value

100 symbols=self.symbols.Clustered())

101	104

102	105

103 class SizeInfoDiff(object):	106 class SizeInfoDiff(object):

104 """What you get when you Diff() two SizeInfo objects.	107 """What you get when you Diff() two SizeInfo objects.

105	108

106 Fields:	109 Fields:

107 section_sizes: A dict of section_name -> size delta.	110 section_sizes: A dict of section_name -> size delta.

108 symbols: A SymbolDiff with all symbols in it.	111 raw_symbols: A SymbolDiff with all top-level symbols in it (no groups).

	112 symbols: A SymbolDiff where symbols have been grouped by full_name (where

	113 applicable). May be re-assigned when it is desirable to show custom

	114 groupings while still printing metadata and section_sizes.

109 before_metadata: metadata of the "before" SizeInfo.	115 before_metadata: metadata of the "before" SizeInfo.

110 after_metadata: metadata of the "after" SizeInfo.	116 after_metadata: metadata of the "after" SizeInfo.

111 """	117 """

112 __slots__ = (	118 __slots__ = (

113 'section_sizes',	119 'section_sizes',

114 'symbols',	120 'raw_symbols',

	121 '_symbols',

115 'before_metadata',	122 'before_metadata',

116 'after_metadata',	123 'after_metadata',

117 )	124 )

118	125

119 def __init__(self, section_sizes, symbols, before_metadata, after_metadata):	126 def __init__(self, section_sizes, raw_symbols, before_metadata,

	127 after_metadata):

120 self.section_sizes = section_sizes	128 self.section_sizes = section_sizes

121 self.symbols = symbols	129 self.raw_symbols = raw_symbols

122 self.before_metadata = before_metadata	130 self.before_metadata = before_metadata

123 self.after_metadata = after_metadata	131 self.after_metadata = after_metadata

	132 self._symbols = None

	133

	134 @property

	135 def symbols(self):

	136 if self._symbols is None:

	137 self._symbols = self.raw_symbols._Clustered()

	138 return self._symbols

	139

	140 @symbols.setter

	141 def symbols(self, value):

	142 self._symbols = value

124	143

125	144

126 class BaseSymbol(object):	145 class BaseSymbol(object):

127 """Base class for Symbol and SymbolGroup.	146 """Base class for Symbol and SymbolGroup.

128	147

129 Refer to module docs for field descriptions.	148 Refer to module docs for field descriptions.

130 """	149 """

131 __slots__ = ()	150 __slots__ = ()

132	151

133 @property	152 @property

(...skipping 42 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
176 if flags & FLAG_STARTUP:	195 if flags & FLAG_STARTUP:

177 parts.append('startup')	196 parts.append('startup')

178 if flags & FLAG_UNLIKELY:	197 if flags & FLAG_UNLIKELY:

179 parts.append('unlikely')	198 parts.append('unlikely')

180 if flags & FLAG_REL:	199 if flags & FLAG_REL:

181 parts.append('rel')	200 parts.append('rel')

182 if flags & FLAG_REL_LOCAL:	201 if flags & FLAG_REL_LOCAL:

183 parts.append('rel.loc')	202 parts.append('rel.loc')

184 if flags & FLAG_GENERATED_SOURCE:	203 if flags & FLAG_GENERATED_SOURCE:

185 parts.append('gen')	204 parts.append('gen')

	205 if flags & FLAG_CLONE:

	206 parts.append('clone')

186 # Not actually a part of flags, but useful to show it here.	207 # Not actually a part of flags, but useful to show it here.

187 if self.aliases:	208 if self.aliases:

188 parts.append('{} aliases'.format(self.num_aliases))	209 parts.append('{} aliases'.format(self.num_aliases))

189 return '{%s}' % ','.join(parts)	210 return '{%s}' % ','.join(parts)

190	211

191 def IsBss(self):	212 def IsBss(self):

192 return self.section_name == '.bss'	213 return self.section_name == '.bss'

193	214

194 def IsGroup(self):	215 def IsGroup(self):

195 return False	216 return False

(...skipping 212 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
408 def _CreateTransformed(self, symbols, filtered_symbols=None, full_name=None,	429 def _CreateTransformed(self, symbols, filtered_symbols=None, full_name=None,

409 template_name=None, name=None, section_name=None,	430 template_name=None, name=None, section_name=None,

410 is_sorted=None):	431 is_sorted=None):

411 if is_sorted is None:	432 if is_sorted is None:

412 is_sorted = self.is_sorted	433 is_sorted = self.is_sorted

413 return SymbolGroup(symbols, filtered_symbols=filtered_symbols,	434 return SymbolGroup(symbols, filtered_symbols=filtered_symbols,

414 full_name=full_name, template_name=template_name,	435 full_name=full_name, template_name=template_name,

415 name=name, section_name=section_name,	436 name=name, section_name=section_name,

416 is_sorted=is_sorted)	437 is_sorted=is_sorted)

417	438

418 def Clustered(self):

419 """Returns a new SymbolGroup with some symbols moved into subgroups.

420

421 Subgroups include:

422 * Symbols that have [clone] in their name (created during inlining).

423 * Star symbols (such as " merge strings", and " symbol gap")

424

425 To view created groups:

426 Print(clustered.Filter(lambda s: s.IsGroup()), recursive=True)

427 """

428 return self._CreateTransformed(cluster_symbols.ClusterSymbols(self))

429

430 def Sorted(self, cmp_func=None, key=None, reverse=False):	439 def Sorted(self, cmp_func=None, key=None, reverse=False):

431 if cmp_func is None and key is None:	440 if cmp_func is None and key is None:

432 cmp_func = lambda a, b: cmp((a.IsBss(), abs(b.pss), a.name),	441 cmp_func = lambda a, b: cmp((a.IsBss(), abs(b.pss), a.name),

433 (b.IsBss(), abs(a.pss), b.name))	442 (b.IsBss(), abs(a.pss), b.name))

434	443

435 after_symbols = sorted(self._symbols, cmp_func, key, reverse)	444 after_symbols = sorted(self._symbols, cmp_func, key, reverse)

436 return self._CreateTransformed(	445 return self._CreateTransformed(

437 after_symbols, filtered_symbols=self._filtered_symbols,	446 after_symbols, filtered_symbols=self._filtered_symbols,

438 section_name=self.section_name, is_sorted=True)	447 section_name=self.section_name, is_sorted=True)

439	448

(...skipping 15 matching lines...) Expand all Loading...
455 for symbol in self:	464 for symbol in self:

456 filtered_and_kept[int(bool(func(symbol)))].append(symbol)	465 filtered_and_kept[int(bool(func(symbol)))].append(symbol)

457 except:	466 except:

458 logging.warning('Filter failed on symbol %r', symbol)	467 logging.warning('Filter failed on symbol %r', symbol)

459 raise	468 raise

460	469

461 return self._CreateTransformed(filtered_and_kept[1],	470 return self._CreateTransformed(filtered_and_kept[1],

462 filtered_symbols=filtered_and_kept[0],	471 filtered_symbols=filtered_and_kept[0],

463 section_name=self.section_name)	472 section_name=self.section_name)

464	473

	474 def WhereIsGroup(self):

	475 return self.Filter(lambda s: s.IsGroup())

	476

465 def WhereSizeBiggerThan(self, min_size):	477 def WhereSizeBiggerThan(self, min_size):

466 return self.Filter(lambda s: s.size >= min_size)	478 return self.Filter(lambda s: s.size >= min_size)

467	479

468 def WherePssBiggerThan(self, min_pss):	480 def WherePssBiggerThan(self, min_pss):

469 return self.Filter(lambda s: s.pss >= min_pss)	481 return self.Filter(lambda s: s.pss >= min_pss)

470	482

471 def WhereInSection(self, section):	483 def WhereInSection(self, section):

472 if len(section) == 1:	484 if len(section) == 1:

473 ret = self.Filter(lambda s: s.section == section)	485 ret = self.Filter(lambda s: s.section == section)

474 ret.section_name = SECTION_TO_SECTION_NAME[section]	486 ret.section_name = SECTION_TO_SECTION_NAME[section]

(...skipping 70 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
545	557

546 Example:	558 Example:

547 # Symbols that do not have "third_party" in their path.	559 # Symbols that do not have "third_party" in their path.

548 symbols.WherePathMatches(r'third_party').Inverted()	560 symbols.WherePathMatches(r'third_party').Inverted()

549 # Symbols within third_party that do not contain the string "foo".	561 # Symbols within third_party that do not contain the string "foo".

550 symbols.WherePathMatches(r'third_party').WhereMatches('foo').Inverted()	562 symbols.WherePathMatches(r'third_party').WhereMatches('foo').Inverted()

551 """	563 """

552 return self._CreateTransformed(	564 return self._CreateTransformed(

553 self._filtered_symbols, filtered_symbols=self._symbols, is_sorted=False)	565 self._filtered_symbols, filtered_symbols=self._symbols, is_sorted=False)

554	566

555 def GroupedBy(self, func, min_count=0):	567 def GroupedBy(self, func, min_count=0, group_factory=None):

556 """Returns a SymbolGroup of SymbolGroups, indexed by \|func\|.	568 """Returns a SymbolGroup of SymbolGroups, indexed by \|func\|.

557	569

558 Symbols within each subgroup maintain their relative ordering.	570 Symbols within each subgroup maintain their relative ordering.

559	571

560 Args:	572 Args:

561 func: Grouping function. Passed a symbol and returns a string for the	573 func: Grouping function. Passed a symbol and returns a string for the

562 name of the subgroup to put the symbol in. If None is returned, the	574 name of the subgroup to put the symbol in. If None is returned, the

563 symbol is omitted.	575 symbol is omitted.

564 min_count: Minimum number of symbols for a group. If fewer than this many	576 min_count: Minimum number of symbols for a group. If fewer than this many

565 symbols end up in a group, they will not be put within a group.	577 symbols end up in a group, they will not be put within a group.

566 Use a negative value to omit symbols entirely rather than	578 Use a negative value to omit symbols entirely rather than

567 include them outside of a group.	579 include them outside of a group.

	580 group_factory: Function to create SymbolGroup from a list of Symbols.

568 """	581 """

	582 if group_factory is None:

	583 group_factory = lambda token, symbols: self._CreateTransformed(

	584 symbols, full_name=token, template_name=token, name=token,

	585 section_name=self.section_name)

	586

569 after_syms = []	587 after_syms = []

570 filtered_symbols = []	588 filtered_symbols = []

571 symbols_by_token = collections.defaultdict(list)	589 symbols_by_token = collections.OrderedDict()

572 # Index symbols by \|func\|.	590 # Index symbols by \|func\|.

573 for symbol in self:	591 for symbol in self:

574 token = func(symbol)	592 token = func(symbol)

575 if token is None:	593 if token is None:

576 filtered_symbols.append(symbol)	594 filtered_symbols.append(symbol)

577 symbols_by_token[token].append(symbol)	595 else:

	596 # Optimization: Store a list only when >1 symbol.

	597 # Saves 200-300ms for _Clustered().

	598 prev = symbols_by_token.setdefault(token, symbol)

	599 if prev is not symbol:

	600 if prev.__class__ == list:

	601 prev.append(symbol)

	602 else:

	603 symbols_by_token[token] = [prev, symbol]

578 # Create the subgroups.	604 # Create the subgroups.

579 include_singles = min_count >= 0	605 include_singles = min_count >= 0

580 min_count = abs(min_count)	606 min_count = abs(min_count)

581 for token, symbols in symbols_by_token.iteritems():	607 for token, symbol_or_list in symbols_by_token.iteritems():

582 if len(symbols) >= min_count:	608 count = 1

583 after_syms.append(self._CreateTransformed(	609 if symbol_or_list.__class__ == list:

584 symbols, name=token, section_name=self.section_name,	610 count = len(symbol_or_list)

585 is_sorted=False))	611

586 elif include_singles:	612 if count >= min_count:

587 after_syms.extend(symbols)	613 if count == 1:

	614 symbol_or_list = [symbol_or_list]

	615 after_syms.append(group_factory(token, symbol_or_list))

588 else:	616 else:

589 filtered_symbols.extend(symbols)	617 target_list = after_syms if include_singles else filtered_symbols

590 grouped = self._CreateTransformed(	618 if count == 1:

	619 target_list.append(symbol_or_list)

	620 else:

	621 target_list.extend(symbol_or_list)

	622

	623 return self._CreateTransformed(

591 after_syms, filtered_symbols=filtered_symbols,	624 after_syms, filtered_symbols=filtered_symbols,

592 section_name=self.section_name, is_sorted=False)	625 section_name=self.section_name)

593 return grouped	626

	627 def _Clustered(self):

	628 """Returns a new SymbolGroup with some symbols moved into subgroups.

	629

	630 Method is private since it only ever makes sense to call it from

	631 SizeInfo.symbols.

	632

	633 The main function of clustering is to put symbols that were broken into

	634 multiple parts under a group so that they once again look like a single

	635 symbol. It also groups together symbols like "** merge strings".

	636

	637 To view created groups:

	638 Print(size_info.symbols.WhereIsGroup())

	639 """

	640 def cluster_func(symbol):

	641 name = symbol.full_name

	642 if not name:

	643 # min_count=2 will ensure order is maintained while not being grouped.

	644 # "&" to distinguish from real symbol names, id() to ensure uniqueness.

	645 return '&' + hex(id(symbol))

	646 if name.startswith('*'):

	647 # "symbol gap 3" -> "symbol gaps"

	648 name = re.sub(r'\s+\d+( \(.*\))?$', 's', name)

	649 return name

	650

	651 # Use a custom factory to fill in name & template_name.

	652 def group_factory(full_name, symbols):

	653 sym = symbols[0]

	654 if full_name.startswith('*'):

	655 return self._CreateTransformed(

	656 symbols, full_name=full_name, template_name=full_name,

	657 name=full_name, section_name=sym.section_name)

	658 return self._CreateTransformed(

	659 symbols, full_name=full_name, template_name=sym.template_name,

	660 name=sym.name, section_name=sym.section_name)

	661

	662 # A full second faster to cluster per-section. Plus, don't need to create

	663 # (section_name, name) tuples in cluster_func.

	664 ret = []

	665 for section in self.GroupedBySectionName():

	666 ret.extend(section.GroupedBy(

	667 cluster_func, min_count=2, group_factory=group_factory))

	668

	669 return self._CreateTransformed(ret)

594	670

595 def GroupedBySectionName(self):	671 def GroupedBySectionName(self):

596 return self.GroupedBy(lambda s: s.section_name)	672 return self.GroupedBy(lambda s: s.section_name)

597	673

598 def GroupedByName(self, depth=0, min_count=0):	674 def GroupedByName(self, depth=0, min_count=0):

599 """Groups by symbol name, where \|depth\| controls how many ::s to include.	675 """Groups by symbol name, where \|depth\| controls how many ::s to include.

600	676

601 Does not differentiate between namespaces/classes/functions.	677 Does not differentiate between namespaces/classes/functions.

602	678

603 Args:	679 Args:

(...skipping 161 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
765	841

766	842

767 def _ExtractSuffixAfterSeparator(string, separator, count):	843 def _ExtractSuffixAfterSeparator(string, separator, count):

768 prev_idx = len(string) + 1	844 prev_idx = len(string) + 1

769 for _ in xrange(count):	845 for _ in xrange(count):

770 idx = string.rfind(separator, 0, prev_idx - 1)	846 idx = string.rfind(separator, 0, prev_idx - 1)

771 if idx < 0:	847 if idx < 0:

772 break	848 break

773 prev_idx = idx	849 prev_idx = idx

774 return string[:prev_idx]	850 return string[:prev_idx]

OLD	NEW

« no previous file with comments | « tools/binary_size/libsupersize/integration_test.py ('k') | tools/binary_size/libsupersize/testdata/Archive.golden » ('j') | no next file with comments »