tools/binary_size/models.py - Issue 2791433004: //tools/binary_size: source_path information, change file format, fixes

Side by Side Diff: tools/binary_size/models.py

Issue 2791433004: //tools/binary_size: source_path information, change file format, fixes (Closed)

Patch Set: fix comment for _DetectToolPrefix Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 # Copyright 2017 The Chromium Authors. All rights reserved.	1 # Copyright 2017 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4 """Classes that comprise the data model for binary size analysis."""	4 """Classes that comprise the data model for binary size analysis."""

5	5

6 import collections	6 import collections

7 import copy	7 import copy

	8 import os

8 import re	9 import re

9	10

10	11

11 SECTION_TO_SECTION_NAME = {	12 SECTION_TO_SECTION_NAME = {

12 'b': '.bss',	13 'b': '.bss',

13 'd': '.data',	14 'd': '.data',

14 'r': '.rodata',	15 'r': '.rodata',

15 't': '.text',	16 't': '.text',

16 }	17 }

17	18

18	19

19 class SizeInfo(object):	20 class SizeInfo(object):

20 """Represents all size information for a single binary.	21 """Represents all size information for a single binary.

21	22

22 Fields:	23 Fields:

23 section_sizes: A dict of section_name -> size.	24 section_sizes: A dict of section_name -> size.

24 symbols: A SymbolGroup (or SymbolDiff) with all symbols in it.	25 symbols: A SymbolGroup (or SymbolDiff) with all symbols in it.

25 """	26 """

26 __slots__ = (	27 __slots__ = (

	28 'section_sizes',

27 'symbols',	29 'symbols',

28 'section_sizes',	30 'tag',

	31 'timestamp',

29 )	32 )

30	33

31 """Root size information."""	34 """Root size information."""

32 def __init__(self, symbols, section_sizes):	35 def __init__(self, section_sizes, symbols, timestamp=None, tag=''):

33 self.symbols = symbols

34 self.section_sizes = section_sizes # E.g. {'.text': 0}	36 self.section_sizes = section_sizes # E.g. {'.text': 0}

	37 self.symbols = symbols # List of symbols sorted by address per-section.

	38 self.timestamp = timestamp # UTC datetime object.

	39 self.tag = tag # E.g. git revision.

	40 assert not tag or '\n' not in tag # Simplifies file format.

35	41

36	42

37 class BaseSymbol(object):	43 class BaseSymbol(object):

38 """Base class for Symbol and SymbolGroup."""	44 """Base class for Symbol and SymbolGroup."""

39 __slots__ = ()	45 __slots__ = ()

40	46

41 @property	47 @property

42 def section(self):	48 def section(self):

43 """Returns the one-letter section.	49 """Returns the one-letter section.

44	50

(...skipping 19 matching lines...) Expand all Loading...
64 # TODO(agrieve): Also match generated functions such as:	70 # TODO(agrieve): Also match generated functions such as:

65 # startup._GLOBAL__sub_I_page_allocator.cc	71 # startup._GLOBAL__sub_I_page_allocator.cc

66 return self.name.endswith(']') and not self.name.endswith('[]')	72 return self.name.endswith(']') and not self.name.endswith('[]')

67	73

68 def _Key(self):	74 def _Key(self):

69 """Returns a tuple that can be used to see if two Symbol are the same.	75 """Returns a tuple that can be used to see if two Symbol are the same.

70	76

71 Keys are not guaranteed to be unique within a SymbolGroup. For example, it	77 Keys are not guaranteed to be unique within a SymbolGroup. For example, it

72 is common to have multiple "** merge strings" symbols, which will have a	78 is common to have multiple "** merge strings" symbols, which will have a

73 common key."""	79 common key."""

74 return (self.section_name, self.function_signature or self.name)	80 return (self.section_name, self.full_name or self.name)

75	81

76	82

77 class Symbol(BaseSymbol):	83 class Symbol(BaseSymbol):

78 """Represents a single symbol within a binary."""	84 """Represents a single symbol within a binary."""

79	85

80 __slots__ = (	86 __slots__ = (

	87 'address',

	88 'full_name',

	89 'is_anonymous',

	90 'object_path',

	91 'name',

	92 'flags',

	93 'padding',

81 'section_name',	94 'section_name',

82 'address',	95 'source_path',

83 'size',	96 'size',

84 'padding',

85 'name',

86 'function_signature',

87 'path',

88 )	97 )

89	98

90 def __init__(self, section_name, size_without_padding, address=None,	99 def __init__(self, section_name, size_without_padding, address=None,

91 name=None, path=None, function_signature=None):	100 name=None, source_path=None, object_path=None,

	101 full_name=None, is_anonymous=False):

92 self.section_name = section_name	102 self.section_name = section_name

93 self.address = address or 0	103 self.address = address or 0

94 self.name = name or ''	104 self.name = name or ''

95 self.function_signature = function_signature or ''	105 self.full_name = full_name or ''

96 self.path = path or ''	106 self.source_path = source_path or ''

	107 self.object_path = object_path or ''

97 self.size = size_without_padding	108 self.size = size_without_padding

	109 # Change this to be a bitfield of flags if ever there is a need to add

	110 # another similar thing.

	111 self.is_anonymous = is_anonymous

98 self.padding = 0	112 self.padding = 0

99	113

100 def __repr__(self):	114 def __repr__(self):

101 return '%s@%x(size=%d,padding=%d,name=%s,path=%s)' % (	115 return '%s@%x(size=%d,padding=%d,name=%s,path=%s,anon=%d)' % (

102 self.section_name, self.address, self.size_without_padding,	116 self.section_name, self.address, self.size_without_padding,

103 self.padding, self.name, self.path)	117 self.padding, self.name, self.source_path or self.object_path,

	118 int(self.is_anonymous))

104	119

105	120

106 class SymbolGroup(BaseSymbol):	121 class SymbolGroup(BaseSymbol):

107 """Represents a group of symbols using the same interface as Symbol.	122 """Represents a group of symbols using the same interface as Symbol.

108	123

109 SymbolGroups are immutable. All filtering / sorting will return new	124 SymbolGroups are immutable. All filtering / sorting will return new

110 SymbolGroups objects.	125 SymbolGroups objects.

111 """	126 """

112	127

113 __slots__ = (	128 __slots__ = (

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
145 def __add__(self, other):	160 def __add__(self, other):

146 self_ids = set(id(s) for s in self)	161 self_ids = set(id(s) for s in self)

147 new_symbols = self.symbols + [s for s in other if id(s) not in self_ids]	162 new_symbols = self.symbols + [s for s in other if id(s) not in self_ids]

148 return self._CreateTransformed(new_symbols, section_name=self.section_name)	163 return self._CreateTransformed(new_symbols, section_name=self.section_name)

149	164

150 @property	165 @property

151 def address(self):	166 def address(self):

152 return 0	167 return 0

153	168

154 @property	169 @property

155 def function_signature(self):	170 def full_name(self):

156 return None	171 return None

157	172

158 @property	173 @property

159 def path(self):	174 def is_anonymous(self):

	175 return False

	176

	177 @property

	178 def source_path(self):

160 return None	179 return None

161	180

162 @property	181 @property

163 def size(self):	182 def size(self):

164 if self.IsBss():	183 if self.IsBss():

165 return sum(s.size for s in self)	184 return sum(s.size for s in self)

166 return sum(s.size for s in self if not s.IsBss())	185 return sum(s.size for s in self if not s.IsBss())

167	186

168 @property	187 @property

169 def padding(self):	188 def padding(self):

(...skipping 11 matching lines...) Expand all Loading...
181 # Default to sorting by abs(size) then name.	200 # Default to sorting by abs(size) then name.

182 if cmp_func is None and key is None:	201 if cmp_func is None and key is None:

183 cmp_func = lambda a, b: cmp((a.IsBss(), abs(b.size), a.name),	202 cmp_func = lambda a, b: cmp((a.IsBss(), abs(b.size), a.name),

184 (b.IsBss(), abs(a.size), b.name))	203 (b.IsBss(), abs(a.size), b.name))

185	204

186 new_symbols = sorted(self.symbols, cmp_func, key, reverse)	205 new_symbols = sorted(self.symbols, cmp_func, key, reverse)

187 return self._CreateTransformed(new_symbols,	206 return self._CreateTransformed(new_symbols,

188 filtered_symbols=self.filtered_symbols,	207 filtered_symbols=self.filtered_symbols,

189 section_name=self.section_name)	208 section_name=self.section_name)

190	209

	210 def SortedByName(self, reverse=False):

	211 return self.Sorted(key=(lambda s:s.name), reverse=reverse)

	212

	213 def SortedByAddress(self, reverse=False):

	214 return self.Sorted(key=(lambda s:s.address), reverse=reverse)

	215

	216 def SortedByCount(self, reverse=False):

	217 return self.Sorted(key=(lambda s:len(s) if s.IsGroup() else 1),

	218 reverse=not reverse)

	219

191 def Filter(self, func):	220 def Filter(self, func):

192 filtered_and_kept = ([], [])	221 filtered_and_kept = ([], [])

193 for symbol in self:	222 for symbol in self:

194 filtered_and_kept[int(bool(func(symbol)))].append(symbol)	223 filtered_and_kept[int(bool(func(symbol)))].append(symbol)

195 return self._CreateTransformed(filtered_and_kept[1],	224 return self._CreateTransformed(filtered_and_kept[1],

196 filtered_symbols=filtered_and_kept[0],	225 filtered_symbols=filtered_and_kept[0],

197 section_name=self.section_name)	226 section_name=self.section_name)

198	227

199 def WhereBiggerThan(self, min_size):	228 def WhereBiggerThan(self, min_size):

200 return self.Filter(lambda s: s.size >= min_size)	229 return self.Filter(lambda s: s.size >= min_size)

201	230

202 def WhereInSection(self, section):	231 def WhereInSection(self, section):

203 if len(section) == 1:	232 if len(section) == 1:

204 ret = self.Filter(lambda s: s.section == section)	233 ret = self.Filter(lambda s: s.section == section)

205 ret.section_name = SECTION_TO_SECTION_NAME[section]	234 ret.section_name = SECTION_TO_SECTION_NAME[section]

206 else:	235 else:

207 ret = self.Filter(lambda s: s.section_name == section)	236 ret = self.Filter(lambda s: s.section_name == section)

208 ret.section_name = section	237 ret.section_name = section

209 return ret	238 return ret

210	239

211 def WhereIsGenerated(self):	240 def WhereIsGenerated(self):

212 return self.Filter(lambda s: s.IsGenerated())	241 return self.Filter(lambda s: s.IsGenerated())

213	242

214 def WhereNameMatches(self, pattern):	243 def WhereNameMatches(self, pattern):

215 regex = re.compile(pattern)	244 regex = re.compile(pattern)

216 return self.Filter(lambda s: regex.search(s.name))	245 return self.Filter(lambda s: regex.search(s.name))

217	246

	247 def WhereObjectPathMatches(self, pattern):

	248 regex = re.compile(pattern)

	249 return self.Filter(lambda s: regex.search(s.object_path))

	250

	251 def WhereSourcePathMatches(self, pattern):

	252 regex = re.compile(pattern)

	253 return self.Filter(lambda s: regex.search(s.source_path))

	254

218 def WherePathMatches(self, pattern):	255 def WherePathMatches(self, pattern):

219 regex = re.compile(pattern)	256 regex = re.compile(pattern)

220 return self.Filter(lambda s: s.path and regex.search(s.path))	257 return self.Filter(lambda s: regex.search(s.source_path or s.object_path))

221	258

222 def WhereAddressInRange(self, start, end):	259 def WhereAddressInRange(self, start, end):

223 return self.Filter(lambda s: s.address >= start and s.address <= end)	260 return self.Filter(lambda s: s.address >= start and s.address <= end)

224	261

225 def WhereHasAnyAttribution(self):	262 def WhereHasAnyAttribution(self):

226 return self.Filter(lambda s: s.name or s.path)	263 return self.Filter(lambda s: s.name or s.source_path or s.object_path)

227	264

228 def Inverted(self):	265 def Inverted(self):

229 return self._CreateTransformed(self.filtered_symbols,	266 return self._CreateTransformed(self.filtered_symbols,

230 filtered_symbols=self.symbols)	267 filtered_symbols=self.symbols)

231	268

232 def GroupBy(self, func):	269 def GroupBy(self, func, min_count=0):

	270 """Returns a SymbolGroup of SymbolGroups, indexed by \|func\|.

	271

	272 Args:

	273 func: Grouping function. Passed a symbol and returns a string for the

	274 name of the subgroup to put the symbol in. If None is returned, the

	275 symbol is omitted.

	276 min_count: Minimum number of symbols for a group. If fewer than this many

	277 symbols end up in a group, they will not be put within a group.

	278 Use a negative value to omit symbols entirely rather than

	279 include them outside of a group.

	280 """

233 new_syms = []	281 new_syms = []

234 filtered_symbols = []	282 filtered_symbols = []

235 symbols_by_token = collections.defaultdict(list)	283 symbols_by_token = collections.defaultdict(list)

	284 # Index symbols by \|func\|.

236 for symbol in self:	285 for symbol in self:

237 token = func(symbol)	286 token = func(symbol)

238 if not token:	287 if token is None:

239 filtered_symbols.append(symbol)	288 filtered_symbols.append(symbol)

240 continue

241 symbols_by_token[token].append(symbol)	289 symbols_by_token[token].append(symbol)

	290 # Create the subgroups.

	291 include_singles = min_count >= 0

	292 min_count = abs(min_count)

242 for token, symbols in symbols_by_token.iteritems():	293 for token, symbols in symbols_by_token.iteritems():

243 new_syms.append(self._CreateTransformed(symbols, name=token,	294 if len(symbols) >= min_count:

244 section_name=self.section_name))	295 new_syms.append(self._CreateTransformed(symbols, name=token,

	296 section_name=self.section_name))

	297 elif include_singles:

	298 new_syms.extend(symbols)

	299 else:

	300 filtered_symbols.extend(symbols)

245 return self._CreateTransformed(new_syms, filtered_symbols=filtered_symbols,	301 return self._CreateTransformed(new_syms, filtered_symbols=filtered_symbols,

246 section_name=self.section_name)	302 section_name=self.section_name)

247	303

248 def GroupByNamespace(self, depth=1):	304 def GroupBySectionName(self):

	305 return self.GroupBy(lambda s: s.section_name)

	306

	307 def GroupByNamespace(self, depth=0, fallback='{global}', min_count=0):

	308 """Groups by symbol namespace (as denoted by ::s).

	309

	310 Does not differentiate between C++ namespaces and C++ classes.

	311

	312 Args:

	313 depth: When 0 (default), groups by entire namespace. When 1, groups by

	314 top-level name, when 2, groups by top 2 names, etc.

	315 fallback: Use this value when no namespace exists.

	316 min_count: Minimum number of symbols for a group. If fewer than this many

	317 symbols end up in a group, they will not be put within a group.

	318 Use a negative value to omit symbols entirely rather than

	319 include them outside of a group.

	320 """

249 def extract_namespace(symbol):	321 def extract_namespace(symbol):

250 # Does not distinguish between classes and namespaces.	322 # Remove template params.

251 idx = -2	323 name = symbol.name

252 for _ in xrange(depth):	324 template_idx = name.find('<')

253 idx = symbol.name.find('::', idx + 2)	325 if template_idx:

254 if idx != -1:	326 name = name[:template_idx]

255 ret = symbol.name[:idx]

256 if '<' not in ret:

257 return ret

258 return '{global}'

259 return self.GroupBy(extract_namespace)

260	327

261 def GroupByPath(self, depth=1):	328 # Remove after the final :: (not part of the namespace).

	329 colon_idx = name.rfind('::')

	330 if colon_idx == -1:

	331 return fallback

	332 name = name[:colon_idx]

	333

	334 return _ExtractPrefixBeforeSeparator(name, '::', depth)

	335 return self.GroupBy(extract_namespace, min_count=min_count)

	336

	337 def GroupBySourcePath(self, depth=0, fallback='{no path}',

	338 fallback_to_object_path=True, min_count=0):

	339 """Groups by source_path.

	340

	341 Args:

	342 depth: When 0 (default), groups by entire path. When 1, groups by

	343 top-level directory, when 2, groups by top 2 directories, etc.

	344 fallback: Use this value when no path exists.

	345 fallback_to_object_path: When True (default), uses object_path when

	346 source_path is missing.

	347 min_count: Minimum number of symbols for a group. If fewer than this many

	348 symbols end up in a group, they will not be put within a group.

	349 Use a negative value to omit symbols entirely rather than

	350 include them outside of a group.

	351 """

262 def extract_path(symbol):	352 def extract_path(symbol):

263 idx = -1	353 path = symbol.source_path

264 for _ in xrange(depth):	354 if fallback_to_object_path and not path:

265 idx = symbol.path.find('/', idx + 1)	355 path = symbol.object_path

266 if idx != -1:	356 path = path or fallback

267 return symbol.path[:idx]	357 return _ExtractPrefixBeforeSeparator(path, os.path.sep, depth)

268 return '{path unknown}'	358 return self.GroupBy(extract_path, min_count=min_count)

269 return self.GroupBy(extract_path)	359

	360 def GroupByObjectPath(self, depth=0, fallback='{no path}', min_count=0):

	361 """Groups by object_path.

	362

	363 Args:

	364 depth: When 0 (default), groups by entire path. When 1, groups by

	365 top-level directory, when 2, groups by top 2 directories, etc.

	366 fallback: Use this value when no object path exists.

	367 min_count: Minimum number of symbols for a group. If fewer than this many

	368 symbols end up in a group, they will not be put within a group.

	369 Use a negative value to omit symbols entirely rather than

	370 include them outside of a group.

	371 """

	372 def extract_path(symbol):

	373 path = symbol.object_path or fallback

	374 return _ExtractPrefixBeforeSeparator(path, os.path.sep, depth)

	375 return self.GroupBy(extract_path, min_count=min_count)

270	376

271	377

272 class SymbolDiff(SymbolGroup):	378 class SymbolDiff(SymbolGroup):

273 """A SymbolGroup subclass representing a diff of two other SymbolGroups.	379 """A SymbolGroup subclass representing a diff of two other SymbolGroups.

274	380

275 All Symbols contained within have a \|size\| which is actually the size delta.	381 All Symbols contained within have a \|size\| which is actually the size delta.

276 Additionally, metadata is kept about which symbols were added / removed /	382 Additionally, metadata is kept about which symbols were added / removed /

277 changed.	383 changed.

278 """	384 """

279 __slots__ = (	385 __slots__ = (

(...skipping 71 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
351	457

352 Returns:	458 Returns:

353 Returns a SizeInfo when args are of type SizeInfo.	459 Returns a SizeInfo when args are of type SizeInfo.

354 Returns a SymbolDiff when args are of type SymbolGroup.	460 Returns a SymbolDiff when args are of type SymbolGroup.

355 """	461 """

356 if isinstance(new, SizeInfo):	462 if isinstance(new, SizeInfo):

357 assert isinstance(old, SizeInfo)	463 assert isinstance(old, SizeInfo)

358 section_sizes = {	464 section_sizes = {

359 k:new.section_sizes[k] - v for k, v in old.section_sizes.iteritems()}	465 k:new.section_sizes[k] - v for k, v in old.section_sizes.iteritems()}

360 symbol_diff = Diff(new.symbols, old.symbols)	466 symbol_diff = Diff(new.symbols, old.symbols)

361 return SizeInfo(symbol_diff, section_sizes)	467 return SizeInfo(section_sizes, symbol_diff)

362	468

363 assert isinstance(new, SymbolGroup) and isinstance(old, SymbolGroup)	469 assert isinstance(new, SymbolGroup) and isinstance(old, SymbolGroup)

364 symbols_by_key = collections.defaultdict(list)	470 symbols_by_key = collections.defaultdict(list)

365 for s in old:	471 for s in old:

366 symbols_by_key[s._Key()].append(s)	472 symbols_by_key[s._Key()].append(s)

367	473

368 added = []	474 added = []

369 removed = []	475 removed = []

370 similar = []	476 similar = []

371 # For similar symbols, padding is zeroed out. In order to not lose the	477 # For similar symbols, padding is zeroed out. In order to not lose the

372 # information entirely, store it in aggregate.	478 # information entirely, store it in aggregate.

373 padding_by_section_name = collections.defaultdict(int)	479 padding_by_section_name = collections.defaultdict(int)

374 for new_sym in new:	480 for new_sym in new:

375 matching_syms = symbols_by_key.get(new_sym._Key())	481 matching_syms = symbols_by_key.get(new_sym._Key())

376 if matching_syms:	482 if matching_syms:

377 old_sym = matching_syms.pop(0)	483 old_sym = matching_syms.pop(0)

378 # More stable/useful to compare size without padding.	484 # More stable/useful to compare size without padding.

379 size_diff = (new_sym.size_without_padding -	485 size_diff = (new_sym.size_without_padding -

380 old_sym.size_without_padding)	486 old_sym.size_without_padding)

381 merged_sym = Symbol(old_sym.section_name, size_diff,	487 merged_sym = Symbol(new_sym.section_name, size_diff,

382 address=old_sym.address, name=old_sym.name,	488 address=new_sym.address, name=new_sym.name,

383 path=old_sym.path,	489 source_path=new_sym.source_path,

384 function_signature=old_sym.function_signature)	490 object_path=new_sym.object_path,

	491 full_name=new_sym.full_name,

	492 is_anonymous=new_sym.is_anonymous)

385 similar.append(merged_sym)	493 similar.append(merged_sym)

386 padding_by_section_name[new_sym.section_name] += (	494 padding_by_section_name[new_sym.section_name] += (

387 new_sym.padding - old_sym.padding)	495 new_sym.padding - old_sym.padding)

388 else:	496 else:

389 added.append(new_sym)	497 added.append(new_sym)

390	498

391 for remaining_syms in symbols_by_key.itervalues():	499 for remaining_syms in symbols_by_key.itervalues():

392 for old_sym in remaining_syms:	500 for old_sym in remaining_syms:

393 duped = copy.copy(old_sym)	501 duped = copy.copy(old_sym)

394 duped.size = -duped.size	502 duped.size = -duped.size

395 duped.padding = -duped.padding	503 duped.padding = -duped.padding

396 removed.append(duped)	504 removed.append(duped)

397	505

398 for section_name, padding in padding_by_section_name.iteritems():	506 for section_name, padding in padding_by_section_name.iteritems():

399 similar.append(Symbol(section_name, padding,	507 similar.append(Symbol(section_name, padding,

400 name='** aggregate padding of delta symbols'))	508 name='** aggregate padding of delta symbols'))

401 return SymbolDiff(added, removed, similar)	509 return SymbolDiff(added, removed, similar)

	510

	511

	512 def _ExtractPrefixBeforeSeparator(string, separator, count=1):

	513 idx = -len(separator)

	514 prev_idx = None

	515 for _ in xrange(count):

	516 idx = string.find(separator, idx + len(separator))

	517 if idx < 0:

	518 break

	519 prev_idx = idx

	520 return string[:prev_idx]

OLD	NEW

« no previous file with comments | « tools/binary_size/map2size.py ('k') | tools/binary_size/ninja_parser.py » ('j') | no next file with comments »