Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: tools/binary_size/models.py

Issue 2791433004: //tools/binary_size: source_path information, change file format, fixes (Closed)
Patch Set: fix comment for _DetectToolPrefix Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/binary_size/map2size.py ('k') | tools/binary_size/ninja_parser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2017 The Chromium Authors. All rights reserved. 1 # Copyright 2017 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 """Classes that comprise the data model for binary size analysis.""" 4 """Classes that comprise the data model for binary size analysis."""
5 5
6 import collections 6 import collections
7 import copy 7 import copy
8 import os
8 import re 9 import re
9 10
10 11
11 SECTION_TO_SECTION_NAME = { 12 SECTION_TO_SECTION_NAME = {
12 'b': '.bss', 13 'b': '.bss',
13 'd': '.data', 14 'd': '.data',
14 'r': '.rodata', 15 'r': '.rodata',
15 't': '.text', 16 't': '.text',
16 } 17 }
17 18
18 19
19 class SizeInfo(object): 20 class SizeInfo(object):
20 """Represents all size information for a single binary. 21 """Represents all size information for a single binary.
21 22
22 Fields: 23 Fields:
23 section_sizes: A dict of section_name -> size. 24 section_sizes: A dict of section_name -> size.
24 symbols: A SymbolGroup (or SymbolDiff) with all symbols in it. 25 symbols: A SymbolGroup (or SymbolDiff) with all symbols in it.
25 """ 26 """
26 __slots__ = ( 27 __slots__ = (
28 'section_sizes',
27 'symbols', 29 'symbols',
28 'section_sizes', 30 'tag',
31 'timestamp',
29 ) 32 )
30 33
31 """Root size information.""" 34 """Root size information."""
32 def __init__(self, symbols, section_sizes): 35 def __init__(self, section_sizes, symbols, timestamp=None, tag=''):
33 self.symbols = symbols
34 self.section_sizes = section_sizes # E.g. {'.text': 0} 36 self.section_sizes = section_sizes # E.g. {'.text': 0}
37 self.symbols = symbols # List of symbols sorted by address per-section.
38 self.timestamp = timestamp # UTC datetime object.
39 self.tag = tag # E.g. git revision.
40 assert not tag or '\n' not in tag # Simplifies file format.
35 41
36 42
37 class BaseSymbol(object): 43 class BaseSymbol(object):
38 """Base class for Symbol and SymbolGroup.""" 44 """Base class for Symbol and SymbolGroup."""
39 __slots__ = () 45 __slots__ = ()
40 46
41 @property 47 @property
42 def section(self): 48 def section(self):
43 """Returns the one-letter section. 49 """Returns the one-letter section.
44 50
(...skipping 19 matching lines...) Expand all
64 # TODO(agrieve): Also match generated functions such as: 70 # TODO(agrieve): Also match generated functions such as:
65 # startup._GLOBAL__sub_I_page_allocator.cc 71 # startup._GLOBAL__sub_I_page_allocator.cc
66 return self.name.endswith(']') and not self.name.endswith('[]') 72 return self.name.endswith(']') and not self.name.endswith('[]')
67 73
68 def _Key(self): 74 def _Key(self):
69 """Returns a tuple that can be used to see if two Symbol are the same. 75 """Returns a tuple that can be used to see if two Symbol are the same.
70 76
71 Keys are not guaranteed to be unique within a SymbolGroup. For example, it 77 Keys are not guaranteed to be unique within a SymbolGroup. For example, it
72 is common to have multiple "** merge strings" symbols, which will have a 78 is common to have multiple "** merge strings" symbols, which will have a
73 common key.""" 79 common key."""
74 return (self.section_name, self.function_signature or self.name) 80 return (self.section_name, self.full_name or self.name)
75 81
76 82
77 class Symbol(BaseSymbol): 83 class Symbol(BaseSymbol):
78 """Represents a single symbol within a binary.""" 84 """Represents a single symbol within a binary."""
79 85
80 __slots__ = ( 86 __slots__ = (
87 'address',
88 'full_name',
89 'is_anonymous',
90 'object_path',
91 'name',
92 'flags',
93 'padding',
81 'section_name', 94 'section_name',
82 'address', 95 'source_path',
83 'size', 96 'size',
84 'padding',
85 'name',
86 'function_signature',
87 'path',
88 ) 97 )
89 98
90 def __init__(self, section_name, size_without_padding, address=None, 99 def __init__(self, section_name, size_without_padding, address=None,
91 name=None, path=None, function_signature=None): 100 name=None, source_path=None, object_path=None,
101 full_name=None, is_anonymous=False):
92 self.section_name = section_name 102 self.section_name = section_name
93 self.address = address or 0 103 self.address = address or 0
94 self.name = name or '' 104 self.name = name or ''
95 self.function_signature = function_signature or '' 105 self.full_name = full_name or ''
96 self.path = path or '' 106 self.source_path = source_path or ''
107 self.object_path = object_path or ''
97 self.size = size_without_padding 108 self.size = size_without_padding
109 # Change this to be a bitfield of flags if ever there is a need to add
110 # another similar thing.
111 self.is_anonymous = is_anonymous
98 self.padding = 0 112 self.padding = 0
99 113
100 def __repr__(self): 114 def __repr__(self):
101 return '%s@%x(size=%d,padding=%d,name=%s,path=%s)' % ( 115 return '%s@%x(size=%d,padding=%d,name=%s,path=%s,anon=%d)' % (
102 self.section_name, self.address, self.size_without_padding, 116 self.section_name, self.address, self.size_without_padding,
103 self.padding, self.name, self.path) 117 self.padding, self.name, self.source_path or self.object_path,
118 int(self.is_anonymous))
104 119
105 120
106 class SymbolGroup(BaseSymbol): 121 class SymbolGroup(BaseSymbol):
107 """Represents a group of symbols using the same interface as Symbol. 122 """Represents a group of symbols using the same interface as Symbol.
108 123
109 SymbolGroups are immutable. All filtering / sorting will return new 124 SymbolGroups are immutable. All filtering / sorting will return new
110 SymbolGroups objects. 125 SymbolGroups objects.
111 """ 126 """
112 127
113 __slots__ = ( 128 __slots__ = (
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
145 def __add__(self, other): 160 def __add__(self, other):
146 self_ids = set(id(s) for s in self) 161 self_ids = set(id(s) for s in self)
147 new_symbols = self.symbols + [s for s in other if id(s) not in self_ids] 162 new_symbols = self.symbols + [s for s in other if id(s) not in self_ids]
148 return self._CreateTransformed(new_symbols, section_name=self.section_name) 163 return self._CreateTransformed(new_symbols, section_name=self.section_name)
149 164
150 @property 165 @property
151 def address(self): 166 def address(self):
152 return 0 167 return 0
153 168
154 @property 169 @property
155 def function_signature(self): 170 def full_name(self):
156 return None 171 return None
157 172
158 @property 173 @property
159 def path(self): 174 def is_anonymous(self):
175 return False
176
177 @property
178 def source_path(self):
160 return None 179 return None
161 180
162 @property 181 @property
163 def size(self): 182 def size(self):
164 if self.IsBss(): 183 if self.IsBss():
165 return sum(s.size for s in self) 184 return sum(s.size for s in self)
166 return sum(s.size for s in self if not s.IsBss()) 185 return sum(s.size for s in self if not s.IsBss())
167 186
168 @property 187 @property
169 def padding(self): 188 def padding(self):
(...skipping 11 matching lines...) Expand all
181 # Default to sorting by abs(size) then name. 200 # Default to sorting by abs(size) then name.
182 if cmp_func is None and key is None: 201 if cmp_func is None and key is None:
183 cmp_func = lambda a, b: cmp((a.IsBss(), abs(b.size), a.name), 202 cmp_func = lambda a, b: cmp((a.IsBss(), abs(b.size), a.name),
184 (b.IsBss(), abs(a.size), b.name)) 203 (b.IsBss(), abs(a.size), b.name))
185 204
186 new_symbols = sorted(self.symbols, cmp_func, key, reverse) 205 new_symbols = sorted(self.symbols, cmp_func, key, reverse)
187 return self._CreateTransformed(new_symbols, 206 return self._CreateTransformed(new_symbols,
188 filtered_symbols=self.filtered_symbols, 207 filtered_symbols=self.filtered_symbols,
189 section_name=self.section_name) 208 section_name=self.section_name)
190 209
210 def SortedByName(self, reverse=False):
211 return self.Sorted(key=(lambda s:s.name), reverse=reverse)
212
213 def SortedByAddress(self, reverse=False):
214 return self.Sorted(key=(lambda s:s.address), reverse=reverse)
215
216 def SortedByCount(self, reverse=False):
217 return self.Sorted(key=(lambda s:len(s) if s.IsGroup() else 1),
218 reverse=not reverse)
219
191 def Filter(self, func): 220 def Filter(self, func):
192 filtered_and_kept = ([], []) 221 filtered_and_kept = ([], [])
193 for symbol in self: 222 for symbol in self:
194 filtered_and_kept[int(bool(func(symbol)))].append(symbol) 223 filtered_and_kept[int(bool(func(symbol)))].append(symbol)
195 return self._CreateTransformed(filtered_and_kept[1], 224 return self._CreateTransformed(filtered_and_kept[1],
196 filtered_symbols=filtered_and_kept[0], 225 filtered_symbols=filtered_and_kept[0],
197 section_name=self.section_name) 226 section_name=self.section_name)
198 227
199 def WhereBiggerThan(self, min_size): 228 def WhereBiggerThan(self, min_size):
200 return self.Filter(lambda s: s.size >= min_size) 229 return self.Filter(lambda s: s.size >= min_size)
201 230
202 def WhereInSection(self, section): 231 def WhereInSection(self, section):
203 if len(section) == 1: 232 if len(section) == 1:
204 ret = self.Filter(lambda s: s.section == section) 233 ret = self.Filter(lambda s: s.section == section)
205 ret.section_name = SECTION_TO_SECTION_NAME[section] 234 ret.section_name = SECTION_TO_SECTION_NAME[section]
206 else: 235 else:
207 ret = self.Filter(lambda s: s.section_name == section) 236 ret = self.Filter(lambda s: s.section_name == section)
208 ret.section_name = section 237 ret.section_name = section
209 return ret 238 return ret
210 239
211 def WhereIsGenerated(self): 240 def WhereIsGenerated(self):
212 return self.Filter(lambda s: s.IsGenerated()) 241 return self.Filter(lambda s: s.IsGenerated())
213 242
214 def WhereNameMatches(self, pattern): 243 def WhereNameMatches(self, pattern):
215 regex = re.compile(pattern) 244 regex = re.compile(pattern)
216 return self.Filter(lambda s: regex.search(s.name)) 245 return self.Filter(lambda s: regex.search(s.name))
217 246
247 def WhereObjectPathMatches(self, pattern):
248 regex = re.compile(pattern)
249 return self.Filter(lambda s: regex.search(s.object_path))
250
251 def WhereSourcePathMatches(self, pattern):
252 regex = re.compile(pattern)
253 return self.Filter(lambda s: regex.search(s.source_path))
254
218 def WherePathMatches(self, pattern): 255 def WherePathMatches(self, pattern):
219 regex = re.compile(pattern) 256 regex = re.compile(pattern)
220 return self.Filter(lambda s: s.path and regex.search(s.path)) 257 return self.Filter(lambda s: regex.search(s.source_path or s.object_path))
221 258
222 def WhereAddressInRange(self, start, end): 259 def WhereAddressInRange(self, start, end):
223 return self.Filter(lambda s: s.address >= start and s.address <= end) 260 return self.Filter(lambda s: s.address >= start and s.address <= end)
224 261
225 def WhereHasAnyAttribution(self): 262 def WhereHasAnyAttribution(self):
226 return self.Filter(lambda s: s.name or s.path) 263 return self.Filter(lambda s: s.name or s.source_path or s.object_path)
227 264
228 def Inverted(self): 265 def Inverted(self):
229 return self._CreateTransformed(self.filtered_symbols, 266 return self._CreateTransformed(self.filtered_symbols,
230 filtered_symbols=self.symbols) 267 filtered_symbols=self.symbols)
231 268
232 def GroupBy(self, func): 269 def GroupBy(self, func, min_count=0):
270 """Returns a SymbolGroup of SymbolGroups, indexed by |func|.
271
272 Args:
273 func: Grouping function. Passed a symbol and returns a string for the
274 name of the subgroup to put the symbol in. If None is returned, the
275 symbol is omitted.
276 min_count: Minimum number of symbols for a group. If fewer than this many
277 symbols end up in a group, they will not be put within a group.
278 Use a negative value to omit symbols entirely rather than
279 include them outside of a group.
280 """
233 new_syms = [] 281 new_syms = []
234 filtered_symbols = [] 282 filtered_symbols = []
235 symbols_by_token = collections.defaultdict(list) 283 symbols_by_token = collections.defaultdict(list)
284 # Index symbols by |func|.
236 for symbol in self: 285 for symbol in self:
237 token = func(symbol) 286 token = func(symbol)
238 if not token: 287 if token is None:
239 filtered_symbols.append(symbol) 288 filtered_symbols.append(symbol)
240 continue
241 symbols_by_token[token].append(symbol) 289 symbols_by_token[token].append(symbol)
290 # Create the subgroups.
291 include_singles = min_count >= 0
292 min_count = abs(min_count)
242 for token, symbols in symbols_by_token.iteritems(): 293 for token, symbols in symbols_by_token.iteritems():
243 new_syms.append(self._CreateTransformed(symbols, name=token, 294 if len(symbols) >= min_count:
244 section_name=self.section_name)) 295 new_syms.append(self._CreateTransformed(symbols, name=token,
296 section_name=self.section_name))
297 elif include_singles:
298 new_syms.extend(symbols)
299 else:
300 filtered_symbols.extend(symbols)
245 return self._CreateTransformed(new_syms, filtered_symbols=filtered_symbols, 301 return self._CreateTransformed(new_syms, filtered_symbols=filtered_symbols,
246 section_name=self.section_name) 302 section_name=self.section_name)
247 303
248 def GroupByNamespace(self, depth=1): 304 def GroupBySectionName(self):
305 return self.GroupBy(lambda s: s.section_name)
306
307 def GroupByNamespace(self, depth=0, fallback='{global}', min_count=0):
308 """Groups by symbol namespace (as denoted by ::s).
309
310 Does not differentiate between C++ namespaces and C++ classes.
311
312 Args:
313 depth: When 0 (default), groups by entire namespace. When 1, groups by
314 top-level name, when 2, groups by top 2 names, etc.
315 fallback: Use this value when no namespace exists.
316 min_count: Minimum number of symbols for a group. If fewer than this many
317 symbols end up in a group, they will not be put within a group.
318 Use a negative value to omit symbols entirely rather than
319 include them outside of a group.
320 """
249 def extract_namespace(symbol): 321 def extract_namespace(symbol):
250 # Does not distinguish between classes and namespaces. 322 # Remove template params.
251 idx = -2 323 name = symbol.name
252 for _ in xrange(depth): 324 template_idx = name.find('<')
253 idx = symbol.name.find('::', idx + 2) 325 if template_idx:
254 if idx != -1: 326 name = name[:template_idx]
255 ret = symbol.name[:idx]
256 if '<' not in ret:
257 return ret
258 return '{global}'
259 return self.GroupBy(extract_namespace)
260 327
261 def GroupByPath(self, depth=1): 328 # Remove after the final :: (not part of the namespace).
329 colon_idx = name.rfind('::')
330 if colon_idx == -1:
331 return fallback
332 name = name[:colon_idx]
333
334 return _ExtractPrefixBeforeSeparator(name, '::', depth)
335 return self.GroupBy(extract_namespace, min_count=min_count)
336
337 def GroupBySourcePath(self, depth=0, fallback='{no path}',
338 fallback_to_object_path=True, min_count=0):
339 """Groups by source_path.
340
341 Args:
342 depth: When 0 (default), groups by entire path. When 1, groups by
343 top-level directory, when 2, groups by top 2 directories, etc.
344 fallback: Use this value when no path exists.
345 fallback_to_object_path: When True (default), uses object_path when
346 source_path is missing.
347 min_count: Minimum number of symbols for a group. If fewer than this many
348 symbols end up in a group, they will not be put within a group.
349 Use a negative value to omit symbols entirely rather than
350 include them outside of a group.
351 """
262 def extract_path(symbol): 352 def extract_path(symbol):
263 idx = -1 353 path = symbol.source_path
264 for _ in xrange(depth): 354 if fallback_to_object_path and not path:
265 idx = symbol.path.find('/', idx + 1) 355 path = symbol.object_path
266 if idx != -1: 356 path = path or fallback
267 return symbol.path[:idx] 357 return _ExtractPrefixBeforeSeparator(path, os.path.sep, depth)
268 return '{path unknown}' 358 return self.GroupBy(extract_path, min_count=min_count)
269 return self.GroupBy(extract_path) 359
360 def GroupByObjectPath(self, depth=0, fallback='{no path}', min_count=0):
361 """Groups by object_path.
362
363 Args:
364 depth: When 0 (default), groups by entire path. When 1, groups by
365 top-level directory, when 2, groups by top 2 directories, etc.
366 fallback: Use this value when no path exists.
367 min_count: Minimum number of symbols for a group. If fewer than this many
368 symbols end up in a group, they will not be put within a group.
369 Use a negative value to omit symbols entirely rather than
370 include them outside of a group.
371 """
372 def extract_path(symbol):
373 path = symbol.object_path or fallback
374 return _ExtractPrefixBeforeSeparator(path, os.path.sep, depth)
375 return self.GroupBy(extract_path, min_count=min_count)
270 376
271 377
272 class SymbolDiff(SymbolGroup): 378 class SymbolDiff(SymbolGroup):
273 """A SymbolGroup subclass representing a diff of two other SymbolGroups. 379 """A SymbolGroup subclass representing a diff of two other SymbolGroups.
274 380
275 All Symbols contained within have a |size| which is actually the size delta. 381 All Symbols contained within have a |size| which is actually the size delta.
276 Additionally, metadata is kept about which symbols were added / removed / 382 Additionally, metadata is kept about which symbols were added / removed /
277 changed. 383 changed.
278 """ 384 """
279 __slots__ = ( 385 __slots__ = (
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
351 457
352 Returns: 458 Returns:
353 Returns a SizeInfo when args are of type SizeInfo. 459 Returns a SizeInfo when args are of type SizeInfo.
354 Returns a SymbolDiff when args are of type SymbolGroup. 460 Returns a SymbolDiff when args are of type SymbolGroup.
355 """ 461 """
356 if isinstance(new, SizeInfo): 462 if isinstance(new, SizeInfo):
357 assert isinstance(old, SizeInfo) 463 assert isinstance(old, SizeInfo)
358 section_sizes = { 464 section_sizes = {
359 k:new.section_sizes[k] - v for k, v in old.section_sizes.iteritems()} 465 k:new.section_sizes[k] - v for k, v in old.section_sizes.iteritems()}
360 symbol_diff = Diff(new.symbols, old.symbols) 466 symbol_diff = Diff(new.symbols, old.symbols)
361 return SizeInfo(symbol_diff, section_sizes) 467 return SizeInfo(section_sizes, symbol_diff)
362 468
363 assert isinstance(new, SymbolGroup) and isinstance(old, SymbolGroup) 469 assert isinstance(new, SymbolGroup) and isinstance(old, SymbolGroup)
364 symbols_by_key = collections.defaultdict(list) 470 symbols_by_key = collections.defaultdict(list)
365 for s in old: 471 for s in old:
366 symbols_by_key[s._Key()].append(s) 472 symbols_by_key[s._Key()].append(s)
367 473
368 added = [] 474 added = []
369 removed = [] 475 removed = []
370 similar = [] 476 similar = []
371 # For similar symbols, padding is zeroed out. In order to not lose the 477 # For similar symbols, padding is zeroed out. In order to not lose the
372 # information entirely, store it in aggregate. 478 # information entirely, store it in aggregate.
373 padding_by_section_name = collections.defaultdict(int) 479 padding_by_section_name = collections.defaultdict(int)
374 for new_sym in new: 480 for new_sym in new:
375 matching_syms = symbols_by_key.get(new_sym._Key()) 481 matching_syms = symbols_by_key.get(new_sym._Key())
376 if matching_syms: 482 if matching_syms:
377 old_sym = matching_syms.pop(0) 483 old_sym = matching_syms.pop(0)
378 # More stable/useful to compare size without padding. 484 # More stable/useful to compare size without padding.
379 size_diff = (new_sym.size_without_padding - 485 size_diff = (new_sym.size_without_padding -
380 old_sym.size_without_padding) 486 old_sym.size_without_padding)
381 merged_sym = Symbol(old_sym.section_name, size_diff, 487 merged_sym = Symbol(new_sym.section_name, size_diff,
382 address=old_sym.address, name=old_sym.name, 488 address=new_sym.address, name=new_sym.name,
383 path=old_sym.path, 489 source_path=new_sym.source_path,
384 function_signature=old_sym.function_signature) 490 object_path=new_sym.object_path,
491 full_name=new_sym.full_name,
492 is_anonymous=new_sym.is_anonymous)
385 similar.append(merged_sym) 493 similar.append(merged_sym)
386 padding_by_section_name[new_sym.section_name] += ( 494 padding_by_section_name[new_sym.section_name] += (
387 new_sym.padding - old_sym.padding) 495 new_sym.padding - old_sym.padding)
388 else: 496 else:
389 added.append(new_sym) 497 added.append(new_sym)
390 498
391 for remaining_syms in symbols_by_key.itervalues(): 499 for remaining_syms in symbols_by_key.itervalues():
392 for old_sym in remaining_syms: 500 for old_sym in remaining_syms:
393 duped = copy.copy(old_sym) 501 duped = copy.copy(old_sym)
394 duped.size = -duped.size 502 duped.size = -duped.size
395 duped.padding = -duped.padding 503 duped.padding = -duped.padding
396 removed.append(duped) 504 removed.append(duped)
397 505
398 for section_name, padding in padding_by_section_name.iteritems(): 506 for section_name, padding in padding_by_section_name.iteritems():
399 similar.append(Symbol(section_name, padding, 507 similar.append(Symbol(section_name, padding,
400 name='** aggregate padding of delta symbols')) 508 name='** aggregate padding of delta symbols'))
401 return SymbolDiff(added, removed, similar) 509 return SymbolDiff(added, removed, similar)
510
511
512 def _ExtractPrefixBeforeSeparator(string, separator, count=1):
513 idx = -len(separator)
514 prev_idx = None
515 for _ in xrange(count):
516 idx = string.find(separator, idx + len(separator))
517 if idx < 0:
518 break
519 prev_idx = idx
520 return string[:prev_idx]
OLDNEW
« no previous file with comments | « tools/binary_size/map2size.py ('k') | tools/binary_size/ninja_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698