| OLD | NEW |
| 1 # Copyright 2017 The Chromium Authors. All rights reserved. | 1 # Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 """Classes that comprise the data model for binary size analysis.""" | 4 """Classes that comprise the data model for binary size analysis.""" |
| 5 | 5 |
| 6 import collections | 6 import collections |
| 7 import copy | 7 import copy |
| 8 import os |
| 8 import re | 9 import re |
| 9 | 10 |
| 10 | 11 |
| 11 SECTION_TO_SECTION_NAME = { | 12 SECTION_TO_SECTION_NAME = { |
| 12 'b': '.bss', | 13 'b': '.bss', |
| 13 'd': '.data', | 14 'd': '.data', |
| 14 'r': '.rodata', | 15 'r': '.rodata', |
| 15 't': '.text', | 16 't': '.text', |
| 16 } | 17 } |
| 17 | 18 |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 64 # TODO(agrieve): Also match generated functions such as: | 65 # TODO(agrieve): Also match generated functions such as: |
| 65 # startup._GLOBAL__sub_I_page_allocator.cc | 66 # startup._GLOBAL__sub_I_page_allocator.cc |
| 66 return self.name.endswith(']') and not self.name.endswith('[]') | 67 return self.name.endswith(']') and not self.name.endswith('[]') |
| 67 | 68 |
| 68 def _Key(self): | 69 def _Key(self): |
| 69 """Returns a tuple that can be used to see if two Symbol are the same. | 70 """Returns a tuple that can be used to see if two Symbol are the same. |
| 70 | 71 |
| 71 Keys are not guaranteed to be unique within a SymbolGroup. For example, it | 72 Keys are not guaranteed to be unique within a SymbolGroup. For example, it |
| 72 is common to have multiple "** merge strings" symbols, which will have a | 73 is common to have multiple "** merge strings" symbols, which will have a |
| 73 common key.""" | 74 common key.""" |
| 74 return (self.section_name, self.function_signature or self.name) | 75 return (self.section_name, self.full_name or self.name) |
| 75 | 76 |
| 76 | 77 |
| 77 class Symbol(BaseSymbol): | 78 class Symbol(BaseSymbol): |
| 78 """Represents a single symbol within a binary.""" | 79 """Represents a single symbol within a binary.""" |
| 79 | 80 |
| 80 __slots__ = ( | 81 __slots__ = ( |
| 82 'address', |
| 83 'full_name', |
| 84 'is_anonymous', |
| 85 'object_path', |
| 86 'name', |
| 87 'flags', |
| 88 'padding', |
| 81 'section_name', | 89 'section_name', |
| 82 'address', | 90 'source_path', |
| 83 'size', | 91 'size', |
| 84 'padding', | |
| 85 'name', | |
| 86 'function_signature', | |
| 87 'path', | |
| 88 ) | 92 ) |
| 89 | 93 |
| 90 def __init__(self, section_name, size_without_padding, address=None, | 94 def __init__(self, section_name, size_without_padding, address=None, |
| 91 name=None, path=None, function_signature=None): | 95 name=None, source_path=None, object_path=None, |
| 96 full_name=None, is_anonymous=False): |
| 92 self.section_name = section_name | 97 self.section_name = section_name |
| 93 self.address = address or 0 | 98 self.address = address or 0 |
| 94 self.name = name or '' | 99 self.name = name or '' |
| 95 self.function_signature = function_signature or '' | 100 self.full_name = full_name or '' |
| 96 self.path = path or '' | 101 self.source_path = source_path or '' |
| 102 self.object_path = object_path or '' |
| 97 self.size = size_without_padding | 103 self.size = size_without_padding |
| 104 # Change this to be a bitfield of flags if ever there is a need to add |
| 105 # another similar thing. |
| 106 self.is_anonymous = is_anonymous |
| 98 self.padding = 0 | 107 self.padding = 0 |
| 99 | 108 |
| 100 def __repr__(self): | 109 def __repr__(self): |
| 101 return '%s@%x(size=%d,padding=%d,name=%s,path=%s)' % ( | 110 return '%s@%x(size=%d,padding=%d,name=%s,path=%s,anon=%d)' % ( |
| 102 self.section_name, self.address, self.size_without_padding, | 111 self.section_name, self.address, self.size_without_padding, |
| 103 self.padding, self.name, self.path) | 112 self.padding, self.name, self.source_path or self.object_path, |
| 113 int(self.is_anonymous)) |
| 104 | 114 |
| 105 | 115 |
| 106 class SymbolGroup(BaseSymbol): | 116 class SymbolGroup(BaseSymbol): |
| 107 """Represents a group of symbols using the same interface as Symbol. | 117 """Represents a group of symbols using the same interface as Symbol. |
| 108 | 118 |
| 109 SymbolGroups are immutable. All filtering / sorting will return new | 119 SymbolGroups are immutable. All filtering / sorting will return new |
| 110 SymbolGroups objects. | 120 SymbolGroups objects. |
| 111 """ | 121 """ |
| 112 | 122 |
| 113 __slots__ = ( | 123 __slots__ = ( |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 145 def __add__(self, other): | 155 def __add__(self, other): |
| 146 self_ids = set(id(s) for s in self) | 156 self_ids = set(id(s) for s in self) |
| 147 new_symbols = self.symbols + [s for s in other if id(s) not in self_ids] | 157 new_symbols = self.symbols + [s for s in other if id(s) not in self_ids] |
| 148 return self._CreateTransformed(new_symbols, section_name=self.section_name) | 158 return self._CreateTransformed(new_symbols, section_name=self.section_name) |
| 149 | 159 |
| 150 @property | 160 @property |
| 151 def address(self): | 161 def address(self): |
| 152 return 0 | 162 return 0 |
| 153 | 163 |
| 154 @property | 164 @property |
| 155 def function_signature(self): | 165 def full_name(self): |
| 156 return None | 166 return None |
| 157 | 167 |
| 158 @property | 168 @property |
| 159 def path(self): | 169 def is_anonymous(self): |
| 170 return False |
| 171 |
| 172 @property |
| 173 def source_path(self): |
| 160 return None | 174 return None |
| 161 | 175 |
| 162 @property | 176 @property |
| 163 def size(self): | 177 def size(self): |
| 164 if self.IsBss(): | 178 if self.IsBss(): |
| 165 return sum(s.size for s in self) | 179 return sum(s.size for s in self) |
| 166 return sum(s.size for s in self if not s.IsBss()) | 180 return sum(s.size for s in self if not s.IsBss()) |
| 167 | 181 |
| 168 @property | 182 @property |
| 169 def padding(self): | 183 def padding(self): |
| (...skipping 11 matching lines...) Expand all Loading... |
| 181 # Default to sorting by abs(size) then name. | 195 # Default to sorting by abs(size) then name. |
| 182 if cmp_func is None and key is None: | 196 if cmp_func is None and key is None: |
| 183 cmp_func = lambda a, b: cmp((a.IsBss(), abs(b.size), a.name), | 197 cmp_func = lambda a, b: cmp((a.IsBss(), abs(b.size), a.name), |
| 184 (b.IsBss(), abs(a.size), b.name)) | 198 (b.IsBss(), abs(a.size), b.name)) |
| 185 | 199 |
| 186 new_symbols = sorted(self.symbols, cmp_func, key, reverse) | 200 new_symbols = sorted(self.symbols, cmp_func, key, reverse) |
| 187 return self._CreateTransformed(new_symbols, | 201 return self._CreateTransformed(new_symbols, |
| 188 filtered_symbols=self.filtered_symbols, | 202 filtered_symbols=self.filtered_symbols, |
| 189 section_name=self.section_name) | 203 section_name=self.section_name) |
| 190 | 204 |
| 205 def SortedByName(self, reverse=False): |
| 206 return self.Sorted(key=(lambda s:s.name), reverse=reverse) |
| 207 |
| 208 def SortedByAddress(self, reverse=False): |
| 209 return self.Sorted(key=(lambda s:s.address), reverse=reverse) |
| 210 |
| 211 def SortedByCount(self, reverse=False): |
| 212 return self.Sorted(key=(lambda s:len(s) if s.IsGroup() else 1), |
| 213 reverse=not reverse) |
| 214 |
| 191 def Filter(self, func): | 215 def Filter(self, func): |
| 192 filtered_and_kept = ([], []) | 216 filtered_and_kept = ([], []) |
| 193 for symbol in self: | 217 for symbol in self: |
| 194 filtered_and_kept[int(bool(func(symbol)))].append(symbol) | 218 filtered_and_kept[int(bool(func(symbol)))].append(symbol) |
| 195 return self._CreateTransformed(filtered_and_kept[1], | 219 return self._CreateTransformed(filtered_and_kept[1], |
| 196 filtered_symbols=filtered_and_kept[0], | 220 filtered_symbols=filtered_and_kept[0], |
| 197 section_name=self.section_name) | 221 section_name=self.section_name) |
| 198 | 222 |
| 199 def WhereBiggerThan(self, min_size): | 223 def WhereBiggerThan(self, min_size): |
| 200 return self.Filter(lambda s: s.size >= min_size) | 224 return self.Filter(lambda s: s.size >= min_size) |
| 201 | 225 |
| 202 def WhereInSection(self, section): | 226 def WhereInSection(self, section): |
| 203 if len(section) == 1: | 227 if len(section) == 1: |
| 204 ret = self.Filter(lambda s: s.section == section) | 228 ret = self.Filter(lambda s: s.section == section) |
| 205 ret.section_name = SECTION_TO_SECTION_NAME[section] | 229 ret.section_name = SECTION_TO_SECTION_NAME[section] |
| 206 else: | 230 else: |
| 207 ret = self.Filter(lambda s: s.section_name == section) | 231 ret = self.Filter(lambda s: s.section_name == section) |
| 208 ret.section_name = section | 232 ret.section_name = section |
| 209 return ret | 233 return ret |
| 210 | 234 |
| 211 def WhereIsGenerated(self): | 235 def WhereIsGenerated(self): |
| 212 return self.Filter(lambda s: s.IsGenerated()) | 236 return self.Filter(lambda s: s.IsGenerated()) |
| 213 | 237 |
| 214 def WhereNameMatches(self, pattern): | 238 def WhereNameMatches(self, pattern): |
| 215 regex = re.compile(pattern) | 239 regex = re.compile(pattern) |
| 216 return self.Filter(lambda s: regex.search(s.name)) | 240 return self.Filter(lambda s: regex.search(s.name)) |
| 217 | 241 |
| 242 def WhereObjectPathMatches(self, pattern): |
| 243 regex = re.compile(pattern) |
| 244 return self.Filter(lambda s: regex.search(s.object_path)) |
| 245 |
| 246 def WhereSourcePathMatches(self, pattern): |
| 247 regex = re.compile(pattern) |
| 248 return self.Filter(lambda s: regex.search(s.source_path)) |
| 249 |
| 218 def WherePathMatches(self, pattern): | 250 def WherePathMatches(self, pattern): |
| 219 regex = re.compile(pattern) | 251 regex = re.compile(pattern) |
| 220 return self.Filter(lambda s: s.path and regex.search(s.path)) | 252 return self.Filter(lambda s: regex.search(s.source_path or s.object_path)) |
| 221 | 253 |
| 222 def WhereAddressInRange(self, start, end): | 254 def WhereAddressInRange(self, start, end): |
| 223 return self.Filter(lambda s: s.address >= start and s.address <= end) | 255 return self.Filter(lambda s: s.address >= start and s.address <= end) |
| 224 | 256 |
| 225 def WhereHasAnyAttribution(self): | 257 def WhereHasAnyAttribution(self): |
| 226 return self.Filter(lambda s: s.name or s.path) | 258 return self.Filter(lambda s: s.name or s.source_path or s.object_path) |
| 227 | 259 |
| 228 def Inverted(self): | 260 def Inverted(self): |
| 229 return self._CreateTransformed(self.filtered_symbols, | 261 return self._CreateTransformed(self.filtered_symbols, |
| 230 filtered_symbols=self.symbols) | 262 filtered_symbols=self.symbols) |
| 231 | 263 |
| 232 def GroupBy(self, func): | 264 def GroupBy(self, func, min_count=0): |
| 265 """Returns a SymbolGroup of SymbolGroups, indexed by |func|. |
| 266 |
| 267 Args: |
| 268 func: Grouping function. Passed a symbol and returns a string for the |
| 269 name of the subgroup to put the symbol in. If None is returned, the |
| 270 symbol is omitted. |
| 271 min_count: Minimum number of symbols for a group. If fewer than this many |
| 272 symbols end up in a group, they will not be put within a group. |
| 273 Use a negative value to omit symbols entirely rather than |
| 274 include them outside of a group. |
| 275 """ |
| 233 new_syms = [] | 276 new_syms = [] |
| 234 filtered_symbols = [] | 277 filtered_symbols = [] |
| 235 symbols_by_token = collections.defaultdict(list) | 278 symbols_by_token = collections.defaultdict(list) |
| 279 # Index symbols by |func|. |
| 236 for symbol in self: | 280 for symbol in self: |
| 237 token = func(symbol) | 281 token = func(symbol) |
| 238 if not token: | 282 if token is None: |
| 239 filtered_symbols.append(symbol) | 283 filtered_symbols.append(symbol) |
| 240 continue | |
| 241 symbols_by_token[token].append(symbol) | 284 symbols_by_token[token].append(symbol) |
| 285 # Create the subgroups. |
| 286 include_singles = min_count >= 0 |
| 287 min_count = abs(min_count) |
| 242 for token, symbols in symbols_by_token.iteritems(): | 288 for token, symbols in symbols_by_token.iteritems(): |
| 243 new_syms.append(self._CreateTransformed(symbols, name=token, | 289 if len(symbols) >= min_count: |
| 244 section_name=self.section_name)) | 290 new_syms.append(self._CreateTransformed(symbols, name=token, |
| 291 section_name=self.section_name)) |
| 292 elif include_singles: |
| 293 new_syms.extend(symbols) |
| 294 else: |
| 295 filtered_symbols.extend(symbols) |
| 245 return self._CreateTransformed(new_syms, filtered_symbols=filtered_symbols, | 296 return self._CreateTransformed(new_syms, filtered_symbols=filtered_symbols, |
| 246 section_name=self.section_name) | 297 section_name=self.section_name) |
| 247 | 298 |
| 248 def GroupByNamespace(self, depth=1): | 299 def GroupBySectionName(self): |
| 300 return self.GroupBy(lambda s: s.section_name) |
| 301 |
| 302 def GroupByNamespace(self, depth=0, fallback='{global}', min_count=0): |
| 303 """Groups by symbol namespace (as denoted by ::s). |
| 304 |
| 305 Does not differentiate between C++ namespaces and C++ classes. |
| 306 |
| 307 Args: |
| 308 depth: When 0 (default), groups by entire namespace. When 1, groups by |
| 309 top-level name, when 2, groups by top 2 names, etc. |
| 310 fallback: Use this value when no namespace exists. |
| 311 min_count: Minimum number of symbols for a group. If fewer than this many |
| 312 symbols end up in a group, they will not be put within a group. |
| 313 Use a negative value to omit symbols entirely rather than |
| 314 include them outside of a group. |
| 315 """ |
| 249 def extract_namespace(symbol): | 316 def extract_namespace(symbol): |
| 250 # Does not distinguish between classes and namespaces. | 317 # Remove template params. |
| 251 idx = -2 | 318 name = symbol.name |
| 252 for _ in xrange(depth): | 319 template_idx = name.find('<') |
| 253 idx = symbol.name.find('::', idx + 2) | 320 if template_idx: |
| 254 if idx != -1: | 321 name = name[:template_idx] |
| 255 ret = symbol.name[:idx] | |
| 256 if '<' not in ret: | |
| 257 return ret | |
| 258 return '{global}' | |
| 259 return self.GroupBy(extract_namespace) | |
| 260 | 322 |
| 261 def GroupByPath(self, depth=1): | 323 # Remove after the final :: (not part of the namespace). |
| 324 colon_idx = name.rfind('::') |
| 325 if colon_idx == -1: |
| 326 return fallback |
| 327 name = name[:colon_idx] |
| 328 |
| 329 return _ExtractPrefixBeforeSeparator(name, '::', depth) |
| 330 return self.GroupBy(extract_namespace, min_count=min_count) |
| 331 |
| 332 def GroupBySourcePath(self, depth=0, fallback='{no path}', |
| 333 fallback_to_object_path=True, min_count=0): |
| 334 """Groups by source_path. |
| 335 |
| 336 Args: |
| 337 depth: When 0 (default), groups by entire path. When 1, groups by |
| 338 top-level directory, when 2, groups by top 2 directories, etc. |
| 339 fallback: Use this value when no path exists. |
| 340 fallback_to_object_path: When True (default), uses object_path when |
| 341 source_path is missing. |
| 342 min_count: Minimum number of symbols for a group. If fewer than this many |
| 343 symbols end up in a group, they will not be put within a group. |
| 344 Use a negative value to omit symbols entirely rather than |
| 345 include them outside of a group. |
| 346 """ |
| 262 def extract_path(symbol): | 347 def extract_path(symbol): |
| 263 idx = -1 | 348 path = symbol.source_path |
| 264 for _ in xrange(depth): | 349 if fallback_to_object_path and not path: |
| 265 idx = symbol.path.find('/', idx + 1) | 350 path = symbol.object_path |
| 266 if idx != -1: | 351 path = path or fallback |
| 267 return symbol.path[:idx] | 352 return _ExtractPrefixBeforeSeparator(path, os.path.sep, depth) |
| 268 return '{path unknown}' | 353 return self.GroupBy(extract_path, min_count=min_count) |
| 269 return self.GroupBy(extract_path) | 354 |
| 355 def GroupByObjectPath(self, depth=0, fallback='{no path}', min_count=0): |
| 356 """Groups by object_path. |
| 357 |
| 358 Args: |
| 359 depth: When 0 (default), groups by entire path. When 1, groups by |
| 360 top-level directory, when 2, groups by top 2 directories, etc. |
| 361 fallback: Use this value when no path exists. |
| 362 min_count: Minimum number of symbols for a group. If fewer than this many |
| 363 symbols end up in a group, they will not be put within a group. |
| 364 Use a negative value to omit symbols entirely rather than |
| 365 include them outside of a group. |
| 366 """ |
| 367 def extract_path(symbol): |
| 368 path = symbol.object_path or fallback |
| 369 return _ExtractPrefixBeforeSeparator(path, os.path.sep, depth) |
| 370 return self.GroupBy(extract_path, min_count=min_count) |
| 270 | 371 |
| 271 | 372 |
| 272 class SymbolDiff(SymbolGroup): | 373 class SymbolDiff(SymbolGroup): |
| 273 """A SymbolGroup subclass representing a diff of two other SymbolGroups. | 374 """A SymbolGroup subclass representing a diff of two other SymbolGroups. |
| 274 | 375 |
| 275 All Symbols contained within have a |size| which is actually the size delta. | 376 All Symbols contained within have a |size| which is actually the size delta. |
| 276 Additionally, metadata is kept about which symbols were added / removed / | 377 Additionally, metadata is kept about which symbols were added / removed / |
| 277 changed. | 378 changed. |
| 278 """ | 379 """ |
| 279 __slots__ = ( | 380 __slots__ = ( |
| (...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 371 # For similar symbols, padding is zeroed out. In order to not lose the | 472 # For similar symbols, padding is zeroed out. In order to not lose the |
| 372 # information entirely, store it in aggregate. | 473 # information entirely, store it in aggregate. |
| 373 padding_by_section_name = collections.defaultdict(int) | 474 padding_by_section_name = collections.defaultdict(int) |
| 374 for new_sym in new: | 475 for new_sym in new: |
| 375 matching_syms = symbols_by_key.get(new_sym._Key()) | 476 matching_syms = symbols_by_key.get(new_sym._Key()) |
| 376 if matching_syms: | 477 if matching_syms: |
| 377 old_sym = matching_syms.pop(0) | 478 old_sym = matching_syms.pop(0) |
| 378 # More stable/useful to compare size without padding. | 479 # More stable/useful to compare size without padding. |
| 379 size_diff = (new_sym.size_without_padding - | 480 size_diff = (new_sym.size_without_padding - |
| 380 old_sym.size_without_padding) | 481 old_sym.size_without_padding) |
| 381 merged_sym = Symbol(old_sym.section_name, size_diff, | 482 merged_sym = Symbol(new_sym.section_name, size_diff, |
| 382 address=old_sym.address, name=old_sym.name, | 483 address=new_sym.address, name=new_sym.name, |
| 383 path=old_sym.path, | 484 source_path=new_sym.source_path, |
| 384 function_signature=old_sym.function_signature) | 485 object_path=new_sym.object_path, |
| 486 full_name=new_sym.full_name, |
| 487 is_anonymous=new_sym.is_anonymous) |
| 385 similar.append(merged_sym) | 488 similar.append(merged_sym) |
| 386 padding_by_section_name[new_sym.section_name] += ( | 489 padding_by_section_name[new_sym.section_name] += ( |
| 387 new_sym.padding - old_sym.padding) | 490 new_sym.padding - old_sym.padding) |
| 388 else: | 491 else: |
| 389 added.append(new_sym) | 492 added.append(new_sym) |
| 390 | 493 |
| 391 for remaining_syms in symbols_by_key.itervalues(): | 494 for remaining_syms in symbols_by_key.itervalues(): |
| 392 for old_sym in remaining_syms: | 495 for old_sym in remaining_syms: |
| 393 duped = copy.copy(old_sym) | 496 duped = copy.copy(old_sym) |
| 394 duped.size = -duped.size | 497 duped.size = -duped.size |
| 395 duped.padding = -duped.padding | 498 duped.padding = -duped.padding |
| 396 removed.append(duped) | 499 removed.append(duped) |
| 397 | 500 |
| 398 for section_name, padding in padding_by_section_name.iteritems(): | 501 for section_name, padding in padding_by_section_name.iteritems(): |
| 399 similar.append(Symbol(section_name, padding, | 502 similar.append(Symbol(section_name, padding, |
| 400 name='** aggregate padding of delta symbols')) | 503 name='** aggregate padding of delta symbols')) |
| 401 return SymbolDiff(added, removed, similar) | 504 return SymbolDiff(added, removed, similar) |
| 505 |
| 506 |
| 507 def _ExtractPrefixBeforeSeparator(string, separator, count=1): |
| 508 idx = -len(separator) |
| 509 prev_idx = None |
| 510 for _ in xrange(count): |
| 511 idx = string.find(separator, idx + len(separator)) |
| 512 if idx < 0: |
| 513 break |
| 514 prev_idx = idx |
| 515 return string[:prev_idx] |
| OLD | NEW |