| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2017 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Main Python API for analyzing binary size.""" | |
| 7 | |
| 8 import argparse | |
| 9 import calendar | |
| 10 import collections | |
| 11 import datetime | |
| 12 import gzip | |
| 13 import logging | |
| 14 import os | |
| 15 import re | |
| 16 import subprocess | |
| 17 import sys | |
| 18 | |
| 19 import describe | |
| 20 import file_format | |
| 21 import function_signature | |
| 22 import helpers | |
| 23 import linker_map_parser | |
| 24 import models | |
| 25 import ninja_parser | |
| 26 import paths | |
| 27 | |
| 28 | |
| 29 def _OpenMaybeGz(path, mode=None): | |
| 30 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" | |
| 31 if path.endswith('.gz'): | |
| 32 if mode and 'w' in mode: | |
| 33 return gzip.GzipFile(path, mode, 1) | |
| 34 return gzip.open(path, mode) | |
| 35 return open(path, mode or 'r') | |
| 36 | |
| 37 | |
def _UnmangleRemainingSymbols(symbols, tool_prefix):
  """Uses c++filt to unmangle any symbols that need it."""
  # Only Itanium-ABI mangled names (leading "_Z") need demangling.
  mangled = [s for s in symbols if s.name.startswith('_Z')]
  if not mangled:
    return

  logging.info('Unmangling %d names', len(mangled))
  proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE)
  stdout = proc.communicate('\n'.join(s.name for s in mangled))[0]
  assert proc.returncode == 0

  # c++filt emits one demangled name per input line, in order.
  for sym, demangled in zip(mangled, stdout.splitlines()):
    sym.name = demangled
| 52 | |
| 53 | |
def _NormalizeNames(symbols):
  """Ensures that all names are formatted in a useful way.

  This includes:
    - Assigning of |full_name|.
    - Stripping of return types in |full_name| and |name| (for functions).
    - Stripping parameters from |name|.
    - Moving "vtable for" and the like to be suffixes rather than prefixes.

  Mutates each symbol's |name|, |full_name|, and |is_anonymous| in place.
  """
  found_prefixes = set()
  for symbol in symbols:
    if symbol.name.startswith('*'):
      # See comment in _CalculatePadding() about when this
      # can happen.
      continue

    # E.g.: "vtable for FOO" -> "FOO [vtable]".
    # The search is limited to the first 30 chars so only leading prefixes
    # match, not occurrences deep inside a long symbol name.
    idx = symbol.name.find(' for ', 0, 30)
    if idx != -1:
      found_prefixes.add(symbol.name[:idx + 4])
      symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'

    # E.g.: "virtual thunk to FOO" -> "FOO [virtual thunk]".
    # NOTE(review): this runs on the possibly-already-rewritten name from the
    # " for " case above; presumably the two prefixes never co-occur.
    idx = symbol.name.find(' to ', 0, 30)
    if idx != -1:
      found_prefixes.add(symbol.name[:idx + 3])
      symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'

    # Strip out return type, and identify where parameter list starts.
    # (Only done for 't' (code) symbols, which carry function signatures.)
    if symbol.section == 't':
      symbol.full_name, symbol.name = function_signature.Parse(symbol.name)

    # Remove anonymous namespaces (they just harm clustering).
    non_anonymous = symbol.name.replace('(anonymous namespace)::', '')
    if symbol.name != non_anonymous:
      symbol.is_anonymous = True
      symbol.name = non_anonymous
      symbol.full_name = symbol.full_name.replace(
          '(anonymous namespace)::', '')

    # Non-code symbols whose name still contains "(...)": keep the full form
    # in |full_name| and strip the parenthesized part from |name|.
    if symbol.section != 't' and '(' in symbol.name:
      # Pretty rare. Example:
      # blink::CSSValueKeywordsHash::findValueImpl(char const*)::value_word_list
      symbol.full_name = symbol.name
      symbol.name = re.sub(r'\(.*\)', '', symbol.full_name)

    # Don't bother storing both if they are the same.
    if symbol.full_name == symbol.name:
      symbol.full_name = ''

  logging.debug('Found name prefixes of: %r', found_prefixes)
| 105 | |
| 106 | |
| 107 def _NormalizeObjectPaths(symbols): | |
| 108 """Ensures that all paths are formatted in a useful way.""" | |
| 109 for symbol in symbols: | |
| 110 path = symbol.object_path | |
| 111 if path.startswith('obj/'): | |
| 112 # Convert obj/third_party/... -> third_party/... | |
| 113 path = path[4:] | |
| 114 elif path.startswith('../../'): | |
| 115 # Convert ../../third_party/... -> third_party/... | |
| 116 path = path[6:] | |
| 117 if path.endswith(')'): | |
| 118 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o | |
| 119 start_idx = path.index('(') | |
| 120 path = os.path.join(path[:start_idx], path[start_idx + 1:-1]) | |
| 121 symbol.object_path = path | |
| 122 | |
| 123 | |
| 124 def _NormalizeSourcePath(path): | |
| 125 if path.startswith('gen/'): | |
| 126 # Convert gen/third_party/... -> third_party/... | |
| 127 return path[4:] | |
| 128 if path.startswith('../../'): | |
| 129 # Convert ../../third_party/... -> third_party/... | |
| 130 return path[6:] | |
| 131 return path | |
| 132 | |
| 133 | |
def _ExtractSourcePaths(symbols, output_directory):
  """Fills in the .source_path attribute of all symbols.

  Returns True if source paths were found.
  """
  mapper = ninja_parser.SourceFileMapper(output_directory)
  all_found = True
  for symbol in symbols:
    object_path = symbol.object_path
    # Skip symbols that are already mapped or have no object file at all.
    if symbol.source_path or not object_path:
      continue
    # We don't have source info for prebuilt .a files.
    if object_path.startswith('..'):
      continue
    source_path = mapper.FindSourceForPath(object_path)
    if source_path:
      symbol.source_path = _NormalizeSourcePath(source_path)
    else:
      all_found = False
      logging.warning('Could not find source path for %s', object_path)
  logging.debug('Parsed %d .ninja files.', mapper.GetParsedFileCount())
  return all_found
| 156 | |
| 157 | |
def _CalculatePadding(symbols):
  """Populates the |padding| field based on symbol addresses.

  Padding is the address gap between a symbol and the end of its predecessor
  within the same section; it is also folded into |symbol.size|.

  Symbols must already be sorted by |address|.
  """
  seen_sections = []
  # |symbol| is symbols[i + 1]; symbols[i] is its immediate predecessor.
  for i, symbol in enumerate(symbols[1:]):
    prev_symbol = symbols[i]
    if prev_symbol.section_name != symbol.section_name:
      # First symbol of a new section gets no padding; also verify that each
      # section appears as one contiguous run in the input.
      assert symbol.section_name not in seen_sections, (
          'Input symbols must be sorted by section, then address.')
      seen_sections.append(symbol.section_name)
      continue
    # Skip symbols without a meaningful (positive) address.
    if symbol.address <= 0 or prev_symbol.address <= 0:
      continue
    # Padding-only symbols happen for ** symbol gaps.
    prev_is_padding_only = prev_symbol.size_without_padding == 0
    if symbol.address == prev_symbol.address and not prev_is_padding_only:
      assert False, 'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol)
    # Even with symbols at the same address removed, overlaps can still
    # happen. In this case, padding will be negative (and this is fine).
    padding = symbol.address - prev_symbol.end_address
    # These thresholds were found by manually auditing arm32 Chrome.
    # E.g.: Set them to 0 and see what warnings get logged.
    # TODO(agrieve): See if these thresholds make sense for architectures
    # other than arm32.
    if not symbol.name.startswith('*') and (
        symbol.section in 'rd' and padding >= 256 or
        symbol.section in 't' and padding >= 64):
      # Suspiciously large gap: log it and do NOT attribute the padding to
      # this symbol (its |padding| stays at its default).
      # For nm data, this is caused by data that has no associated symbol.
      # The linker map file lists them with no name, but with a file.
      # Example:
      #   .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
      # Where as most look like:
      #   .data.MANGLED_NAME...
      logging.debug('Large padding of %d between:\n A) %r\n B) %r' % (
          padding, prev_symbol, symbol))
      continue
    symbol.padding = padding
    symbol.size += padding
    assert symbol.size >= 0, (
        'Symbol has negative size (likely not sorted propertly): '
        '%r\nprev symbol: %r' % (symbol, prev_symbol))
| 201 | |
| 202 | |
def _ClusterSymbols(symbols):
  """Returns a new list of symbols with some symbols moved into groups.

  The result preserves address order: each group sits at the position of its
  first member, and grouped symbols are removed from the top level.

  Groups include:
   * Symbols that have [clone] in their name (created by compiler
     optimization). The non-clone original, when present, joins the group.
   * Star symbols (such as "** merge strings", and "** symbol gap").

  Args:
    symbols: List of symbols, sorted by section then address.

  Returns:
    A new list containing ungrouped symbols and models.SymbolGroup entries.
  """
  # http://unix.stackexchange.com/questions/223013/function-symbol-gets-part-suffix-after-compilation
  # Example name suffixes:
  #     [clone .part.322]
  #     [clone .isra.322]
  #     [clone .constprop.1064]

  # Step 1: Create name map, find clones, collect star syms into replacements.
  logging.debug('Creating name -> symbol map')
  clone_indices = []
  indices_by_full_name = {}
  # (name, full_name) -> [(index, sym),...]
  replacements_by_name = collections.defaultdict(list)
  for i, symbol in enumerate(symbols):
    if symbol.name.startswith('**'):
      # "symbol gap 3" -> "symbol gaps"
      name = re.sub(r'\s+\d+$', 's', symbol.name)
      replacements_by_name[(name, None)].append((i, symbol))
    elif symbol.full_name:
      if symbol.full_name.endswith(']') and ' [clone ' in symbol.full_name:
        clone_indices.append(i)
      else:
        indices_by_full_name[symbol.full_name] = i

  # Step 2: Collect same-named clone symbols.
  logging.debug('Grouping all clones')
  for i in clone_indices:
    symbol = symbols[i]
    # Multiple attributes could exist, so search from left-to-right.
    stripped_name = symbol.name[:symbol.name.index(' [clone ')]
    stripped_full_name = symbol.full_name[:symbol.full_name.index(' [clone ')]
    name_tup = (stripped_name, stripped_full_name)
    replacement_list = replacements_by_name[name_tup]

    if not replacement_list:
      # First occurance, check for non-clone symbol.
      # Bug fix: |indices_by_full_name| is keyed by full_name, so the lookup
      # must use |stripped_full_name|. Using the parameter-less
      # |stripped_name| would (almost) never match a full_name key.
      non_clone_idx = indices_by_full_name.get(stripped_full_name)
      if non_clone_idx is not None:
        replacement_list.append((non_clone_idx, symbols[non_clone_idx]))

    replacement_list.append((i, symbol))

  # Step 3: Undo clustering when length=1.
  # Removing these groups means Diff() logic must know about [clone] suffix.
  to_clear = [name_tup for name_tup, replacement_list
              in replacements_by_name.items()
              if len(replacement_list) == 1]
  for name_tup in to_clear:
    del replacements_by_name[name_tup]

  # Step 4: Replace first symbol from each cluster with a SymbolGroup.
  # (Using items()/values() rather than Python-2-only iteritems()/itervalues()
  # keeps this portable; behavior is identical.)
  before_symbol_count = sum(len(x) for x in replacements_by_name.values())
  logging.debug('Creating %d symbol groups from %d symbols. %d clones had only '
                'one symbol.', len(replacements_by_name), before_symbol_count,
                len(to_clear))

  # Each group of k symbols collapses into 1 entry, so the output list is
  # shorter by (k - 1) per group.
  len_delta = len(replacements_by_name) - before_symbol_count
  grouped_symbols = [None] * (len(symbols) + len_delta)
  dest_index = 0
  src_index = 0
  seen_names = set()
  replacement_names_by_index = {}
  for name_tup, replacement_list in replacements_by_name.items():
    for tup in replacement_list:
      replacement_names_by_index[tup[0]] = name_tup

  sorted_items = sorted(replacement_names_by_index.items(),
                        key=lambda tup: tup[0])
  for index, name_tup in sorted_items:
    # Bulk-copy the ungrouped symbols that precede this grouped one.
    count = index - src_index
    grouped_symbols[dest_index:dest_index + count] = (
        symbols[src_index:src_index + count])
    src_index = index + 1
    dest_index += count
    if name_tup not in seen_names:
      # First member of the group: emit the SymbolGroup at this position.
      # Later members are skipped (they live inside the group).
      seen_names.add(name_tup)
      group_symbols = [tup[1] for tup in replacements_by_name[name_tup]]
      grouped_symbols[dest_index] = models.SymbolGroup(
          group_symbols, name=name_tup[0], full_name=name_tup[1],
          section_name=group_symbols[0].section_name)
      dest_index += 1

  # Copy the tail of ungrouped symbols.
  assert len(grouped_symbols[dest_index:]) == len(symbols[src_index:])
  grouped_symbols[dest_index:] = symbols[src_index:]
  logging.debug('Finished making groups.')
  return grouped_symbols
| 300 | |
| 301 | |
def LoadAndPostProcessSizeInfo(path):
  """Returns a SizeInfo for the given |path|.

  Loads a serialized size info via file_format, then applies the same
  post-processing (name normalization, padding, clustering) that
  _PostProcessSizeInfo() performs when creating one from scratch.
  """
  logging.debug('Loading results from: %s', path)
  size_info = file_format.LoadSizeInfo(path)
  _PostProcessSizeInfo(size_info)
  return size_info
| 308 | |
| 309 | |
def _PostProcessSizeInfo(size_info):
  """Derives |size_info.symbols| from |size_info.raw_symbols|.

  Runs, in order: name normalization, padding calculation (requires sorted
  symbols), and clustering of clones/star symbols into a SymbolGroup.
  """
  logging.info('Normalizing symbol names')
  _NormalizeNames(size_info.raw_symbols)
  logging.info('Calculating padding')
  _CalculatePadding(size_info.raw_symbols)
  logging.info('Grouping decomposed functions')
  size_info.symbols = models.SymbolGroup(
      _ClusterSymbols(size_info.raw_symbols))
  logging.info('Processed %d symbols', len(size_info.raw_symbols))
| 319 | |
| 320 | |
def CreateSizeInfo(map_path, lazy_paths=None, no_source_paths=False,
                   raw_only=False):
  """Creates a SizeInfo from the given map file.

  Args:
    map_path: Path to a linker .map file (possibly .gz-compressed).
    lazy_paths: paths.LazyPaths instance; required unless |no_source_paths|
        (supplies the output directory for .ninja parsing and the tool
        prefix for c++filt).
    no_source_paths: When True, skip mapping object_path -> source_path
        (and the output-directory / tool-prefix verification).
    raw_only: When True, only normalize names and skip the rest of
        _PostProcessSizeInfo() (padding, clustering).

  Returns:
    A models.SizeInfo.
  """
  if not no_source_paths:
    # output_directory needed for source file information.
    lazy_paths.VerifyOutputDirectory()
    # tool_prefix needed for c++filt.
    lazy_paths.VerifyToolPrefix()

  with _OpenMaybeGz(map_path) as map_file:
    section_sizes, raw_symbols = (
        linker_map_parser.MapFileParser().Parse(map_file))

  if not no_source_paths:
    logging.info('Extracting source paths from .ninja files')
    all_found = _ExtractSourcePaths(raw_symbols, lazy_paths.output_directory)
    assert all_found, (
        'One or more source file paths could not be found. Likely caused by '
        '.ninja files being generated at a different time than the .map file.')
  # Map file for some reason doesn't unmangle all names.
  # Unmangle prints its own log statement.
  _UnmangleRemainingSymbols(raw_symbols, lazy_paths.tool_prefix)
  logging.info('Normalizing object paths')
  _NormalizeObjectPaths(raw_symbols)
  size_info = models.SizeInfo(section_sizes, raw_symbols)

  # Name normalization not strictly required, but makes for smaller files.
  if raw_only:
    logging.info('Normalizing symbol names')
    _NormalizeNames(size_info.raw_symbols)
  else:
    _PostProcessSizeInfo(size_info)

  if logging.getLogger().isEnabledFor(logging.DEBUG):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  return size_info
| 359 | |
| 360 | |
| 361 def _DetectGitRevision(directory): | |
| 362 try: | |
| 363 git_rev = subprocess.check_output( | |
| 364 ['git', '-C', directory, 'rev-parse', 'HEAD']) | |
| 365 return git_rev.rstrip() | |
| 366 except Exception: | |
| 367 logging.warning('Failed to detect git revision for file metadata.') | |
| 368 return None | |
| 369 | |
| 370 | |
def BuildIdFromElf(elf_path, tool_prefix):
  """Returns the GNU Build ID reported by `readelf -n` for |elf_path|."""
  readelf_cmd = [tool_prefix + 'readelf', '-n', elf_path]
  notes_output = subprocess.check_output(readelf_cmd)
  build_id_match = re.search(r'Build ID: (\w+)', notes_output)
  assert build_id_match, (
      'Build ID not found from running: ' + ' '.join(readelf_cmd))
  return build_id_match.group(1)
| 377 | |
| 378 | |
def _SectionSizesFromElf(elf_path, tool_prefix):
  """Returns a {section_name: size} dict parsed from `readelf -S` output."""
  readelf_cmd = [tool_prefix + 'readelf', '-S', '--wide', elf_path]
  output = subprocess.check_output(readelf_cmd)
  section_sizes = {}
  # Each section row looks like:
  # [ 2] .hash HASH 00000000006681f0 0001f0 003154 04 A 3 0 8
  # group(1) captures everything from the section name (starting with '.').
  for row_match in re.finditer(r'\[[\s\d]+\] (\..*)$', output, re.MULTILINE):
    fields = row_match.group(1).split()
    # fields[0] is the name; fields[4] is the hex size column.
    section_sizes[fields[0]] = int(fields[4], 16)
  return section_sizes
| 388 | |
| 389 | |
| 390 def _ParseGnArgs(args_path): | |
| 391 """Returns a list of normalized "key=value" strings.""" | |
| 392 args = {} | |
| 393 with open(args_path) as f: | |
| 394 for l in f: | |
| 395 # Strips #s even if within string literal. Not a problem in practice. | |
| 396 parts = l.split('#')[0].split('=') | |
| 397 if len(parts) != 2: | |
| 398 continue | |
| 399 args[parts[0].strip()] = parts[1].strip() | |
| 400 return ["%s=%s" % x for x in sorted(args.iteritems())] | |
| 401 | |
| 402 | |
def main(argv):
  """Command-line entry point: builds a .size file from an ELF + .map file.

  Args:
    argv: Full program argument list (argv[0] is the program name).

  Returns:
    None on success; exits via parser.error() on bad arguments.
  """
  # Bug fix: ArgumentParser()'s first positional parameter is |prog| (a
  # string), not the argument list. Passing |argv| rendered the whole list in
  # usage/help text. Argument parsing itself happens via
  # helpers.AddCommonOptionsAndParseArgs(parser, argv) below.
  parser = argparse.ArgumentParser()
  parser.add_argument('--elf-file', required=True,
                      help='Path to input ELF file. Currently used for '
                           'capturing metadata. Pass "" to skip metadata '
                           'collection.')
  parser.add_argument('--map-file',
                      help='Path to input .map(.gz) file. Defaults to '
                           '{{elf_file}}.map(.gz)?')
  parser.add_argument('--output-file', required=True,
                      help='Path to output .size file.')
  parser.add_argument('--no-source-paths', action='store_true',
                      help='Do not use .ninja files to map '
                           'object_path -> source_path')
  paths.AddOptions(parser)
  args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
  if not args.output_file.endswith('.size'):
    parser.error('output_file must end with .size')

  # Resolve the .map path: explicit --map-file, else derive from the ELF.
  if args.map_file:
    if (not args.map_file.endswith('.map')
        and not args.map_file.endswith('.map.gz')):
      parser.error('Expected --map-file to end with .map or .map.gz')
    map_file_path = args.map_file
  else:
    map_file_path = args.elf_file + '.map'
    if not os.path.exists(map_file_path):
      map_file_path += '.gz'
    if not os.path.exists(map_file_path):
      parser.error('Could not find .map(.gz)? file. Use --map-file.')

  lazy_paths = paths.LazyPaths(args=args, input_file=args.elf_file)
  metadata = None
  if args.elf_file:
    logging.debug('Constructing metadata')
    git_rev = _DetectGitRevision(os.path.dirname(args.elf_file))
    build_id = BuildIdFromElf(args.elf_file, lazy_paths.tool_prefix)
    timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime(
        args.elf_file))
    timestamp = calendar.timegm(timestamp_obj.timetuple())
    gn_args = _ParseGnArgs(os.path.join(lazy_paths.output_directory, 'args.gn'))

    def relative_to_out(path):
      return os.path.relpath(path, lazy_paths.VerifyOutputDirectory())

    metadata = {
        models.METADATA_GIT_REVISION: git_rev,
        models.METADATA_MAP_FILENAME: relative_to_out(map_file_path),
        models.METADATA_ELF_FILENAME: relative_to_out(args.elf_file),
        models.METADATA_ELF_MTIME: timestamp,
        models.METADATA_ELF_BUILD_ID: build_id,
        models.METADATA_GN_ARGS: gn_args,
    }

  size_info = CreateSizeInfo(map_file_path, lazy_paths,
                             no_source_paths=args.no_source_paths,
                             raw_only=True)

  if metadata:
    size_info.metadata = metadata
    # Sanity check: section sizes from the .map file must agree with the
    # sizes readelf reports for the ELF.
    logging.debug('Validating section sizes')
    elf_section_sizes = _SectionSizesFromElf(args.elf_file,
                                             lazy_paths.tool_prefix)
    # items() rather than Python-2-only iteritems() for portability.
    for k, v in elf_section_sizes.items():
      assert v == size_info.section_sizes.get(k), (
          'ELF file and .map file do not match.')

  logging.info('Recording metadata: \n %s',
               '\n '.join(describe.DescribeMetadata(size_info.metadata)))
  logging.info('Saving result to %s', args.output_file)
  file_format.SaveSizeInfo(size_info, args.output_file)
  logging.info('Done')
| 476 | |
# Script entry point; exit status is main()'s return value.
if __name__ == '__main__':
  sys.exit(main(sys.argv))
| OLD | NEW |