| OLD | NEW |
| 1 # Copyright 2017 The Chromium Authors. All rights reserved. | 1 # Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """Main Python API for analyzing binary size.""" | 5 """Main Python API for analyzing binary size.""" |
| 6 | 6 |
| 7 import argparse | 7 import argparse |
| 8 import calendar | 8 import calendar |
| 9 import collections | 9 import collections |
| 10 import datetime | 10 import datetime |
| 11 import gzip | 11 import gzip |
| 12 import logging | 12 import logging |
| 13 import os | 13 import os |
| 14 import posixpath | 14 import posixpath |
| 15 import re | 15 import re |
| 16 import subprocess | 16 import subprocess |
| 17 import sys | 17 import sys |
| 18 import tempfile | 18 import tempfile |
| 19 import zipfile | 19 import zipfile |
| 20 | 20 |
| 21 import concurrent |
| 21 import describe | 22 import describe |
| 22 import file_format | 23 import file_format |
| 23 import function_signature | 24 import function_signature |
| 24 import helpers | |
| 25 import linker_map_parser | 25 import linker_map_parser |
| 26 import models | 26 import models |
| 27 import ninja_parser | 27 import ninja_parser |
| 28 import nm |
| 28 import paths | 29 import paths |
| 29 | 30 |
| 30 | 31 |
| 31 def _OpenMaybeGz(path, mode=None): | 32 def _OpenMaybeGz(path, mode=None): |
| 32 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" | 33 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" |
| 33 if path.endswith('.gz'): | 34 if path.endswith('.gz'): |
| 34 if mode and 'w' in mode: | 35 if mode and 'w' in mode: |
| 35 return gzip.GzipFile(path, mode, 1) | 36 return gzip.GzipFile(path, mode, 1) |
| 36 return gzip.open(path, mode) | 37 return gzip.open(path, mode) |
| 37 return open(path, mode or 'r') | 38 return open(path, mode or 'r') |
| (...skipping 13 matching lines...) Expand all Loading... |
| 51 symbol.flags |= models.FLAG_UNLIKELY | 52 symbol.flags |= models.FLAG_UNLIKELY |
| 52 symbol.name = name[9:] | 53 symbol.name = name[9:] |
| 53 elif name.startswith('rel.local.'): | 54 elif name.startswith('rel.local.'): |
| 54 symbol.flags |= models.FLAG_REL_LOCAL | 55 symbol.flags |= models.FLAG_REL_LOCAL |
| 55 symbol.name = name[10:] | 56 symbol.name = name[10:] |
| 56 elif name.startswith('rel.'): | 57 elif name.startswith('rel.'): |
| 57 symbol.flags |= models.FLAG_REL | 58 symbol.flags |= models.FLAG_REL |
| 58 symbol.name = name[4:] | 59 symbol.name = name[4:] |
| 59 | 60 |
| 60 | 61 |
def _UnmangleNames(names, tool_prefix):
  """Demangles the given C++ symbol names by piping them through c++filt.

  Args:
    names: Iterable of mangled names, one demangled name returned per entry.
    tool_prefix: Prefix for the toolchain's c++filt binary.
  """
  cmd = [tool_prefix + 'c++filt']
  proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  stdout, _ = proc.communicate('\n'.join(names))
  assert proc.returncode == 0
  return stdout.splitlines()
| 68 |
| 69 |
def _UnmangleRemainingSymbols(symbols, tool_prefix):
  """Uses c++filt to unmangle any symbols that need it."""
  # The .map file demangles most names already; mangled C++ names always
  # start with "_Z".
  mangled = [s for s in symbols if s.name.startswith('_Z')]
  if not mangled:
    return

  logging.info('Unmangling %d names', len(mangled))
  unmangled = _UnmangleNames((s.name for s in mangled), tool_prefix)
  for i, new_name in enumerate(unmangled):
    mangled[i].name = new_name
| 75 | 80 |
| 76 | 81 |
| 77 def _NormalizeNames(symbols): | 82 def _NormalizeNames(symbols): |
| 78 """Ensures that all names are formatted in a useful way. | 83 """Ensures that all names are formatted in a useful way. |
| 79 | 84 |
| 80 This includes: | 85 This includes: |
| 81 - Assigning of |full_name|. | 86 - Assigning of |full_name|. |
| 82 - Stripping of return types in |full_name| and |name| (for functions). | 87 - Stripping of return types in |full_name| and |name| (for functions). |
| 83 - Stripping parameters from |name|. | 88 - Stripping parameters from |name|. |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 120 symbol.full_name = symbol.name | 125 symbol.full_name = symbol.name |
| 121 symbol.name = re.sub(r'\(.*\)', '', symbol.full_name) | 126 symbol.name = re.sub(r'\(.*\)', '', symbol.full_name) |
| 122 | 127 |
| 123 # Don't bother storing both if they are the same. | 128 # Don't bother storing both if they are the same. |
| 124 if symbol.full_name == symbol.name: | 129 if symbol.full_name == symbol.name: |
| 125 symbol.full_name = '' | 130 symbol.full_name = '' |
| 126 | 131 |
| 127 logging.debug('Found name prefixes of: %r', found_prefixes) | 132 logging.debug('Found name prefixes of: %r', found_prefixes) |
| 128 | 133 |
| 129 | 134 |
| 130 def _NormalizeObjectPaths(symbols): | 135 def _NormalizeObjectPath(path): |
| 131 """Ensures that all paths are formatted in a useful way.""" | 136 if path.startswith('obj/'): |
| 132 for symbol in symbols: | 137 # Convert obj/third_party/... -> third_party/... |
| 133 path = symbol.object_path | 138 path = path[4:] |
| 134 if path.startswith('obj/'): | 139 elif path.startswith('../../'): |
| 135 # Convert obj/third_party/... -> third_party/... | 140 # Convert ../../third_party/... -> third_party/... |
| 136 path = path[4:] | 141 path = path[6:] |
| 137 elif path.startswith('../../'): | 142 if path.endswith(')'): |
| 138 # Convert ../../third_party/... -> third_party/... | 143 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o |
| 139 path = path[6:] | 144 start_idx = path.index('(') |
| 140 if path.endswith(')'): | 145 path = os.path.join(path[:start_idx], path[start_idx + 1:-1]) |
| 141 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o | 146 return path |
| 142 start_idx = path.index('(') | |
| 143 path = os.path.join(path[:start_idx], path[start_idx + 1:-1]) | |
| 144 symbol.object_path = path | |
| 145 | 147 |
| 146 | 148 |
| 147 def _NormalizeSourcePath(path): | 149 def _NormalizeSourcePath(path): |
| 148 if path.startswith('gen/'): | 150 if path.startswith('gen/'): |
| 149 # Convert gen/third_party/... -> third_party/... | 151 # Convert gen/third_party/... -> third_party/... |
| 150 return path[4:] | 152 return path[4:] |
| 151 if path.startswith('../../'): | 153 if path.startswith('../../'): |
| 152 # Convert ../../third_party/... -> third_party/... | 154 # Convert ../../third_party/... -> third_party/... |
| 153 return path[6:] | 155 return path[6:] |
| 154 return path | 156 return path |
| 155 | 157 |
| 156 | 158 |
def _SourcePathForObjectPath(object_path, source_mapper):
  """Returns the normalized source path for |object_path|, or '' if unknown."""
  # Paths outside of the output directory (absolute or ../../-relative) are
  # prebuilt .a files, for which we have no source info.
  if os.path.isabs(object_path) or object_path.startswith('..'):
    return ''
  source_path = source_mapper.FindSourceForPath(object_path)
  if not source_path:
    return ''
  return _NormalizeSourcePath(source_path)
| 166 |
| 167 |
def _ExtractSourcePaths(symbols, source_mapper):
  """Fills in the |source_path| attribute."""
  logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count)
  for symbol in symbols:
    # Skip symbols that already have a source path, and those with no object
    # path to look up.
    if symbol.source_path or not symbol.object_path:
      continue
    symbol.source_path = _SourcePathForObjectPath(
        symbol.object_path, source_mapper)
| 175 |
| 176 |
| 177 def _ComputeAnscestorPath(paths): |
| 178 """Returns the common anscestor of the given paths.""" |
| 179 # Ignore missing paths. |
| 180 paths = [p for p in paths if p] |
| 181 prefix = os.path.commonprefix(paths) |
| 182 # Put the path count as a subdirectory to allow for better grouping when |
| 183 # path-based breakdowns. |
| 184 if not prefix: |
| 185 if len(paths) < 2: |
| 186 return '' |
| 187 return os.path.join('{shared}', str(len(paths))) |
| 188 if prefix == paths[0]: |
| 189 return prefix |
| 190 assert len(paths) > 1, 'paths: ' + repr(paths) |
| 191 return os.path.join(os.path.dirname(prefix), '{shared}', str(len(paths))) |
| 192 |
| 193 |
# This must normalize object paths at the same time because normalization
# needs to occur before finding common ancestor.
def _ComputeAnscestorPathsAndNormalizeObjectPaths(
    symbols, object_paths_by_name, source_mapper):
  """Sets each symbol's paths to the common ancestor of its nm-reported paths.

  The .map file records only one object path per symbol, but a symbol (e.g.
  an inline function) can be defined by several object files. Using nm's
  per-name path lists, this sets |object_path| (and |source_path| when
  |source_mapper| is given) to the common ancestor of all paths, and
  normalizes |object_path| for every symbol.

  Args:
    symbols: Symbols to update in-place.
    object_paths_by_name: Dict of symbol name -> list of object paths that
        define it, as reported by nm.
    source_mapper: Maps object paths to source paths. May be None, in which
        case |source_path| is left untouched.
  """
  num_found_paths = 0
  num_unknown_names = 0
  num_path_mismatches = 0
  num_unmatched_aliases = 0
  for symbol in symbols:
    name = symbol.name
    # These names are anonymous or compiler/linker-generated and cannot be
    # cross-referenced with nm output; just normalize their existing path.
    if (symbol.IsBss() or
        not name or
        name[0] in '*.' or  # e.g. ** merge symbols, .Lswitch.table
        name == 'startup'):
      symbol.object_path = _NormalizeObjectPath(symbol.object_path)
      continue

    object_paths = object_paths_by_name.get(name)
    if object_paths:
      num_found_paths += 1
    else:
      if not symbol.object_path and symbol.aliases:
        # Happens when aliases are from object files where all symbols were
        # pruned or de-duped as aliases. Since we are only scanning .o files
        # referenced by included symbols, such files are missed.
        # TODO(agrieve): This could be fixed by retrieving linker inputs from
        # build.ninja, or by looking for paths within the .map file's
        # discarded sections.
        num_unmatched_aliases += 1
        continue
      # Cap logging to avoid drowning out other messages.
      if num_unknown_names < 10:
        logging.warning('Symbol not found in any .o files: %r', symbol)
      num_unknown_names += 1
      symbol.object_path = _NormalizeObjectPath(symbol.object_path)
      continue

    if symbol.object_path and symbol.object_path not in object_paths:
      if num_path_mismatches < 10:
        logging.warning('Symbol path reported by .map not found by nm.')
        logging.warning('sym=%r', symbol)
        logging.warning('paths=%r', object_paths)
      num_path_mismatches += 1

    if source_mapper:
      source_paths = [
          _SourcePathForObjectPath(p, source_mapper) for p in object_paths]
      symbol.source_path = _ComputeAnscestorPath(source_paths)

    object_paths = [_NormalizeObjectPath(p) for p in object_paths]
    symbol.object_path = _ComputeAnscestorPath(object_paths)

  # Note: label previously read "num_unused_aliases" although the value
  # logged was num_unmatched_aliases.
  logging.debug('Cross-referenced %d symbols with nm output. '
                'num_unknown_names=%d num_path_mismatches=%d '
                'num_unmatched_aliases=%d', num_found_paths, num_unknown_names,
                num_path_mismatches, num_unmatched_aliases)
| 249 |
| 250 |
| 251 def _DiscoverMissedObjectPaths(symbols, elf_object_paths): |
| 252 # Missing object paths are caused by .a files added by -l flags, which are not |
| 253 # listed as explicit inputs within .ninja rules. |
| 254 parsed_inputs = set(elf_object_paths) |
| 255 missed_inputs = set() |
| 256 for symbol in symbols: |
| 257 path = symbol.object_path |
| 258 if path.endswith(')'): |
| 259 # Convert foo/bar.a(baz.o) -> foo/bar.a |
| 260 path = path[:path.index('(')] |
| 261 if path and path not in parsed_inputs: |
| 262 missed_inputs.add(path) |
| 263 return missed_inputs |
| 170 | 264 |
| 171 | 265 |
def _CalculatePadding(symbols):
  """Populates the |padding| field based on symbol addresses.

  Padding is the gap between the end of one symbol and the address of the
  next within the same section; it is folded into |symbol.size|.

  Symbols must already be sorted by |address|.
  """
  seen_sections = []
  # Pairs each symbol with its predecessor: symbols[1:][i] follows symbols[i].
  for i, symbol in enumerate(symbols[1:]):
    prev_symbol = symbols[i]
    if prev_symbol.section_name != symbol.section_name:
      # Each section should be entered exactly once; revisiting one means the
      # input was not sorted by section.
      assert symbol.section_name not in seen_sections, (
          'Input symbols must be sorted by section, then address.')
      seen_sections.append(symbol.section_name)
      continue
    # Non-positive addresses mean "no address"; padding is meaningless there.
    if symbol.address <= 0 or prev_symbol.address <= 0:
      continue

    if symbol.address == prev_symbol.address:
      # Padding-only symbols happen for ** symbol gaps.
      prev_is_padding_only = prev_symbol.size_without_padding == 0
      if not prev_is_padding_only:
        # Must be an alias. Clone its padding.
        assert symbol.aliases, (
            'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol))
        symbol.padding = prev_symbol.padding
        symbol.size = prev_symbol.size
        continue

    padding = symbol.address - prev_symbol.end_address
    # These thresholds were found by experimenting with arm32 Chrome.
    # E.g.: Set them to 0 and see what warnings get logged, then take max value.
    # TODO(agrieve): See if these thresholds make sense for architectures
    # other than arm32.
    if not symbol.name.startswith('*') and (
        symbol.section in 'rd' and padding >= 256 or
        symbol.section in 't' and padding >= 64):
      # Should not happen.
      logging.warning('Large padding of %d between:\n A) %r\n B) %r' % (
          padding, prev_symbol, symbol))
    symbol.padding = padding
    symbol.size += padding
    assert symbol.size >= 0, (
        'Symbol has negative size (likely not sorted propertly): '
        '%r\nprev symbol: %r' % (symbol, prev_symbol))
| 215 | 309 |
| 216 | 310 |
| 217 def _ClusterSymbols(symbols): | 311 def _ClusterSymbols(symbols): |
| 218 """Returns a new list of symbols with some symbols moved into groups. | 312 """Returns a new list of symbols with some symbols moved into groups. |
| 219 | 313 |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 310 group_symbols, name=name_tup[0], full_name=name_tup[1], | 404 group_symbols, name=name_tup[0], full_name=name_tup[1], |
| 311 section_name=group_symbols[0].section_name) | 405 section_name=group_symbols[0].section_name) |
| 312 dest_index += 1 | 406 dest_index += 1 |
| 313 | 407 |
| 314 assert len(grouped_symbols[dest_index:None]) == len(symbols[src_index:None]) | 408 assert len(grouped_symbols[dest_index:None]) == len(symbols[src_index:None]) |
| 315 grouped_symbols[dest_index:None] = symbols[src_index:None] | 409 grouped_symbols[dest_index:None] = symbols[src_index:None] |
| 316 logging.debug('Finished making groups.') | 410 logging.debug('Finished making groups.') |
| 317 return grouped_symbols | 411 return grouped_symbols |
| 318 | 412 |
| 319 | 413 |
def _AddSymbolAliases(symbols, aliases_by_address):
  """Expands |symbols| in-place with alias symbols discovered by nm.

  The .map file lists a single name per address, but identical symbols (e.g.
  identical template instantiations) are often coalesced at link time. For
  each symbol whose address maps to multiple names in |aliases_by_address|,
  one Symbol per name is inserted, all sharing one |aliases| list.

  Args:
    symbols: List of symbols, sorted by address. Grown in-place.
    aliases_by_address: Dict of address -> list of names at that address.
  """
  # Step 1: Create list of (index_of_symbol, name_list).
  logging.debug('Creating alias list')
  replacements = []
  num_new_symbols = 0
  for i, s in enumerate(symbols):
    # Don't alias padding-only symbols (e.g. ** symbol gap)
    if s.size_without_padding == 0:
      continue
    name_list = aliases_by_address.get(s.address)
    if name_list:
      if s.name not in name_list:
        logging.warning('Name missing from aliases: %s %s', s.name, name_list)
        continue
      replacements.append((i, name_list))
      num_new_symbols += len(name_list) - 1

  # Step 2: Create new symbols as siblings to each existing one.
  # The list is grown once and then filled from the end backwards so that
  # each original symbol is copied at most once.
  # (Fixed: removed unused local num_copied.)
  logging.debug('Creating %d aliases', num_new_symbols)
  src_cursor_end = len(symbols)
  symbols += [None] * num_new_symbols
  dst_cursor_end = len(symbols)
  for src_index, name_list in reversed(replacements):
    # Copy over symbols that come after the current one.
    chunk_size = src_cursor_end - src_index - 1
    dst_cursor_end -= chunk_size
    src_cursor_end -= chunk_size
    symbols[dst_cursor_end:dst_cursor_end + chunk_size] = (
        symbols[src_cursor_end:src_cursor_end + chunk_size])
    sym = symbols[src_index]
    src_cursor_end -= 1

    # Create aliases (does not bother reusing the existing symbol).
    aliases = [None] * len(name_list)
    for i, name in enumerate(name_list):
      aliases[i] = models.Symbol(
          sym.section_name, sym.size, address=sym.address, name=name,
          aliases=aliases)

    dst_cursor_end -= len(aliases)
    symbols[dst_cursor_end:dst_cursor_end + len(aliases)] = aliases

  # Every pre-allocated slot must have been filled exactly.
  assert dst_cursor_end == src_cursor_end
| 458 |
| 459 |
def LoadAndPostProcessSizeInfo(path):
  """Loads a .size file from |path| and returns the post-processed SizeInfo."""
  logging.debug('Loading results from: %s', path)
  ret = file_format.LoadSizeInfo(path)
  _PostProcessSizeInfo(ret)
  return ret
| 326 | 466 |
| 327 | 467 |
def _PostProcessSizeInfo(size_info):
  """Normalizes names, computes padding, and groups |size_info|'s symbols."""
  raw_symbols = size_info.raw_symbols
  logging.info('Normalizing symbol names')
  _NormalizeNames(raw_symbols)
  logging.info('Calculating padding')
  _CalculatePadding(raw_symbols)
  logging.info('Grouping decomposed functions')
  size_info.symbols = models.SymbolGroup(_ClusterSymbols(raw_symbols))
  logging.info('Processed %d symbols', len(raw_symbols))
| 337 | 477 |
| 338 | 478 |
def CreateMetadata(map_path, elf_path, apk_path, tool_prefix, output_directory):
  """Returns a metadata dict for the given inputs, or None when no |elf_path|.

  Args:
    map_path: Path to the linker .map(.gz) file.
    elf_path: Path to the unstripped ELF file. May be None, in which case no
        metadata is collected.
    apk_path: Path to the .apk the ELF was extracted from. May be None.
    tool_prefix: Prefix for the readelf binary.
    output_directory: Build output directory. May be None, in which case
        path- and GN-related entries are omitted.
  """
  metadata = None
  if elf_path:
    logging.debug('Constructing metadata')
    git_rev = _DetectGitRevision(os.path.dirname(elf_path))
    architecture = _ArchFromElf(elf_path, tool_prefix)
    build_id = BuildIdFromElf(elf_path, tool_prefix)
    timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime(
        elf_path))
    timestamp = calendar.timegm(timestamp_obj.timetuple())

    metadata = {
        models.METADATA_GIT_REVISION: git_rev,
        models.METADATA_ELF_ARCHITECTURE: architecture,
        models.METADATA_ELF_MTIME: timestamp,
        models.METADATA_ELF_BUILD_ID: build_id,
    }

    if output_directory:
      relative_to_out = lambda path: os.path.relpath(path, output_directory)
      gn_args = _ParseGnArgs(os.path.join(output_directory, 'args.gn'))
      metadata[models.METADATA_MAP_FILENAME] = relative_to_out(map_path)
      metadata[models.METADATA_ELF_FILENAME] = relative_to_out(elf_path)
      metadata[models.METADATA_GN_ARGS] = gn_args

      # Bug fix: this block was previously outside the |output_directory|
      # check, but relative_to_out exists only within it, so an APK without
      # an output directory raised NameError.
      if apk_path:
        metadata[models.METADATA_APK_FILENAME] = relative_to_out(apk_path)
  return metadata
| 507 |
| 508 |
| 509 def CreateSizeInfo(map_path, elf_path, tool_prefix, output_directory, |
| 340 raw_only=False): | 510 raw_only=False): |
| 341 """Creates a SizeInfo from the given map file.""" | 511 """Creates a SizeInfo. |
| 342 # tool_prefix needed for c++filt. | |
| 343 lazy_paths.VerifyToolPrefix() | |
| 344 | 512 |
| 345 if not no_source_paths: | 513 Args: |
| 346 # Parse .ninja files at the same time as parsing the .map file. | 514 map_path: Path to the linker .map(.gz) file to parse. |
| 347 source_mapper_result = helpers.ForkAndCall( | 515 elf_path: Path to the corresponding unstripped ELF file. Used to find symbol |
| 348 ninja_parser.Parse, lazy_paths.VerifyOutputDirectory()) | 516 aliases and inlined functions. Can be None. |
| 517 tool_prefix: Prefix for c++filt & nm (required). |
| 518 output_directory: Build output directory. If None, source_paths and symbol |
| 519 alias information will not be recorded. |
| 520 raw_only: Fill in just the information required for creating a .size file. |
| 521 """ |
| 522 source_mapper = None |
| 523 if output_directory: |
| 524 # Start by finding the elf_object_paths, so that nm can run on them while |
| 525 # the linker .map is being parsed. |
| 526 logging.info('Parsing ninja files.') |
| 527 source_mapper, elf_object_paths = ninja_parser.Parse( |
| 528 output_directory, elf_path) |
| 529 assert not elf_path or elf_object_paths, ( |
| 530 'Failed to find link command in ninja files for ' + |
| 531 os.path.relpath(elf_path, output_directory)) |
| 532 |
| 533 if elf_path: |
| 534 # Run nm on the elf file to retrieve the list of symbol names per-address. |
| 535 # This list is required because the .map file contains only a single name |
| 536 # for each address, yet multiple symbols are often coalesced when they are |
| 537 # identical. This coalescing happens mainly for small symbols and for C++ |
| 538 # templates. Such symbols make up ~500kb of libchrome.so on Android. |
| 539 elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix) |
| 540 |
| 541 # Run nm on all .o/.a files to retrieve the symbol names within them. |
| 542 # The list is used to detect when mutiple .o files contain the same symbol |
| 543 # (e.g. inline functions), and to update the object_path / source_path |
| 544 # fields accordingly. |
| 545 # Looking in object files is required because the .map file choses a |
| 546 # single path for these symbols. |
| 547 # Rather than record all paths for each symbol, set the paths to be the |
| 548 # common ancestor of all paths. |
| 549 if output_directory: |
| 550 objects_nm_result1 = nm.BulkAnalyzeObjectFilesAsync( |
| 551 elf_object_paths, tool_prefix, output_directory) |
| 349 | 552 |
| 350 with _OpenMaybeGz(map_path) as map_file: | 553 with _OpenMaybeGz(map_path) as map_file: |
| 351 section_sizes, raw_symbols = ( | 554 section_sizes, raw_symbols = ( |
| 352 linker_map_parser.MapFileParser().Parse(map_file)) | 555 linker_map_parser.MapFileParser().Parse(map_file)) |
| 353 | 556 |
| 354 if not no_source_paths: | 557 if elf_path: |
| 355 logging.info('Extracting source paths from .ninja files') | 558 logging.debug('Validating section sizes') |
| 356 source_mapper = source_mapper_result.get() | 559 elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix) |
| 560 for k, v in elf_section_sizes.iteritems(): |
| 561 if v != section_sizes.get(k): |
| 562 logging.error('ELF file and .map file do not agree on section sizes.') |
| 563 logging.error('.map file: %r', section_sizes) |
| 564 logging.error('readelf: %r', elf_section_sizes) |
| 565 sys.exit(1) |
| 566 |
| 567 if elf_path and output_directory: |
| 568 missed_object_paths = _DiscoverMissedObjectPaths( |
| 569 raw_symbols, elf_object_paths) |
| 570 objects_nm_result2 = nm.BulkAnalyzeObjectFilesAsync( |
| 571 missed_object_paths, tool_prefix, output_directory) |
| 572 |
| 573 if source_mapper: |
| 574 logging.info('Looking up source paths from ninja files') |
| 357 _ExtractSourcePaths(raw_symbols, source_mapper) | 575 _ExtractSourcePaths(raw_symbols, source_mapper) |
| 358 assert source_mapper.unmatched_paths_count == 0, ( | 576 assert source_mapper.unmatched_paths_count == 0, ( |
| 359 'One or more source file paths could not be found. Likely caused by ' | 577 'One or more source file paths could not be found. Likely caused by ' |
| 360 '.ninja files being generated at a different time than the .map file.') | 578 '.ninja files being generated at a different time than the .map file.') |
| 361 | 579 |
| 362 logging.info('Stripping linker prefixes from symbol names') | 580 logging.info('Stripping linker prefixes from symbol names') |
| 363 _StripLinkerAddedSymbolPrefixes(raw_symbols) | 581 _StripLinkerAddedSymbolPrefixes(raw_symbols) |
| 364 # Map file for some reason doesn't unmangle all names. | 582 # Map file for some reason doesn't unmangle all names. |
| 365 # Unmangle prints its own log statement. | 583 # Unmangle prints its own log statement. |
| 366 _UnmangleRemainingSymbols(raw_symbols, lazy_paths.tool_prefix) | 584 _UnmangleRemainingSymbols(raw_symbols, tool_prefix) |
| 367 logging.info('Normalizing object paths') | 585 |
| 368 _NormalizeObjectPaths(raw_symbols) | 586 if elf_path: |
| 587 logging.info('Adding aliased symbols, as reported by nm') |
| 588 # This normally does not block (it's finished by this time). |
| 589 aliases_by_address = elf_nm_result.get() |
| 590 _AddSymbolAliases(raw_symbols, aliases_by_address) |
| 591 |
| 592 if output_directory: |
| 593 # For aliases, this provides path information where there wasn't any. |
| 594 logging.info('Computing ancestor paths for inline functions and ' |
| 595 'normalizing object paths') |
| 596 |
| 597 missed_object_paths_by_name = objects_nm_result2.get() |
| 598 logging.debug('Fetched %d symbols from %d impliciit inputs', |
| 599 len(missed_object_paths_by_name), len(missed_object_paths)) |
| 600 |
| 601 object_paths_by_name = objects_nm_result1.get() |
| 602 logging.debug('Fetched %d symbols from %d explicit inputs', |
| 603 len(object_paths_by_name), len(elf_object_paths)) |
| 604 for name, paths in missed_object_paths_by_name.iteritems(): |
| 605 object_paths_by_name.setdefault(name, []).extend(paths) |
| 606 _ComputeAnscestorPathsAndNormalizeObjectPaths( |
| 607 raw_symbols, object_paths_by_name, source_mapper) |
| 608 else: |
| 609 logging.info('Normalizing object paths.') |
| 610 for symbol in raw_symbols: |
| 611 symbol.object_path = _NormalizeObjectPath(symbol.object_path) |
| 612 |
| 369 size_info = models.SizeInfo(section_sizes, raw_symbols) | 613 size_info = models.SizeInfo(section_sizes, raw_symbols) |
| 370 | 614 |
| 371 # Name normalization not strictly required, but makes for smaller files. | 615 # Name normalization not strictly required, but makes for smaller files. |
| 372 if raw_only: | 616 if raw_only: |
| 373 logging.info('Normalizing symbol names') | 617 logging.info('Normalizing symbol names') |
| 374 _NormalizeNames(size_info.raw_symbols) | 618 _NormalizeNames(size_info.raw_symbols) |
| 375 else: | 619 else: |
| 376 _PostProcessSizeInfo(size_info) | 620 _PostProcessSizeInfo(size_info) |
| 377 | 621 |
| 378 if logging.getLogger().isEnabledFor(logging.DEBUG): | 622 if logging.getLogger().isEnabledFor(logging.DEBUG): |
| (...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 481 any_path_within_output_directory=any_input) | 725 any_path_within_output_directory=any_input) |
| 482 if apk_path: | 726 if apk_path: |
| 483 with zipfile.ZipFile(apk_path) as z: | 727 with zipfile.ZipFile(apk_path) as z: |
| 484 lib_infos = [f for f in z.infolist() | 728 lib_infos = [f for f in z.infolist() |
| 485 if f.filename.endswith('.so') and f.file_size > 0] | 729 if f.filename.endswith('.so') and f.file_size > 0] |
| 486 assert lib_infos, 'APK has no .so files.' | 730 assert lib_infos, 'APK has no .so files.' |
| 487 # TODO(agrieve): Add support for multiple .so files, and take into account | 731 # TODO(agrieve): Add support for multiple .so files, and take into account |
| 488 # secondary architectures. | 732 # secondary architectures. |
| 489 apk_so_path = max(lib_infos, key=lambda x:x.file_size).filename | 733 apk_so_path = max(lib_infos, key=lambda x:x.file_size).filename |
| 490 logging.debug('Sub-apk path=%s', apk_so_path) | 734 logging.debug('Sub-apk path=%s', apk_so_path) |
| 491 if not elf_path: | 735 if not elf_path and lazy_paths.output_directory: |
| 492 elf_path = os.path.join( | 736 elf_path = os.path.join( |
| 493 lazy_paths.output_directory, 'lib.unstripped', | 737 lazy_paths.output_directory, 'lib.unstripped', |
| 494 os.path.basename(apk_so_path.replace('crazy.', ''))) | 738 os.path.basename(apk_so_path.replace('crazy.', ''))) |
| 495 logging.debug('Detected --elf-file=%s', elf_path) | 739 logging.debug('Detected --elf-file=%s', elf_path) |
| 496 | 740 |
| 497 if map_path: | 741 if map_path: |
| 498 if not map_path.endswith('.map') and not map_path.endswith('.map.gz'): | 742 if not map_path.endswith('.map') and not map_path.endswith('.map.gz'): |
| 499 parser.error('Expected --map-file to end with .map or .map.gz') | 743 parser.error('Expected --map-file to end with .map or .map.gz') |
| 500 else: | 744 else: |
| 501 map_path = elf_path + '.map' | 745 map_path = elf_path + '.map' |
| 502 if not os.path.exists(map_path): | 746 if not os.path.exists(map_path): |
| 503 map_path += '.gz' | 747 map_path += '.gz' |
| 504 if not os.path.exists(map_path): | 748 if not os.path.exists(map_path): |
| 505 parser.error('Could not find .map(.gz)? file. Use --map-file.') | 749 parser.error('Could not find .map(.gz)? file. Use --map-file.') |
| 506 | 750 |
| 507 metadata = None | 751 tool_prefix = lazy_paths.VerifyToolPrefix() |
| 508 if elf_path: | 752 output_directory = None |
| 509 logging.debug('Constructing metadata') | 753 if not args.no_source_paths: |
| 510 git_rev = _DetectGitRevision(os.path.dirname(elf_path)) | 754 output_directory = lazy_paths.VerifyOutputDirectory() |
| 511 architecture = _ArchFromElf(elf_path, lazy_paths.tool_prefix) | |
| 512 build_id = BuildIdFromElf(elf_path, lazy_paths.tool_prefix) | |
| 513 timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime( | |
| 514 elf_path)) | |
| 515 timestamp = calendar.timegm(timestamp_obj.timetuple()) | |
| 516 gn_args = _ParseGnArgs(os.path.join(lazy_paths.output_directory, 'args.gn')) | |
| 517 | 755 |
| 518 def relative_to_out(path): | 756 metadata = CreateMetadata(map_path, elf_path, apk_path, tool_prefix, |
| 519 return os.path.relpath(path, lazy_paths.VerifyOutputDirectory()) | 757 output_directory) |
| 520 | 758 if apk_path and elf_path: |
| 521 metadata = { | 759 # Extraction takes around 1 second, so do it in parallel. |
| 522 models.METADATA_GIT_REVISION: git_rev, | 760 apk_elf_result = concurrent.ForkAndCall( |
| 523 models.METADATA_MAP_FILENAME: relative_to_out(map_path), | 761 _ElfInfoFromApk, (apk_path, apk_so_path, tool_prefix)) |
| 524 models.METADATA_ELF_ARCHITECTURE: architecture, | |
| 525 models.METADATA_ELF_FILENAME: relative_to_out(elf_path), | |
| 526 models.METADATA_ELF_MTIME: timestamp, | |
| 527 models.METADATA_ELF_BUILD_ID: build_id, | |
| 528 models.METADATA_GN_ARGS: gn_args, | |
| 529 } | |
| 530 | |
| 531 if apk_path: | |
| 532 metadata[models.METADATA_APK_FILENAME] = relative_to_out(apk_path) | |
| 533 # Extraction takes around 1 second, so do it in parallel. | |
| 534 apk_elf_result = helpers.ForkAndCall( | |
| 535 _ElfInfoFromApk, apk_path, apk_so_path, lazy_paths.tool_prefix) | |
| 536 | 762 |
| 537 size_info = CreateSizeInfo( | 763 size_info = CreateSizeInfo( |
| 538 map_path, lazy_paths, no_source_paths=args.no_source_paths, raw_only=True) | 764 map_path, elf_path, tool_prefix, output_directory, raw_only=True) |
| 539 | 765 |
| 540 if metadata: | 766 if metadata: |
| 541 size_info.metadata = metadata | 767 size_info.metadata = metadata |
| 542 logging.debug('Validating section sizes') | |
| 543 elf_section_sizes = _SectionSizesFromElf(elf_path, lazy_paths.tool_prefix) | |
| 544 for k, v in elf_section_sizes.iteritems(): | |
| 545 assert v == size_info.section_sizes.get(k), ( | |
| 546 'ELF file and .map file do not match.') | |
| 547 | 768 |
| 548 if apk_path: | 769 if apk_path: |
| 549 logging.debug('Extracting section sizes from .so within .apk') | 770 logging.debug('Extracting section sizes from .so within .apk') |
| 550 unstripped_section_sizes = size_info.section_sizes | 771 unstripped_section_sizes = size_info.section_sizes |
| 551 apk_build_id, size_info.section_sizes = apk_elf_result.get() | 772 apk_build_id, size_info.section_sizes = apk_elf_result.get() |
| 552 assert apk_build_id == build_id, ( | 773 assert apk_build_id == metadata[models.METADATA_ELF_BUILD_ID], ( |
| 553 'BuildID for %s within %s did not match the one at %s' % | 774 'BuildID for %s within %s did not match the one at %s' % |
| 554 (apk_so_path, apk_path, elf_path)) | 775 (apk_so_path, apk_path, elf_path)) |
| 555 | 776 |
| 556 packed_section_name = None | 777 packed_section_name = None |
| 778 architecture = metadata[models.METADATA_ELF_ARCHITECTURE] |
| 557 if architecture == 'ARM': | 779 if architecture == 'ARM': |
| 558 packed_section_name = '.rel.dyn' | 780 packed_section_name = '.rel.dyn' |
| 559 elif architecture == 'AArch64': | 781 elif architecture == 'AArch64': |
| 560 packed_section_name = '.rela.dyn' | 782 packed_section_name = '.rela.dyn' |
| 561 | 783 |
| 562 if packed_section_name: | 784 if packed_section_name: |
| 563 logging.debug('Recording size of unpacked relocations') | 785 logging.debug('Recording size of unpacked relocations') |
| 564 if packed_section_name not in size_info.section_sizes: | 786 if packed_section_name not in size_info.section_sizes: |
| 565 logging.warning('Packed section not present: %s', packed_section_name) | 787 logging.warning('Packed section not present: %s', packed_section_name) |
| 566 else: | 788 else: |
| 567 size_info.section_sizes['%s (unpacked)' % packed_section_name] = ( | 789 size_info.section_sizes['%s (unpacked)' % packed_section_name] = ( |
| 568 unstripped_section_sizes.get(packed_section_name)) | 790 unstripped_section_sizes.get(packed_section_name)) |
| 569 | 791 |
| 570 logging.info('Recording metadata: \n %s', | 792 logging.info('Recording metadata: \n %s', |
| 571 '\n '.join(describe.DescribeMetadata(size_info.metadata))) | 793 '\n '.join(describe.DescribeMetadata(size_info.metadata))) |
| 572 logging.info('Saving result to %s', args.size_file) | 794 logging.info('Saving result to %s', args.size_file) |
| 573 file_format.SaveSizeInfo(size_info, args.size_file) | 795 file_format.SaveSizeInfo(size_info, args.size_file) |
| 574 logging.info('Done') | 796 logging.info('Done') |
| OLD | NEW |