Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2017 The Chromium Authors. All rights reserved. | 1 # Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """Main Python API for analyzing binary size.""" | 5 """Main Python API for analyzing binary size.""" |
| 6 | 6 |
| 7 import argparse | 7 import argparse |
| 8 import calendar | 8 import calendar |
| 9 import collections | 9 import collections |
| 10 import datetime | 10 import datetime |
| 11 import gzip | 11 import gzip |
| 12 import logging | 12 import logging |
| 13 import os | 13 import os |
| 14 import posixpath | 14 import posixpath |
| 15 import re | 15 import re |
| 16 import subprocess | 16 import subprocess |
| 17 import sys | 17 import sys |
| 18 import tempfile | 18 import tempfile |
| 19 import zipfile | 19 import zipfile |
| 20 | 20 |
| 21 import concurrent | |
| 21 import describe | 22 import describe |
| 22 import file_format | 23 import file_format |
| 23 import function_signature | 24 import function_signature |
| 24 import helpers | |
| 25 import linker_map_parser | 25 import linker_map_parser |
| 26 import models | 26 import models |
| 27 import ninja_parser | 27 import ninja_parser |
| 28 import nm | |
| 28 import paths | 29 import paths |
| 29 | 30 |
| 30 | 31 |
| 31 def _OpenMaybeGz(path, mode=None): | 32 def _OpenMaybeGz(path, mode=None): |
| 32 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" | 33 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" |
| 33 if path.endswith('.gz'): | 34 if path.endswith('.gz'): |
| 34 if mode and 'w' in mode: | 35 if mode and 'w' in mode: |
| 35 return gzip.GzipFile(path, mode, 1) | 36 return gzip.GzipFile(path, mode, 1) |
| 36 return gzip.open(path, mode) | 37 return gzip.open(path, mode) |
| 37 return open(path, mode or 'r') | 38 return open(path, mode or 'r') |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 51 symbol.flags |= models.FLAG_UNLIKELY | 52 symbol.flags |= models.FLAG_UNLIKELY |
| 52 symbol.name = name[9:] | 53 symbol.name = name[9:] |
| 53 elif name.startswith('rel.local.'): | 54 elif name.startswith('rel.local.'): |
| 54 symbol.flags |= models.FLAG_REL_LOCAL | 55 symbol.flags |= models.FLAG_REL_LOCAL |
| 55 symbol.name = name[10:] | 56 symbol.name = name[10:] |
| 56 elif name.startswith('rel.'): | 57 elif name.startswith('rel.'): |
| 57 symbol.flags |= models.FLAG_REL | 58 symbol.flags |= models.FLAG_REL |
| 58 symbol.name = name[4:] | 59 symbol.name = name[4:] |
| 59 | 60 |
| 60 | 61 |
| 62 def _UnmangleNames(names, tool_prefix): | |
|
estevenson
2017/04/28 17:06:11
revert this part
agrieve
2017/04/28 19:26:58
Done.
| |
| 63 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE, | |
| 64 stdout=subprocess.PIPE) | |
| 65 stdout = proc.communicate('\n'.join(names))[0] | |
| 66 assert proc.returncode == 0 | |
| 67 return stdout.splitlines() | |
| 68 | |
| 69 | |
| 61 def _UnmangleRemainingSymbols(symbols, tool_prefix): | 70 def _UnmangleRemainingSymbols(symbols, tool_prefix): |
| 62 """Uses c++filt to unmangle any symbols that need it.""" | 71 """Uses c++filt to unmangle any symbols that need it.""" |
| 63 to_process = [s for s in symbols if s.name.startswith('_Z')] | 72 to_process = [s for s in symbols if s.name.startswith('_Z')] |
| 64 if not to_process: | 73 if not to_process: |
| 65 return | 74 return |
| 66 | 75 |
| 67 logging.info('Unmangling %d names', len(to_process)) | 76 logging.info('Unmangling %d names', len(to_process)) |
| 68 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE, | 77 unmangled = _UnmangleNames((s.name for s in to_process), tool_prefix) |
| 69 stdout=subprocess.PIPE) | 78 for i, line in enumerate(unmangled): |
| 70 stdout = proc.communicate('\n'.join(s.name for s in to_process))[0] | |
| 71 assert proc.returncode == 0 | |
| 72 | |
| 73 for i, line in enumerate(stdout.splitlines()): | |
| 74 to_process[i].name = line | 79 to_process[i].name = line |
| 75 | 80 |
| 76 | 81 |
| 77 def _NormalizeNames(symbols): | 82 def _NormalizeNames(symbols): |
| 78 """Ensures that all names are formatted in a useful way. | 83 """Ensures that all names are formatted in a useful way. |
| 79 | 84 |
| 80 This includes: | 85 This includes: |
| 81 - Assigning of |full_name|. | 86 - Assigning of |full_name|. |
| 82 - Stripping of return types in |full_name| and |name| (for functions). | 87 - Stripping of return types in |full_name| and |name| (for functions). |
| 83 - Stripping parameters from |name|. | 88 - Stripping parameters from |name|. |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 120 symbol.full_name = symbol.name | 125 symbol.full_name = symbol.name |
| 121 symbol.name = re.sub(r'\(.*\)', '', symbol.full_name) | 126 symbol.name = re.sub(r'\(.*\)', '', symbol.full_name) |
| 122 | 127 |
| 123 # Don't bother storing both if they are the same. | 128 # Don't bother storing both if they are the same. |
| 124 if symbol.full_name == symbol.name: | 129 if symbol.full_name == symbol.name: |
| 125 symbol.full_name = '' | 130 symbol.full_name = '' |
| 126 | 131 |
| 127 logging.debug('Found name prefixes of: %r', found_prefixes) | 132 logging.debug('Found name prefixes of: %r', found_prefixes) |
| 128 | 133 |
| 129 | 134 |
| 130 def _NormalizeObjectPaths(symbols): | 135 def _NormalizeObjectPath(path): |
| 131 """Ensures that all paths are formatted in a useful way.""" | 136 if path.startswith('obj/'): |
| 132 for symbol in symbols: | 137 # Convert obj/third_party/... -> third_party/... |
| 133 path = symbol.object_path | 138 path = path[4:] |
| 134 if path.startswith('obj/'): | 139 elif path.startswith('../../'): |
| 135 # Convert obj/third_party/... -> third_party/... | 140 # Convert ../../third_party/... -> third_party/... |
| 136 path = path[4:] | 141 path = path[6:] |
| 137 elif path.startswith('../../'): | 142 if path.endswith(')'): |
| 138 # Convert ../../third_party/... -> third_party/... | 143 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o |
| 139 path = path[6:] | 144 start_idx = path.index('(') |
| 140 if path.endswith(')'): | 145 path = os.path.join(path[:start_idx], path[start_idx + 1:-1]) |
| 141 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o | 146 return path |
| 142 start_idx = path.index('(') | |
| 143 path = os.path.join(path[:start_idx], path[start_idx + 1:-1]) | |
| 144 symbol.object_path = path | |
| 145 | 147 |
| 146 | 148 |
| 147 def _NormalizeSourcePath(path): | 149 def _NormalizeSourcePath(path): |
| 148 if path.startswith('gen/'): | 150 if path.startswith('gen/'): |
| 149 # Convert gen/third_party/... -> third_party/... | 151 # Convert gen/third_party/... -> third_party/... |
| 150 return path[4:] | 152 return path[4:] |
| 151 if path.startswith('../../'): | 153 if path.startswith('../../'): |
| 152 # Convert ../../third_party/... -> third_party/... | 154 # Convert ../../third_party/... -> third_party/... |
| 153 return path[6:] | 155 return path[6:] |
| 154 return path | 156 return path |
| 155 | 157 |
| 156 | 158 |
| 159 def _SourcePathForObjectPath(object_path, source_mapper): | |
| 160 # We don't have source info for prebuilt .a files. | |
| 161 if not os.path.isabs(object_path) and not object_path.startswith('..'): | |
| 162 source_path = source_mapper.FindSourceForPath(object_path) | |
| 163 if source_path: | |
| 164 return _NormalizeSourcePath(source_path) | |
| 165 return '' | |
| 166 | |
| 167 | |
| 157 def _ExtractSourcePaths(symbols, source_mapper): | 168 def _ExtractSourcePaths(symbols, source_mapper): |
| 158 """Fills in the .source_path attribute of all symbols.""" | 169 """Fills in the |source_path| attribute.""" |
| 159 logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count) | 170 logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count) |
| 160 | |
| 161 for symbol in symbols: | 171 for symbol in symbols: |
| 162 object_path = symbol.object_path | 172 object_path = symbol.object_path |
| 163 if symbol.source_path or not object_path: | 173 if object_path and not symbol.source_path: |
| 174 symbol.source_path = _SourcePathForObjectPath(object_path, source_mapper) | |
| 175 | |
| 176 | |
| 177 def _ComputeAnscestorPath(path_list): | |
| 178 """Returns the common anscestor of the given paths.""" | |
| 179 # Ignore missing paths. | |
| 180 path_list = [p for p in path_list if p] | |
| 181 prefix = os.path.commonprefix(path_list) | |
| 182 # Put the path count as a subdirectory to allow for better grouping when | |
| 183 # path-based breakdowns. | |
| 184 if not prefix: | |
| 185 if len(path_list) < 2: | |
| 186 return '' | |
| 187 return os.path.join('{shared}', str(len(path_list))) | |
| 188 if prefix == path_list[0]: | |
| 189 return prefix | |
| 190 assert len(path_list) > 1, 'path_list: ' + repr(path_list) | |
| 191 return os.path.join(os.path.dirname(prefix), '{shared}', str(len(path_list))) | |
| 192 | |
| 193 | |
| 194 # This must normalize object paths at the same time because normalization | |
| 195 # needs to occur before finding common ancestor. | |
| 196 def _ComputeAnscestorPathsAndNormalizeObjectPaths( | |
| 197 symbols, object_paths_by_name, source_mapper): | |
| 198 num_found_paths = 0 | |
| 199 num_unknown_names = 0 | |
| 200 num_path_mismatches = 0 | |
| 201 num_unmatched_aliases = 0 | |
| 202 for symbol in symbols: | |
| 203 name = symbol.name | |
| 204 if (symbol.IsBss() or | |
| 205 not name or | |
| 206 name[0] in '*.' or # e.g. ** merge symbols, .Lswitch.table | |
| 207 name == 'startup'): | |
| 208 symbol.object_path = _NormalizeObjectPath(symbol.object_path) | |
| 164 continue | 209 continue |
| 165 # We don't have source info for prebuilt .a files. | 210 |
| 166 if not os.path.isabs(object_path) and not object_path.startswith('..'): | 211 object_paths = object_paths_by_name.get(name) |
| 167 source_path = source_mapper.FindSourceForPath(object_path) | 212 if object_paths: |
| 168 if source_path: | 213 num_found_paths += 1 |
| 169 symbol.source_path = _NormalizeSourcePath(source_path) | 214 else: |
| 215 if not symbol.object_path and symbol.aliases: | |
| 216 # Happens when aliases are from object files where all symbols were | |
| 217 # pruned or de-duped as aliases. Since we are only scanning .o files | |
| 218 # referenced by included symbols, such files are missed. | |
| 219 # TODO(agrieve): This could be fixed by retrieving linker inputs from | |
| 220 # build.ninja, or by looking for paths within the .map file's | |
| 221 # discarded sections. | |
| 222 num_unmatched_aliases += 1 | |
| 223 continue | |
| 224 if num_unknown_names < 10: | |
| 225 logging.warning('Symbol not found in any .o files: %r', symbol) | |
| 226 num_unknown_names += 1 | |
| 227 symbol.object_path = _NormalizeObjectPath(symbol.object_path) | |
| 228 continue | |
| 229 | |
| 230 if symbol.object_path and symbol.object_path not in object_paths: | |
| 231 if num_path_mismatches < 10: | |
| 232 logging.warning('Symbol path reported by .map not found by nm.') | |
| 233 logging.warning('sym=%r', symbol) | |
| 234 logging.warning('paths=%r', object_paths) | |
| 235 num_path_mismatches += 1 | |
| 236 | |
| 237 if source_mapper: | |
| 238 source_paths = [ | |
| 239 _SourcePathForObjectPath(p, source_mapper) for p in object_paths] | |
| 240 symbol.source_path = _ComputeAnscestorPath(source_paths) | |
| 241 | |
| 242 object_paths = [_NormalizeObjectPath(p) for p in object_paths] | |
| 243 symbol.object_path = _ComputeAnscestorPath(object_paths) | |
| 244 | |
| 245 logging.debug('Cross-referenced %d symbols with nm output. ' | |
| 246 'num_unknown_names=%d num_path_mismatches=%d ' | |
| 247 'num_unused_aliases=%d', num_found_paths, num_unknown_names, | |
| 248 num_path_mismatches, num_unmatched_aliases) | |
| 249 | |
| 250 | |
| 251 def _DiscoverMissedObjectPaths(symbols, elf_object_paths): | |
| 252 # Missing object paths are caused by .a files added by -l flags, which are not | |
| 253 # listed as explicit inputs within .ninja rules. | |
| 254 parsed_inputs = set(elf_object_paths) | |
| 255 missed_inputs = set() | |
| 256 for symbol in symbols: | |
| 257 path = symbol.object_path | |
| 258 if path.endswith(')'): | |
| 259 # Convert foo/bar.a(baz.o) -> foo/bar.a | |
| 260 path = path[:path.index('(')] | |
| 261 if path and path not in parsed_inputs: | |
| 262 missed_inputs.add(path) | |
| 263 return missed_inputs | |
| 170 | 264 |
| 171 | 265 |
| 172 def _CalculatePadding(symbols): | 266 def _CalculatePadding(symbols): |
| 173 """Populates the |padding| field based on symbol addresses. | 267 """Populates the |padding| field based on symbol addresses. |
| 174 | 268 |
| 175 Symbols must already be sorted by |address|. | 269 Symbols must already be sorted by |address|. |
| 176 """ | 270 """ |
| 177 seen_sections = [] | 271 seen_sections = [] |
| 178 for i, symbol in enumerate(symbols[1:]): | 272 for i, symbol in enumerate(symbols[1:]): |
| 179 prev_symbol = symbols[i] | 273 prev_symbol = symbols[i] |
| 180 if prev_symbol.section_name != symbol.section_name: | 274 if prev_symbol.section_name != symbol.section_name: |
| 181 assert symbol.section_name not in seen_sections, ( | 275 assert symbol.section_name not in seen_sections, ( |
| 182 'Input symbols must be sorted by section, then address.') | 276 'Input symbols must be sorted by section, then address.') |
| 183 seen_sections.append(symbol.section_name) | 277 seen_sections.append(symbol.section_name) |
| 184 continue | 278 continue |
| 185 if symbol.address <= 0 or prev_symbol.address <= 0: | 279 if symbol.address <= 0 or prev_symbol.address <= 0: |
| 186 continue | 280 continue |
| 187 # Padding-only symbols happen for ** symbol gaps. | 281 |
| 188 prev_is_padding_only = prev_symbol.size_without_padding == 0 | 282 if symbol.address == prev_symbol.address: |
| 189 if symbol.address == prev_symbol.address and not prev_is_padding_only: | 283 # Padding-only symbols happen for ** symbol gaps. |
| 190 assert False, 'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol) | 284 prev_is_padding_only = prev_symbol.size_without_padding == 0 |
|
estevenson
2017/04/28 17:06:11
This would be better to check that the symbol is i
agrieve
2017/04/28 19:26:58
Done.
| |
| 191 # Even with symbols at the same address removed, overlaps can still | 285 if not prev_is_padding_only: |
| 192 # happen. In this case, padding will be negative (and this is fine). | 286 # Must be an alias. Clone its padding. |
| 287 assert symbol.aliases, ( | |
| 288 'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol)) | |
| 289 symbol.padding = prev_symbol.padding | |
| 290 symbol.size = prev_symbol.size | |
| 291 continue | |
| 292 | |
| 193 padding = symbol.address - prev_symbol.end_address | 293 padding = symbol.address - prev_symbol.end_address |
| 194 # These thresholds were found by manually auditing arm32 Chrome. | 294 # These thresholds were found by experimenting with arm32 Chrome. |
| 195 # E.g.: Set them to 0 and see what warnings get logged. | 295 # E.g.: Set them to 0 and see what warnings get logged, then take max value. |
| 196 # TODO(agrieve): See if these thresholds make sense for architectures | 296 # TODO(agrieve): See if these thresholds make sense for architectures |
| 197 # other than arm32. | 297 # other than arm32. |
| 198 if not symbol.name.startswith('*') and ( | 298 if not symbol.name.startswith('*') and ( |
| 199 symbol.section in 'rd' and padding >= 256 or | 299 symbol.section in 'rd' and padding >= 256 or |
| 200 symbol.section in 't' and padding >= 64): | 300 symbol.section in 't' and padding >= 64): |
| 201 # For nm data, this is caused by data that has no associated symbol. | 301 # Should not happen. |
| 202 # The linker map file lists them with no name, but with a file. | 302 logging.warning('Large padding of %d between:\n A) %r\n B) %r' % ( |
| 203 # Example: | 303 padding, prev_symbol, symbol)) |
| 204 # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o | |
| 205 # Where as most look like: | |
| 206 # .data.MANGLED_NAME... | |
| 207 logging.debug('Large padding of %d between:\n A) %r\n B) %r' % ( | |
| 208 padding, prev_symbol, symbol)) | |
| 209 continue | |
| 210 symbol.padding = padding | 304 symbol.padding = padding |
| 211 symbol.size += padding | 305 symbol.size += padding |
| 212 assert symbol.size >= 0, ( | 306 assert symbol.size >= 0, ( |
| 213 'Symbol has negative size (likely not sorted propertly): ' | 307 'Symbol has negative size (likely not sorted propertly): ' |
| 214 '%r\nprev symbol: %r' % (symbol, prev_symbol)) | 308 '%r\nprev symbol: %r' % (symbol, prev_symbol)) |
| 215 | 309 |
| 216 | 310 |
| 217 def _ClusterSymbols(symbols): | 311 def _ClusterSymbols(symbols): |
| 218 """Returns a new list of symbols with some symbols moved into groups. | 312 """Returns a new list of symbols with some symbols moved into groups. |
| 219 | 313 |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 310 group_symbols, name=name_tup[0], full_name=name_tup[1], | 404 group_symbols, name=name_tup[0], full_name=name_tup[1], |
| 311 section_name=group_symbols[0].section_name) | 405 section_name=group_symbols[0].section_name) |
| 312 dest_index += 1 | 406 dest_index += 1 |
| 313 | 407 |
| 314 assert len(grouped_symbols[dest_index:None]) == len(symbols[src_index:None]) | 408 assert len(grouped_symbols[dest_index:None]) == len(symbols[src_index:None]) |
| 315 grouped_symbols[dest_index:None] = symbols[src_index:None] | 409 grouped_symbols[dest_index:None] = symbols[src_index:None] |
| 316 logging.debug('Finished making groups.') | 410 logging.debug('Finished making groups.') |
| 317 return grouped_symbols | 411 return grouped_symbols |
| 318 | 412 |
| 319 | 413 |
| 414 def _AddSymbolAliases(symbols, aliases_by_address): | |
| 415 # Step 1: Create list of (index_of_symbol, name_list). | |
| 416 logging.debug('Creating alias list') | |
| 417 replacements = [] | |
| 418 num_new_symbols = 0 | |
| 419 for i, s in enumerate(symbols): | |
| 420 # Don't alias padding-only symbols (e.g. ** symbol gap) | |
| 421 if s.size_without_padding == 0: | |
| 422 continue | |
| 423 name_list = aliases_by_address.get(s.address) | |
| 424 if name_list: | |
| 425 if s.name not in name_list: | |
| 426 logging.warning('Name missing from aliases: %s %s', s.name, name_list) | |
| 427 continue | |
| 428 replacements.append((i, name_list)) | |
| 429 num_new_symbols += len(name_list) - 1 | |
| 430 | |
| 431 # Step 2: Create new symbols as siblings to each existing one. | |
| 432 logging.debug('Creating %d aliases', num_new_symbols) | |
| 433 src_cursor_end = len(symbols) | |
| 434 symbols += [None] * num_new_symbols | |
| 435 dst_cursor_end = len(symbols) | |
| 436 for src_index, name_list in reversed(replacements): | |
| 437 # Copy over symbols that come after the current one. | |
| 438 chunk_size = src_cursor_end - src_index - 1 | |
| 439 dst_cursor_end -= chunk_size | |
| 440 src_cursor_end -= chunk_size | |
| 441 symbols[dst_cursor_end:dst_cursor_end + chunk_size] = ( | |
| 442 symbols[src_cursor_end:src_cursor_end + chunk_size]) | |
| 443 sym = symbols[src_index] | |
| 444 src_cursor_end -= 1 | |
| 445 | |
| 446 # Create aliases (does not bother reusing the existing symbol). | |
| 447 aliases = [None] * len(name_list) | |
| 448 for i, name in enumerate(name_list): | |
| 449 aliases[i] = models.Symbol( | |
| 450 sym.section_name, sym.size, address=sym.address, name=name, | |
| 451 aliases=aliases) | |
| 452 | |
| 453 dst_cursor_end -= len(aliases) | |
| 454 symbols[dst_cursor_end:dst_cursor_end + len(aliases)] = aliases | |
| 455 | |
| 456 assert dst_cursor_end == src_cursor_end | |
| 457 | |
| 458 | |
| 320 def LoadAndPostProcessSizeInfo(path): | 459 def LoadAndPostProcessSizeInfo(path): |
| 321 """Returns a SizeInfo for the given |path|.""" | 460 """Returns a SizeInfo for the given |path|.""" |
| 322 logging.debug('Loading results from: %s', path) | 461 logging.debug('Loading results from: %s', path) |
| 323 size_info = file_format.LoadSizeInfo(path) | 462 size_info = file_format.LoadSizeInfo(path) |
| 324 _PostProcessSizeInfo(size_info) | 463 _PostProcessSizeInfo(size_info) |
| 325 return size_info | 464 return size_info |
| 326 | 465 |
| 327 | 466 |
| 328 def _PostProcessSizeInfo(size_info): | 467 def _PostProcessSizeInfo(size_info): |
| 329 logging.info('Normalizing symbol names') | 468 logging.info('Normalizing symbol names') |
| 330 _NormalizeNames(size_info.raw_symbols) | 469 _NormalizeNames(size_info.raw_symbols) |
| 331 logging.info('Calculating padding') | 470 logging.info('Calculating padding') |
| 332 _CalculatePadding(size_info.raw_symbols) | 471 _CalculatePadding(size_info.raw_symbols) |
| 333 logging.info('Grouping decomposed functions') | 472 logging.info('Grouping decomposed functions') |
| 334 size_info.symbols = models.SymbolGroup( | 473 size_info.symbols = models.SymbolGroup( |
| 335 _ClusterSymbols(size_info.raw_symbols)) | 474 _ClusterSymbols(size_info.raw_symbols)) |
| 336 logging.info('Processed %d symbols', len(size_info.raw_symbols)) | 475 logging.info('Processed %d symbols', len(size_info.raw_symbols)) |
| 337 | 476 |
| 338 | 477 |
| 339 def CreateSizeInfo(map_path, lazy_paths=None, no_source_paths=False, | 478 def CreateMetadata(map_path, elf_path, apk_path, tool_prefix, output_directory): |
| 479 metadata = None | |
| 480 if elf_path: | |
| 481 logging.debug('Constructing metadata') | |
| 482 git_rev = _DetectGitRevision(os.path.dirname(elf_path)) | |
| 483 architecture = _ArchFromElf(elf_path, tool_prefix) | |
| 484 build_id = BuildIdFromElf(elf_path, tool_prefix) | |
| 485 timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime( | |
| 486 elf_path)) | |
| 487 timestamp = calendar.timegm(timestamp_obj.timetuple()) | |
| 488 | |
| 489 metadata = { | |
| 490 models.METADATA_GIT_REVISION: git_rev, | |
| 491 models.METADATA_ELF_ARCHITECTURE: architecture, | |
| 492 models.METADATA_ELF_MTIME: timestamp, | |
| 493 models.METADATA_ELF_BUILD_ID: build_id, | |
| 494 } | |
| 495 | |
| 496 if output_directory: | |
| 497 relative_to_out = lambda path: os.path.relpath(path, output_directory) | |
| 498 gn_args = _ParseGnArgs(os.path.join(output_directory, 'args.gn')) | |
| 499 metadata[models.METADATA_MAP_FILENAME] = relative_to_out(map_path) | |
| 500 metadata[models.METADATA_ELF_FILENAME] = relative_to_out(elf_path) | |
| 501 metadata[models.METADATA_GN_ARGS] = gn_args | |
| 502 | |
| 503 if apk_path: | |
| 504 metadata[models.METADATA_APK_FILENAME] = relative_to_out(apk_path) | |
| 505 return metadata | |
| 506 | |
| 507 | |
| 508 def CreateSizeInfo(map_path, elf_path, tool_prefix, output_directory, | |
| 340 raw_only=False): | 509 raw_only=False): |
| 341 """Creates a SizeInfo from the given map file.""" | 510 """Creates a SizeInfo. |
| 342 # tool_prefix needed for c++filt. | |
| 343 lazy_paths.VerifyToolPrefix() | |
| 344 | 511 |
| 345 if not no_source_paths: | 512 Args: |
| 346 # Parse .ninja files at the same time as parsing the .map file. | 513 map_path: Path to the linker .map(.gz) file to parse. |
| 347 source_mapper_result = helpers.ForkAndCall( | 514 elf_path: Path to the corresponding unstripped ELF file. Used to find symbol |
| 348 ninja_parser.Parse, lazy_paths.VerifyOutputDirectory()) | 515 aliases and inlined functions. Can be None. |
| 516 tool_prefix: Prefix for c++filt & nm (required). | |
| 517 output_directory: Build output directory. If None, source_paths and symbol | |
| 518 alias information will not be recorded. | |
| 519 raw_only: Fill in just the information required for creating a .size file. | |
| 520 """ | |
| 521 source_mapper = None | |
| 522 if output_directory: | |
| 523 # Start by finding the elf_object_paths, so that nm can run on them while | |
| 524 # the linker .map is being parsed. | |
| 525 logging.info('Parsing ninja files.') | |
| 526 source_mapper, elf_object_paths = ninja_parser.Parse( | |
| 527 output_directory, elf_path) | |
| 528 assert not elf_path or elf_object_paths, ( | |
| 529 'Failed to find link command in ninja files for ' + | |
| 530 os.path.relpath(elf_path, output_directory)) | |
| 531 | |
| 532 if elf_path: | |
| 533 # Run nm on the elf file to retrieve the list of symbol names per-address. | |
| 534 # This list is required because the .map file contains only a single name | |
| 535 # for each address, yet multiple symbols are often coalesced when they are | |
| 536 # identical. This coalescing happens mainly for small symbols and for C++ | |
| 537 # templates. Such symbols make up ~500kb of libchrome.so on Android. | |
| 538 elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix) | |
| 539 | |
| 540 # Run nm on all .o/.a files to retrieve the symbol names within them. | |
| 541 # The list is used to detect when mutiple .o files contain the same symbol | |
| 542 # (e.g. inline functions), and to update the object_path / source_path | |
| 543 # fields accordingly. | |
| 544 # Looking in object files is required because the .map file choses a | |
| 545 # single path for these symbols. | |
| 546 # Rather than record all paths for each symbol, set the paths to be the | |
| 547 # common ancestor of all paths. | |
| 548 if output_directory: | |
| 549 bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix, output_directory) | |
| 550 bulk_analyzer.AnalyzePaths(elf_object_paths) | |
| 349 | 551 |
| 350 with _OpenMaybeGz(map_path) as map_file: | 552 with _OpenMaybeGz(map_path) as map_file: |
| 351 section_sizes, raw_symbols = ( | 553 section_sizes, raw_symbols = ( |
| 352 linker_map_parser.MapFileParser().Parse(map_file)) | 554 linker_map_parser.MapFileParser().Parse(map_file)) |
| 353 | 555 |
| 354 if not no_source_paths: | 556 if elf_path: |
| 355 logging.info('Extracting source paths from .ninja files') | 557 logging.debug('Validating section sizes') |
| 356 source_mapper = source_mapper_result.get() | 558 elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix) |
| 559 for k, v in elf_section_sizes.iteritems(): | |
| 560 if v != section_sizes.get(k): | |
| 561 logging.error('ELF file and .map file do not agree on section sizes.') | |
| 562 logging.error('.map file: %r', section_sizes) | |
| 563 logging.error('readelf: %r', elf_section_sizes) | |
| 564 sys.exit(1) | |
| 565 | |
| 566 if elf_path and output_directory: | |
| 567 missed_object_paths = _DiscoverMissedObjectPaths( | |
| 568 raw_symbols, elf_object_paths) | |
| 569 bulk_analyzer.AnalyzePaths(missed_object_paths) | |
| 570 bulk_analyzer.Close() | |
| 571 | |
| 572 if source_mapper: | |
| 573 logging.info('Looking up source paths from ninja files') | |
| 357 _ExtractSourcePaths(raw_symbols, source_mapper) | 574 _ExtractSourcePaths(raw_symbols, source_mapper) |
| 358 assert source_mapper.unmatched_paths_count == 0, ( | 575 assert source_mapper.unmatched_paths_count == 0, ( |
| 359 'One or more source file paths could not be found. Likely caused by ' | 576 'One or more source file paths could not be found. Likely caused by ' |
| 360 '.ninja files being generated at a different time than the .map file.') | 577 '.ninja files being generated at a different time than the .map file.') |
| 361 | 578 |
| 362 logging.info('Stripping linker prefixes from symbol names') | 579 logging.info('Stripping linker prefixes from symbol names') |
| 363 _StripLinkerAddedSymbolPrefixes(raw_symbols) | 580 _StripLinkerAddedSymbolPrefixes(raw_symbols) |
| 364 # Map file for some reason doesn't unmangle all names. | 581 # Map file for some reason doesn't unmangle all names. |
| 365 # Unmangle prints its own log statement. | 582 # Unmangle prints its own log statement. |
| 366 _UnmangleRemainingSymbols(raw_symbols, lazy_paths.tool_prefix) | 583 _UnmangleRemainingSymbols(raw_symbols, tool_prefix) |
| 367 logging.info('Normalizing object paths') | 584 |
| 368 _NormalizeObjectPaths(raw_symbols) | 585 if elf_path: |
| 586 logging.info('Adding aliased symbols, as reported by nm') | |
| 587 # This normally does not block (it's finished by this time). | |
| 588 aliases_by_address = elf_nm_result.get() | |
| 589 _AddSymbolAliases(raw_symbols, aliases_by_address) | |
| 590 | |
| 591 if output_directory: | |
| 592 # For aliases, this provides path information where there wasn't any. | |
| 593 logging.info('Computing ancestor paths for inline functions and ' | |
| 594 'normalizing object paths') | |
| 595 | |
| 596 object_paths_by_name = bulk_analyzer.Get() | |
| 597 logging.debug('Fetched path information for %d symbols from %d files', | |
| 598 len(object_paths_by_name), | |
| 599 len(elf_object_paths) + len(missed_object_paths)) | |
| 600 _ComputeAnscestorPathsAndNormalizeObjectPaths( | |
| 601 raw_symbols, object_paths_by_name, source_mapper) | |
| 602 else: | |
| 603 logging.info('Normalizing object paths.') | |
| 604 for symbol in raw_symbols: | |
| 605 symbol.object_path = _NormalizeObjectPath(symbol.object_path) | |
| 606 | |
| 369 size_info = models.SizeInfo(section_sizes, raw_symbols) | 607 size_info = models.SizeInfo(section_sizes, raw_symbols) |
| 370 | 608 |
| 371 # Name normalization not strictly required, but makes for smaller files. | 609 # Name normalization not strictly required, but makes for smaller files. |
| 372 if raw_only: | 610 if raw_only: |
| 373 logging.info('Normalizing symbol names') | 611 logging.info('Normalizing symbol names') |
| 374 _NormalizeNames(size_info.raw_symbols) | 612 _NormalizeNames(size_info.raw_symbols) |
| 375 else: | 613 else: |
| 376 _PostProcessSizeInfo(size_info) | 614 _PostProcessSizeInfo(size_info) |
| 377 | 615 |
| 378 if logging.getLogger().isEnabledFor(logging.DEBUG): | 616 if logging.getLogger().isEnabledFor(logging.DEBUG): |
| (...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 481 any_path_within_output_directory=any_input) | 719 any_path_within_output_directory=any_input) |
| 482 if apk_path: | 720 if apk_path: |
| 483 with zipfile.ZipFile(apk_path) as z: | 721 with zipfile.ZipFile(apk_path) as z: |
| 484 lib_infos = [f for f in z.infolist() | 722 lib_infos = [f for f in z.infolist() |
| 485 if f.filename.endswith('.so') and f.file_size > 0] | 723 if f.filename.endswith('.so') and f.file_size > 0] |
| 486 assert lib_infos, 'APK has no .so files.' | 724 assert lib_infos, 'APK has no .so files.' |
| 487 # TODO(agrieve): Add support for multiple .so files, and take into account | 725 # TODO(agrieve): Add support for multiple .so files, and take into account |
| 488 # secondary architectures. | 726 # secondary architectures. |
| 489 apk_so_path = max(lib_infos, key=lambda x:x.file_size).filename | 727 apk_so_path = max(lib_infos, key=lambda x:x.file_size).filename |
| 490 logging.debug('Sub-apk path=%s', apk_so_path) | 728 logging.debug('Sub-apk path=%s', apk_so_path) |
| 491 if not elf_path: | 729 if not elf_path and lazy_paths.output_directory: |
| 492 elf_path = os.path.join( | 730 elf_path = os.path.join( |
| 493 lazy_paths.output_directory, 'lib.unstripped', | 731 lazy_paths.output_directory, 'lib.unstripped', |
| 494 os.path.basename(apk_so_path.replace('crazy.', ''))) | 732 os.path.basename(apk_so_path.replace('crazy.', ''))) |
| 495 logging.debug('Detected --elf-file=%s', elf_path) | 733 logging.debug('Detected --elf-file=%s', elf_path) |
| 496 | 734 |
| 497 if map_path: | 735 if map_path: |
| 498 if not map_path.endswith('.map') and not map_path.endswith('.map.gz'): | 736 if not map_path.endswith('.map') and not map_path.endswith('.map.gz'): |
| 499 parser.error('Expected --map-file to end with .map or .map.gz') | 737 parser.error('Expected --map-file to end with .map or .map.gz') |
| 500 else: | 738 else: |
| 501 map_path = elf_path + '.map' | 739 map_path = elf_path + '.map' |
| 502 if not os.path.exists(map_path): | 740 if not os.path.exists(map_path): |
| 503 map_path += '.gz' | 741 map_path += '.gz' |
| 504 if not os.path.exists(map_path): | 742 if not os.path.exists(map_path): |
| 505 parser.error('Could not find .map(.gz)? file. Use --map-file.') | 743 parser.error('Could not find .map(.gz)? file. Use --map-file.') |
| 506 | 744 |
| 507 metadata = None | 745 tool_prefix = lazy_paths.VerifyToolPrefix() |
| 508 if elf_path: | 746 output_directory = None |
| 509 logging.debug('Constructing metadata') | 747 if not args.no_source_paths: |
| 510 git_rev = _DetectGitRevision(os.path.dirname(elf_path)) | 748 output_directory = lazy_paths.VerifyOutputDirectory() |
| 511 architecture = _ArchFromElf(elf_path, lazy_paths.tool_prefix) | |
| 512 build_id = BuildIdFromElf(elf_path, lazy_paths.tool_prefix) | |
| 513 timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime( | |
| 514 elf_path)) | |
| 515 timestamp = calendar.timegm(timestamp_obj.timetuple()) | |
| 516 gn_args = _ParseGnArgs(os.path.join(lazy_paths.output_directory, 'args.gn')) | |
| 517 | 749 |
| 518 def relative_to_out(path): | 750 metadata = CreateMetadata(map_path, elf_path, apk_path, tool_prefix, |
| 519 return os.path.relpath(path, lazy_paths.VerifyOutputDirectory()) | 751 output_directory) |
| 520 | 752 if apk_path and elf_path: |
| 521 metadata = { | 753 # Extraction takes around 1 second, so do it in parallel. |
| 522 models.METADATA_GIT_REVISION: git_rev, | 754 apk_elf_result = concurrent.ForkAndCall( |
| 523 models.METADATA_MAP_FILENAME: relative_to_out(map_path), | 755 _ElfInfoFromApk, (apk_path, apk_so_path, tool_prefix)) |
| 524 models.METADATA_ELF_ARCHITECTURE: architecture, | |
| 525 models.METADATA_ELF_FILENAME: relative_to_out(elf_path), | |
| 526 models.METADATA_ELF_MTIME: timestamp, | |
| 527 models.METADATA_ELF_BUILD_ID: build_id, | |
| 528 models.METADATA_GN_ARGS: gn_args, | |
| 529 } | |
| 530 | |
| 531 if apk_path: | |
| 532 metadata[models.METADATA_APK_FILENAME] = relative_to_out(apk_path) | |
| 533 # Extraction takes around 1 second, so do it in parallel. | |
| 534 apk_elf_result = helpers.ForkAndCall( | |
| 535 _ElfInfoFromApk, apk_path, apk_so_path, lazy_paths.tool_prefix) | |
| 536 | 756 |
| 537 size_info = CreateSizeInfo( | 757 size_info = CreateSizeInfo( |
| 538 map_path, lazy_paths, no_source_paths=args.no_source_paths, raw_only=True) | 758 map_path, elf_path, tool_prefix, output_directory, raw_only=True) |
| 539 | 759 |
| 540 if metadata: | 760 if metadata: |
| 541 size_info.metadata = metadata | 761 size_info.metadata = metadata |
| 542 logging.debug('Validating section sizes') | |
| 543 elf_section_sizes = _SectionSizesFromElf(elf_path, lazy_paths.tool_prefix) | |
| 544 for k, v in elf_section_sizes.iteritems(): | |
| 545 assert v == size_info.section_sizes.get(k), ( | |
| 546 'ELF file and .map file do not match.') | |
| 547 | 762 |
| 548 if apk_path: | 763 if apk_path: |
| 549 logging.debug('Extracting section sizes from .so within .apk') | 764 logging.debug('Extracting section sizes from .so within .apk') |
| 550 unstripped_section_sizes = size_info.section_sizes | 765 unstripped_section_sizes = size_info.section_sizes |
| 551 apk_build_id, size_info.section_sizes = apk_elf_result.get() | 766 apk_build_id, size_info.section_sizes = apk_elf_result.get() |
| 552 assert apk_build_id == build_id, ( | 767 assert apk_build_id == metadata[models.METADATA_ELF_BUILD_ID], ( |
| 553 'BuildID for %s within %s did not match the one at %s' % | 768 'BuildID for %s within %s did not match the one at %s' % |
| 554 (apk_so_path, apk_path, elf_path)) | 769 (apk_so_path, apk_path, elf_path)) |
| 555 | 770 |
| 556 packed_section_name = None | 771 packed_section_name = None |
| 772 architecture = metadata[models.METADATA_ELF_ARCHITECTURE] | |
| 557 if architecture == 'ARM': | 773 if architecture == 'ARM': |
| 558 packed_section_name = '.rel.dyn' | 774 packed_section_name = '.rel.dyn' |
| 559 elif architecture == 'AArch64': | 775 elif architecture == 'AArch64': |
| 560 packed_section_name = '.rela.dyn' | 776 packed_section_name = '.rela.dyn' |
| 561 | 777 |
| 562 if packed_section_name: | 778 if packed_section_name: |
| 563 logging.debug('Recording size of unpacked relocations') | 779 logging.debug('Recording size of unpacked relocations') |
| 564 if packed_section_name not in size_info.section_sizes: | 780 if packed_section_name not in size_info.section_sizes: |
| 565 logging.warning('Packed section not present: %s', packed_section_name) | 781 logging.warning('Packed section not present: %s', packed_section_name) |
| 566 else: | 782 else: |
| 567 size_info.section_sizes['%s (unpacked)' % packed_section_name] = ( | 783 size_info.section_sizes['%s (unpacked)' % packed_section_name] = ( |
| 568 unstripped_section_sizes.get(packed_section_name)) | 784 unstripped_section_sizes.get(packed_section_name)) |
| 569 | 785 |
| 570 logging.info('Recording metadata: \n %s', | 786 logging.info('Recording metadata: \n %s', |
| 571 '\n '.join(describe.DescribeMetadata(size_info.metadata))) | 787 '\n '.join(describe.DescribeMetadata(size_info.metadata))) |
| 572 logging.info('Saving result to %s', args.size_file) | 788 logging.info('Saving result to %s', args.size_file) |
| 573 file_format.SaveSizeInfo(size_info, args.size_file) | 789 file_format.SaveSizeInfo(size_info, args.size_file) |
| 574 logging.info('Done') | 790 logging.info('Done') |
| OLD | NEW |