OLD | NEW |
---|---|
1 # Copyright 2017 The Chromium Authors. All rights reserved. | 1 # Copyright 2017 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """Main Python API for analyzing binary size.""" | 5 """Main Python API for analyzing binary size.""" |
6 | 6 |
7 import argparse | 7 import argparse |
8 import calendar | 8 import calendar |
9 import collections | 9 import collections |
10 import datetime | 10 import datetime |
11 import gzip | 11 import gzip |
12 import logging | 12 import logging |
13 import os | 13 import os |
14 import posixpath | 14 import posixpath |
15 import re | 15 import re |
16 import subprocess | 16 import subprocess |
17 import sys | 17 import sys |
18 import tempfile | 18 import tempfile |
19 import zipfile | 19 import zipfile |
20 | 20 |
21 import concurrent | |
21 import describe | 22 import describe |
22 import file_format | 23 import file_format |
23 import function_signature | 24 import function_signature |
24 import helpers | |
25 import linker_map_parser | 25 import linker_map_parser |
26 import models | 26 import models |
27 import ninja_parser | 27 import ninja_parser |
28 import nm | |
28 import paths | 29 import paths |
29 | 30 |
30 | 31 |
31 def _OpenMaybeGz(path, mode=None): | 32 def _OpenMaybeGz(path, mode=None): |
32 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" | 33 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" |
33 if path.endswith('.gz'): | 34 if path.endswith('.gz'): |
34 if mode and 'w' in mode: | 35 if mode and 'w' in mode: |
35 return gzip.GzipFile(path, mode, 1) | 36 return gzip.GzipFile(path, mode, 1) |
36 return gzip.open(path, mode) | 37 return gzip.open(path, mode) |
37 return open(path, mode or 'r') | 38 return open(path, mode or 'r') |
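Note: a minimal usage sketch for _OpenMaybeGz (hypothetical file name; the
same call transparently handles both plain and gzipped map files):

    with _OpenMaybeGz('chrome.map.gz') as map_file:
        for line in map_file:  # Lines arrive already decompressed.
            pass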
(...skipping 13 matching lines...) | |
51 symbol.flags |= models.FLAG_UNLIKELY | 52 symbol.flags |= models.FLAG_UNLIKELY |
52 symbol.name = name[9:] | 53 symbol.name = name[9:] |
53 elif name.startswith('rel.local.'): | 54 elif name.startswith('rel.local.'): |
54 symbol.flags |= models.FLAG_REL_LOCAL | 55 symbol.flags |= models.FLAG_REL_LOCAL |
55 symbol.name = name[10:] | 56 symbol.name = name[10:] |
56 elif name.startswith('rel.'): | 57 elif name.startswith('rel.'): |
57 symbol.flags |= models.FLAG_REL | 58 symbol.flags |= models.FLAG_REL |
58 symbol.name = name[4:] | 59 symbol.name = name[4:] |
59 | 60 |
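Note: as a worked example of the stripping above, a map-file name
'rel.local.foo' becomes name 'foo' with models.FLAG_REL_LOCAL set, and
'unlikely.bar' becomes 'bar' with models.FLAG_UNLIKELY set.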
60 | 61 |
62 def _UnmangleNames(names, tool_prefix): | |
estevenson
2017/04/28 17:06:11
revert this part
agrieve
2017/04/28 19:26:58
Done.
| |
63 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE, | |
64 stdout=subprocess.PIPE) | |
65 stdout = proc.communicate('\n'.join(names))[0] | |
66 assert proc.returncode == 0 | |
67 return stdout.splitlines() | |
68 | |
69 | |
61 def _UnmangleRemainingSymbols(symbols, tool_prefix): | 70 def _UnmangleRemainingSymbols(symbols, tool_prefix): |
62 """Uses c++filt to unmangle any symbols that need it.""" | 71 """Uses c++filt to unmangle any symbols that need it.""" |
63 to_process = [s for s in symbols if s.name.startswith('_Z')] | 72 to_process = [s for s in symbols if s.name.startswith('_Z')] |
64 if not to_process: | 73 if not to_process: |
65 return | 74 return |
66 | 75 |
67 logging.info('Unmangling %d names', len(to_process)) | 76 logging.info('Unmangling %d names', len(to_process)) |
68 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE, | 77 unmangled = _UnmangleNames((s.name for s in to_process), tool_prefix) |
69 stdout=subprocess.PIPE) | 78 for i, line in enumerate(unmangled): |
70 stdout = proc.communicate('\n'.join(s.name for s in to_process))[0] | |
71 assert proc.returncode == 0 | |
72 | |
73 for i, line in enumerate(stdout.splitlines()): | |
74 to_process[i].name = line | 79 to_process[i].name = line |
75 | 80 |
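Note: for context, a standalone sketch of the c++filt round-trip used above
(assumes a c++filt binary on PATH; '_ZN3foo3barEv' is a hypothetical mangled
name):

    import subprocess
    proc = subprocess.Popen(['c++filt'], stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    stdout = proc.communicate('_ZN3foo3barEv')[0]
    assert proc.returncode == 0
    print(stdout.strip())  # foo::bar() -- non-mangled input passes through.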
76 | 81 |
77 def _NormalizeNames(symbols): | 82 def _NormalizeNames(symbols): |
78 """Ensures that all names are formatted in a useful way. | 83 """Ensures that all names are formatted in a useful way. |
79 | 84 |
80 This includes: | 85 This includes: |
81 - Assigning of |full_name|. | 86 - Assigning of |full_name|. |
82 - Stripping of return types in |full_name| and |name| (for functions). | 87 - Stripping of return types in |full_name| and |name| (for functions). |
83 - Stripping parameters from |name|. | 88 - Stripping parameters from |name|. |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
120 symbol.full_name = symbol.name | 125 symbol.full_name = symbol.name |
121 symbol.name = re.sub(r'\(.*\)', '', symbol.full_name) | 126 symbol.name = re.sub(r'\(.*\)', '', symbol.full_name) |
122 | 127 |
123 # Don't bother storing both if they are the same. | 128 # Don't bother storing both if they are the same. |
124 if symbol.full_name == symbol.name: | 129 if symbol.full_name == symbol.name: |
125 symbol.full_name = '' | 130 symbol.full_name = '' |
126 | 131 |
127 logging.debug('Found name prefixes of: %r', found_prefixes) | 132 logging.debug('Found name prefixes of: %r', found_prefixes) |
128 | 133 |
129 | 134 |
130 def _NormalizeObjectPaths(symbols): | 135 def _NormalizeObjectPath(path): |
131 """Ensures that all paths are formatted in a useful way.""" | 136 if path.startswith('obj/'): |
132 for symbol in symbols: | 137 # Convert obj/third_party/... -> third_party/... |
133 path = symbol.object_path | 138 path = path[4:] |
134 if path.startswith('obj/'): | 139 elif path.startswith('../../'): |
135 # Convert obj/third_party/... -> third_party/... | 140 # Convert ../../third_party/... -> third_party/... |
136 path = path[4:] | 141 path = path[6:] |
137 elif path.startswith('../../'): | 142 if path.endswith(')'): |
138 # Convert ../../third_party/... -> third_party/... | 143 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o |
139 path = path[6:] | 144 start_idx = path.index('(') |
140 if path.endswith(')'): | 145 path = os.path.join(path[:start_idx], path[start_idx + 1:-1]) |
141 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o | 146 return path |
142 start_idx = path.index('(') | |
143 path = os.path.join(path[:start_idx], path[start_idx + 1:-1]) | |
144 symbol.object_path = path | |
145 | 147 |
146 | 148 |
147 def _NormalizeSourcePath(path): | 149 def _NormalizeSourcePath(path): |
148 if path.startswith('gen/'): | 150 if path.startswith('gen/'): |
149 # Convert gen/third_party/... -> third_party/... | 151 # Convert gen/third_party/... -> third_party/... |
150 return path[4:] | 152 return path[4:] |
151 if path.startswith('../../'): | 153 if path.startswith('../../'): |
152 # Convert ../../third_party/... -> third_party/... | 154 # Convert ../../third_party/... -> third_party/... |
153 return path[6:] | 155 return path[6:] |
154 return path | 156 return path |
155 | 157 |
156 | 158 |
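Note: illustrative expectations for the two normalizers above (hypothetical
paths, assuming the functions as defined in this file):

    assert _NormalizeObjectPath('obj/third_party/foo/foo.o') == (
        'third_party/foo/foo.o')
    assert _NormalizeObjectPath('../../foo/bar.a(baz.o)') == 'foo/bar.a/baz.o'
    assert _NormalizeSourcePath('gen/foo/bar.cc') == 'foo/bar.cc'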
159 def _SourcePathForObjectPath(object_path, source_mapper): | |
160 # We don't have source info for prebuilt .a files. | |
161 if not os.path.isabs(object_path) and not object_path.startswith('..'): | |
162 source_path = source_mapper.FindSourceForPath(object_path) | |
163 if source_path: | |
164 return _NormalizeSourcePath(source_path) | |
165 return '' | |
166 | |
167 | |
157 def _ExtractSourcePaths(symbols, source_mapper): | 168 def _ExtractSourcePaths(symbols, source_mapper): |
158 """Fills in the .source_path attribute of all symbols.""" | 169 """Fills in the |source_path| attribute.""" |
159 logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count) | 170 logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count) |
160 | |
161 for symbol in symbols: | 171 for symbol in symbols: |
162 object_path = symbol.object_path | 172 object_path = symbol.object_path |
163 if symbol.source_path or not object_path: | 173 if object_path and not symbol.source_path: |
174 symbol.source_path = _SourcePathForObjectPath(object_path, source_mapper) | |
175 | |
176 | |
177 def _ComputeAncestorPath(path_list): | |
178 """Returns the common ancestor of the given paths.""" | |
179 # Ignore missing paths. | |
180 path_list = [p for p in path_list if p] | |
181 prefix = os.path.commonprefix(path_list) | |
182 # Put the path count as a subdirectory to allow for better grouping in | |
183 # path-based breakdowns. | |
184 if not prefix: | |
185 if len(path_list) < 2: | |
186 return '' | |
187 return os.path.join('{shared}', str(len(path_list))) | |
188 if prefix == path_list[0]: | |
189 return prefix | |
190 assert len(path_list) > 1, 'path_list: ' + repr(path_list) | |
191 return os.path.join(os.path.dirname(prefix), '{shared}', str(len(path_list))) | |
192 | |
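Note: worked examples for _ComputeAncestorPath (hypothetical paths):

    assert _ComputeAncestorPath(['a/b/x.cc', 'a/b/y.cc']) == 'a/b/{shared}/2'
    assert _ComputeAncestorPath(['a/x.cc', 'b/y.cc']) == '{shared}/2'
    assert _ComputeAncestorPath(['', 'a/x.cc']) == 'a/x.cc'  # '' is ignored.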
193 | |
194 # This must normalize object paths at the same time because normalization | |
195 # needs to occur before finding the common ancestor. | |
196 def _ComputeAncestorPathsAndNormalizeObjectPaths( | |
197 symbols, object_paths_by_name, source_mapper): | |
198 num_found_paths = 0 | |
199 num_unknown_names = 0 | |
200 num_path_mismatches = 0 | |
201 num_unmatched_aliases = 0 | |
202 for symbol in symbols: | |
203 name = symbol.name | |
204 if (symbol.IsBss() or | |
205 not name or | |
206 name[0] in '*.' or # e.g. ** merge symbols, .Lswitch.table | |
207 name == 'startup'): | |
208 symbol.object_path = _NormalizeObjectPath(symbol.object_path) | |
164 continue | 209 continue |
165 # We don't have source info for prebuilt .a files. | 210 |
166 if not os.path.isabs(object_path) and not object_path.startswith('..'): | 211 object_paths = object_paths_by_name.get(name) |
167 source_path = source_mapper.FindSourceForPath(object_path) | 212 if object_paths: |
168 if source_path: | 213 num_found_paths += 1 |
169 symbol.source_path = _NormalizeSourcePath(source_path) | 214 else: |
215 if not symbol.object_path and symbol.aliases: | |
216 # Happens when aliases are from object files where all symbols were | |
217 # pruned or de-duped as aliases. Since we are only scanning .o files | |
218 # referenced by included symbols, such files are missed. | |
219 # TODO(agrieve): This could be fixed by retrieving linker inputs from | |
220 # build.ninja, or by looking for paths within the .map file's | |
221 # discarded sections. | |
222 num_unmatched_aliases += 1 | |
223 continue | |
224 if num_unknown_names < 10: | |
225 logging.warning('Symbol not found in any .o files: %r', symbol) | |
226 num_unknown_names += 1 | |
227 symbol.object_path = _NormalizeObjectPath(symbol.object_path) | |
228 continue | |
229 | |
230 if symbol.object_path and symbol.object_path not in object_paths: | |
231 if num_path_mismatches < 10: | |
232 logging.warning('Symbol path reported by .map not found by nm.') | |
233 logging.warning('sym=%r', symbol) | |
234 logging.warning('paths=%r', object_paths) | |
235 num_path_mismatches += 1 | |
236 | |
237 if source_mapper: | |
238 source_paths = [ | |
239 _SourcePathForObjectPath(p, source_mapper) for p in object_paths] | |
240 symbol.source_path = _ComputeAncestorPath(source_paths) | |
241 | |
242 object_paths = [_NormalizeObjectPath(p) for p in object_paths] | |
243 symbol.object_path = _ComputeAncestorPath(object_paths) | |
244 | |
245 logging.debug('Cross-referenced %d symbols with nm output. ' | |
246 'num_unknown_names=%d num_path_mismatches=%d ' | |
247 'num_unmatched_aliases=%d', num_found_paths, num_unknown_names, | |
248 num_path_mismatches, num_unmatched_aliases) | |
249 | |
250 | |
251 def _DiscoverMissedObjectPaths(symbols, elf_object_paths): | |
252 # Missing object paths are caused by .a files added by -l flags, which are not | |
253 # listed as explicit inputs within .ninja rules. | |
254 parsed_inputs = set(elf_object_paths) | |
255 missed_inputs = set() | |
256 for symbol in symbols: | |
257 path = symbol.object_path | |
258 if path.endswith(')'): | |
259 # Convert foo/bar.a(baz.o) -> foo/bar.a | |
260 path = path[:path.index('(')] | |
261 if path and path not in parsed_inputs: | |
262 missed_inputs.add(path) | |
263 return missed_inputs | |
170 | 264 |
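Note: a small sketch of the archive-path trimming performed above
(hypothetical path):

    path = 'v8/libv8.a(api.o)'
    if path.endswith(')'):
      path = path[:path.index('(')]
    assert path == 'v8/libv8.a'  # Recorded if absent from elf_object_paths.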
171 | 265 |
172 def _CalculatePadding(symbols): | 266 def _CalculatePadding(symbols): |
173 """Populates the |padding| field based on symbol addresses. | 267 """Populates the |padding| field based on symbol addresses. |
174 | 268 |
175 Symbols must already be sorted by |address|. | 269 Symbols must already be sorted by |address|. |
176 """ | 270 """ |
177 seen_sections = [] | 271 seen_sections = [] |
178 for i, symbol in enumerate(symbols[1:]): | 272 for i, symbol in enumerate(symbols[1:]): |
179 prev_symbol = symbols[i] | 273 prev_symbol = symbols[i] |
180 if prev_symbol.section_name != symbol.section_name: | 274 if prev_symbol.section_name != symbol.section_name: |
181 assert symbol.section_name not in seen_sections, ( | 275 assert symbol.section_name not in seen_sections, ( |
182 'Input symbols must be sorted by section, then address.') | 276 'Input symbols must be sorted by section, then address.') |
183 seen_sections.append(symbol.section_name) | 277 seen_sections.append(symbol.section_name) |
184 continue | 278 continue |
185 if symbol.address <= 0 or prev_symbol.address <= 0: | 279 if symbol.address <= 0 or prev_symbol.address <= 0: |
186 continue | 280 continue |
187 # Padding-only symbols happen for ** symbol gaps. | 281 |
188 prev_is_padding_only = prev_symbol.size_without_padding == 0 | 282 if symbol.address == prev_symbol.address: |
189 if symbol.address == prev_symbol.address and not prev_is_padding_only: | 283 # Padding-only symbols happen for ** symbol gaps. |
190 assert False, 'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol) | 284 prev_is_padding_only = prev_symbol.size_without_padding == 0 |
estevenson
2017/04/28 17:06:11
This would be better to check that the symbol is i
agrieve
2017/04/28 19:26:58
Done.
| |
191 # Even with symbols at the same address removed, overlaps can still | 285 if not prev_is_padding_only: |
192 # happen. In this case, padding will be negative (and this is fine). | 286 # Must be an alias. Clone its padding. |
287 assert symbol.aliases, ( | |
288 'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol)) | |
289 symbol.padding = prev_symbol.padding | |
290 symbol.size = prev_symbol.size | |
291 continue | |
292 | |
193 padding = symbol.address - prev_symbol.end_address | 293 padding = symbol.address - prev_symbol.end_address |
194 # These thresholds were found by manually auditing arm32 Chrome. | 294 # These thresholds were found by experimenting with arm32 Chrome. |
195 # E.g.: Set them to 0 and see what warnings get logged. | 295 # E.g.: Set them to 0 and see what warnings get logged, then take max value. |
196 # TODO(agrieve): See if these thresholds make sense for architectures | 296 # TODO(agrieve): See if these thresholds make sense for architectures |
197 # other than arm32. | 297 # other than arm32. |
198 if not symbol.name.startswith('*') and ( | 298 if not symbol.name.startswith('*') and ( |
199 symbol.section in 'rd' and padding >= 256 or | 299 symbol.section in 'rd' and padding >= 256 or |
200 symbol.section in 't' and padding >= 64): | 300 symbol.section in 't' and padding >= 64): |
201 # For nm data, this is caused by data that has no associated symbol. | 301 # Should not happen. |
202 # The linker map file lists them with no name, but with a file. | 302 logging.warning('Large padding of %d between:\n A) %r\n B) %r' % ( |
203 # Example: | 303 padding, prev_symbol, symbol)) |
204 # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o | |
205 # Where as most look like: | |
206 # .data.MANGLED_NAME... | |
207 logging.debug('Large padding of %d between:\n A) %r\n B) %r' % ( | |
208 padding, prev_symbol, symbol)) | |
209 continue | |
210 symbol.padding = padding | 304 symbol.padding = padding |
211 symbol.size += padding | 305 symbol.size += padding |
212 assert symbol.size >= 0, ( | 306 assert symbol.size >= 0, ( |
213 'Symbol has negative size (likely not sorted properly): ' | 307 'Symbol has negative size (likely not sorted properly): ' |
214 '%r\nprev symbol: %r' % (symbol, prev_symbol)) | 308 '%r\nprev symbol: %r' % (symbol, prev_symbol)) |
215 | 309 |
216 | 310 |
217 def _ClusterSymbols(symbols): | 311 def _ClusterSymbols(symbols): |
218 """Returns a new list of symbols with some symbols moved into groups. | 312 """Returns a new list of symbols with some symbols moved into groups. |
219 | 313 |
(...skipping 90 matching lines...) | |
310 group_symbols, name=name_tup[0], full_name=name_tup[1], | 404 group_symbols, name=name_tup[0], full_name=name_tup[1], |
311 section_name=group_symbols[0].section_name) | 405 section_name=group_symbols[0].section_name) |
312 dest_index += 1 | 406 dest_index += 1 |
313 | 407 |
314 assert len(grouped_symbols[dest_index:None]) == len(symbols[src_index:None]) | 408 assert len(grouped_symbols[dest_index:None]) == len(symbols[src_index:None]) |
315 grouped_symbols[dest_index:None] = symbols[src_index:None] | 409 grouped_symbols[dest_index:None] = symbols[src_index:None] |
316 logging.debug('Finished making groups.') | 410 logging.debug('Finished making groups.') |
317 return grouped_symbols | 411 return grouped_symbols |
318 | 412 |
319 | 413 |
414 def _AddSymbolAliases(symbols, aliases_by_address): | |
415 # Step 1: Create list of (index_of_symbol, name_list). | |
416 logging.debug('Creating alias list') | |
417 replacements = [] | |
418 num_new_symbols = 0 | |
419 for i, s in enumerate(symbols): | |
420 # Don't alias padding-only symbols (e.g. ** symbol gap) | |
421 if s.size_without_padding == 0: | |
422 continue | |
423 name_list = aliases_by_address.get(s.address) | |
424 if name_list: | |
425 if s.name not in name_list: | |
426 logging.warning('Name missing from aliases: %s %s', s.name, name_list) | |
427 continue | |
428 replacements.append((i, name_list)) | |
429 num_new_symbols += len(name_list) - 1 | |
430 | |
431 # Step 2: Create new symbols as siblings to each existing one. | |
432 logging.debug('Creating %d aliases', num_new_symbols) | |
433 src_cursor_end = len(symbols) | |
434 symbols += [None] * num_new_symbols | |
435 dst_cursor_end = len(symbols) | |
436 for src_index, name_list in reversed(replacements): | |
437 # Copy over symbols that come after the current one. | |
438 chunk_size = src_cursor_end - src_index - 1 | |
439 dst_cursor_end -= chunk_size | |
440 src_cursor_end -= chunk_size | |
441 symbols[dst_cursor_end:dst_cursor_end + chunk_size] = ( | |
442 symbols[src_cursor_end:src_cursor_end + chunk_size]) | |
443 sym = symbols[src_index] | |
444 src_cursor_end -= 1 | |
445 | |
446 # Create aliases (does not bother reusing the existing symbol). | |
447 aliases = [None] * len(name_list) | |
448 for i, name in enumerate(name_list): | |
449 aliases[i] = models.Symbol( | |
450 sym.section_name, sym.size, address=sym.address, name=name, | |
451 aliases=aliases) | |
452 | |
453 dst_cursor_end -= len(aliases) | |
454 symbols[dst_cursor_end:dst_cursor_end + len(aliases)] = aliases | |
455 | |
456 assert dst_cursor_end == src_cursor_end | |
457 | |
458 | |
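Note: the two-cursor splice in _AddSymbolAliases generalizes to plain lists; a
minimal self-contained sketch of the same O(n) idea (hypothetical data: grow
the list once, then fill right-to-left):

    def expand(items, replacements):
      """replacements: list of (index, new_items), sorted by index."""
      grow = sum(len(new) - 1 for _, new in replacements)
      src_end = len(items)
      items += [None] * grow
      dst_end = len(items)
      for index, new in reversed(replacements):
        chunk = src_end - index - 1  # Count of items after the replaced one.
        dst_end -= chunk
        src_end -= chunk
        items[dst_end:dst_end + chunk] = items[src_end:src_end + chunk]
        src_end -= 1  # Skip over the item being replaced.
        dst_end -= len(new)
        items[dst_end:dst_end + len(new)] = new
      assert dst_end == src_end

    items = [1, 2, 3]
    expand(items, [(1, ['a', 'b', 'c'])])
    assert items == [1, 'a', 'b', 'c', 3]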
320 def LoadAndPostProcessSizeInfo(path): | 459 def LoadAndPostProcessSizeInfo(path): |
321 """Returns a SizeInfo for the given |path|.""" | 460 """Returns a SizeInfo for the given |path|.""" |
322 logging.debug('Loading results from: %s', path) | 461 logging.debug('Loading results from: %s', path) |
323 size_info = file_format.LoadSizeInfo(path) | 462 size_info = file_format.LoadSizeInfo(path) |
324 _PostProcessSizeInfo(size_info) | 463 _PostProcessSizeInfo(size_info) |
325 return size_info | 464 return size_info |
326 | 465 |
327 | 466 |
328 def _PostProcessSizeInfo(size_info): | 467 def _PostProcessSizeInfo(size_info): |
329 logging.info('Normalizing symbol names') | 468 logging.info('Normalizing symbol names') |
330 _NormalizeNames(size_info.raw_symbols) | 469 _NormalizeNames(size_info.raw_symbols) |
331 logging.info('Calculating padding') | 470 logging.info('Calculating padding') |
332 _CalculatePadding(size_info.raw_symbols) | 471 _CalculatePadding(size_info.raw_symbols) |
333 logging.info('Grouping decomposed functions') | 472 logging.info('Grouping decomposed functions') |
334 size_info.symbols = models.SymbolGroup( | 473 size_info.symbols = models.SymbolGroup( |
335 _ClusterSymbols(size_info.raw_symbols)) | 474 _ClusterSymbols(size_info.raw_symbols)) |
336 logging.info('Processed %d symbols', len(size_info.raw_symbols)) | 475 logging.info('Processed %d symbols', len(size_info.raw_symbols)) |
337 | 476 |
338 | 477 |
339 def CreateSizeInfo(map_path, lazy_paths=None, no_source_paths=False, | 478 def CreateMetadata(map_path, elf_path, apk_path, tool_prefix, output_directory): |
479 metadata = None | |
480 if elf_path: | |
481 logging.debug('Constructing metadata') | |
482 git_rev = _DetectGitRevision(os.path.dirname(elf_path)) | |
483 architecture = _ArchFromElf(elf_path, tool_prefix) | |
484 build_id = BuildIdFromElf(elf_path, tool_prefix) | |
485 timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime( | |
486 elf_path)) | |
487 timestamp = calendar.timegm(timestamp_obj.timetuple()) | |
488 | |
489 metadata = { | |
490 models.METADATA_GIT_REVISION: git_rev, | |
491 models.METADATA_ELF_ARCHITECTURE: architecture, | |
492 models.METADATA_ELF_MTIME: timestamp, | |
493 models.METADATA_ELF_BUILD_ID: build_id, | |
494 } | |
495 | |
496 if output_directory: | |
497 relative_to_out = lambda path: os.path.relpath(path, output_directory) | |
498 gn_args = _ParseGnArgs(os.path.join(output_directory, 'args.gn')) | |
499 metadata[models.METADATA_MAP_FILENAME] = relative_to_out(map_path) | |
500 metadata[models.METADATA_ELF_FILENAME] = relative_to_out(elf_path) | |
501 metadata[models.METADATA_GN_ARGS] = gn_args | |
502 | |
503 if apk_path: | |
504 metadata[models.METADATA_APK_FILENAME] = relative_to_out(apk_path) | |
505 return metadata | |
506 | |
507 | |
508 def CreateSizeInfo(map_path, elf_path, tool_prefix, output_directory, | |
340 raw_only=False): | 509 raw_only=False): |
341 """Creates a SizeInfo from the given map file.""" | 510 """Creates a SizeInfo. |
342 # tool_prefix needed for c++filt. | |
343 lazy_paths.VerifyToolPrefix() | |
344 | 511 |
345 if not no_source_paths: | 512 Args: |
346 # Parse .ninja files at the same time as parsing the .map file. | 513 map_path: Path to the linker .map(.gz) file to parse. |
347 source_mapper_result = helpers.ForkAndCall( | 514 elf_path: Path to the corresponding unstripped ELF file. Used to find symbol |
348 ninja_parser.Parse, lazy_paths.VerifyOutputDirectory()) | 515 aliases and inlined functions. Can be None. |
516 tool_prefix: Prefix for c++filt & nm (required). | |
517 output_directory: Build output directory. If None, source_paths and symbol | |
518 alias information will not be recorded. | |
519 raw_only: Fill in just the information required for creating a .size file. | |
520 """ | |
521 source_mapper = None | |
522 if output_directory: | |
523 # Start by finding the elf_object_paths, so that nm can run on them while | |
524 # the linker .map is being parsed. | |
525 logging.info('Parsing ninja files.') | |
526 source_mapper, elf_object_paths = ninja_parser.Parse( | |
527 output_directory, elf_path) | |
528 assert not elf_path or elf_object_paths, ( | |
529 'Failed to find link command in ninja files for ' + | |
530 os.path.relpath(elf_path, output_directory)) | |
531 | |
532 if elf_path: | |
533 # Run nm on the elf file to retrieve the list of symbol names per-address. | |
534 # This list is required because the .map file contains only a single name | |
535 # for each address, yet multiple symbols are often coalesced when they are | |
536 # identical. This coalescing happens mainly for small symbols and for C++ | |
537 # templates. Such symbols make up ~500kb of libchrome.so on Android. | |
538 elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix) | |
539 | |
540 # Run nm on all .o/.a files to retrieve the symbol names within them. | |
541 # The list is used to detect when multiple .o files contain the same symbol | |
542 # (e.g. inline functions), and to update the object_path / source_path | |
543 # fields accordingly. | |
544 # Looking in object files is required because the .map file chooses a | |
545 # single path for these symbols. | |
546 # Rather than record all paths for each symbol, set the paths to be the | |
547 # common ancestor of all paths. | |
548 if output_directory: | |
549 bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix, output_directory) | |
550 bulk_analyzer.AnalyzePaths(elf_object_paths) | |
349 | 551 |
350 with _OpenMaybeGz(map_path) as map_file: | 552 with _OpenMaybeGz(map_path) as map_file: |
351 section_sizes, raw_symbols = ( | 553 section_sizes, raw_symbols = ( |
352 linker_map_parser.MapFileParser().Parse(map_file)) | 554 linker_map_parser.MapFileParser().Parse(map_file)) |
353 | 555 |
354 if not no_source_paths: | 556 if elf_path: |
355 logging.info('Extracting source paths from .ninja files') | 557 logging.debug('Validating section sizes') |
356 source_mapper = source_mapper_result.get() | 558 elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix) |
559 for k, v in elf_section_sizes.iteritems(): | |
560 if v != section_sizes.get(k): | |
561 logging.error('ELF file and .map file do not agree on section sizes.') | |
562 logging.error('.map file: %r', section_sizes) | |
563 logging.error('readelf: %r', elf_section_sizes) | |
564 sys.exit(1) | |
565 | |
566 if elf_path and output_directory: | |
567 missed_object_paths = _DiscoverMissedObjectPaths( | |
568 raw_symbols, elf_object_paths) | |
569 bulk_analyzer.AnalyzePaths(missed_object_paths) | |
570 bulk_analyzer.Close() | |
571 | |
572 if source_mapper: | |
573 logging.info('Looking up source paths from ninja files') | |
357 _ExtractSourcePaths(raw_symbols, source_mapper) | 574 _ExtractSourcePaths(raw_symbols, source_mapper) |
358 assert source_mapper.unmatched_paths_count == 0, ( | 575 assert source_mapper.unmatched_paths_count == 0, ( |
359 'One or more source file paths could not be found. Likely caused by ' | 576 'One or more source file paths could not be found. Likely caused by ' |
360 '.ninja files being generated at a different time than the .map file.') | 577 '.ninja files being generated at a different time than the .map file.') |
361 | 578 |
362 logging.info('Stripping linker prefixes from symbol names') | 579 logging.info('Stripping linker prefixes from symbol names') |
363 _StripLinkerAddedSymbolPrefixes(raw_symbols) | 580 _StripLinkerAddedSymbolPrefixes(raw_symbols) |
364 # Map file for some reason doesn't unmangle all names. | 581 # Map file for some reason doesn't unmangle all names. |
365 # Unmangle prints its own log statement. | 582 # Unmangle prints its own log statement. |
366 _UnmangleRemainingSymbols(raw_symbols, lazy_paths.tool_prefix) | 583 _UnmangleRemainingSymbols(raw_symbols, tool_prefix) |
367 logging.info('Normalizing object paths') | 584 |
368 _NormalizeObjectPaths(raw_symbols) | 585 if elf_path: |
586 logging.info('Adding aliased symbols, as reported by nm') | |
587 # This normally does not block (it's finished by this time). | |
588 aliases_by_address = elf_nm_result.get() | |
589 _AddSymbolAliases(raw_symbols, aliases_by_address) | |
590 | |
591 if output_directory: | |
592 # For aliases, this provides path information where there wasn't any. | |
593 logging.info('Computing ancestor paths for inline functions and ' | |
594 'normalizing object paths') | |
595 | |
596 object_paths_by_name = bulk_analyzer.Get() | |
597 logging.debug('Fetched path information for %d symbols from %d files', | |
598 len(object_paths_by_name), | |
599 len(elf_object_paths) + len(missed_object_paths)) | |
600 _ComputeAncestorPathsAndNormalizeObjectPaths( | |
601 raw_symbols, object_paths_by_name, source_mapper) | |
602 else: | |
603 logging.info('Normalizing object paths.') | |
604 for symbol in raw_symbols: | |
605 symbol.object_path = _NormalizeObjectPath(symbol.object_path) | |
606 | |
369 size_info = models.SizeInfo(section_sizes, raw_symbols) | 607 size_info = models.SizeInfo(section_sizes, raw_symbols) |
370 | 608 |
371 # Name normalization not strictly required, but makes for smaller files. | 609 # Name normalization not strictly required, but makes for smaller files. |
372 if raw_only: | 610 if raw_only: |
373 logging.info('Normalizing symbol names') | 611 logging.info('Normalizing symbol names') |
374 _NormalizeNames(size_info.raw_symbols) | 612 _NormalizeNames(size_info.raw_symbols) |
375 else: | 613 else: |
376 _PostProcessSizeInfo(size_info) | 614 _PostProcessSizeInfo(size_info) |
377 | 615 |
378 if logging.getLogger().isEnabledFor(logging.DEBUG): | 616 if logging.getLogger().isEnabledFor(logging.DEBUG): |
(...skipping 102 matching lines...) | |
481 any_path_within_output_directory=any_input) | 719 any_path_within_output_directory=any_input) |
482 if apk_path: | 720 if apk_path: |
483 with zipfile.ZipFile(apk_path) as z: | 721 with zipfile.ZipFile(apk_path) as z: |
484 lib_infos = [f for f in z.infolist() | 722 lib_infos = [f for f in z.infolist() |
485 if f.filename.endswith('.so') and f.file_size > 0] | 723 if f.filename.endswith('.so') and f.file_size > 0] |
486 assert lib_infos, 'APK has no .so files.' | 724 assert lib_infos, 'APK has no .so files.' |
487 # TODO(agrieve): Add support for multiple .so files, and take into account | 725 # TODO(agrieve): Add support for multiple .so files, and take into account |
488 # secondary architectures. | 726 # secondary architectures. |
489 apk_so_path = max(lib_infos, key=lambda x:x.file_size).filename | 727 apk_so_path = max(lib_infos, key=lambda x:x.file_size).filename |
490 logging.debug('Sub-apk path=%s', apk_so_path) | 728 logging.debug('Sub-apk path=%s', apk_so_path) |
491 if not elf_path: | 729 if not elf_path and lazy_paths.output_directory: |
492 elf_path = os.path.join( | 730 elf_path = os.path.join( |
493 lazy_paths.output_directory, 'lib.unstripped', | 731 lazy_paths.output_directory, 'lib.unstripped', |
494 os.path.basename(apk_so_path.replace('crazy.', ''))) | 732 os.path.basename(apk_so_path.replace('crazy.', ''))) |
495 logging.debug('Detected --elf-file=%s', elf_path) | 733 logging.debug('Detected --elf-file=%s', elf_path) |
496 | 734 |
497 if map_path: | 735 if map_path: |
498 if not map_path.endswith('.map') and not map_path.endswith('.map.gz'): | 736 if not map_path.endswith('.map') and not map_path.endswith('.map.gz'): |
499 parser.error('Expected --map-file to end with .map or .map.gz') | 737 parser.error('Expected --map-file to end with .map or .map.gz') |
500 else: | 738 else: |
501 map_path = elf_path + '.map' | 739 map_path = elf_path + '.map' |
502 if not os.path.exists(map_path): | 740 if not os.path.exists(map_path): |
503 map_path += '.gz' | 741 map_path += '.gz' |
504 if not os.path.exists(map_path): | 742 if not os.path.exists(map_path): |
505 parser.error('Could not find .map(.gz)? file. Use --map-file.') | 743 parser.error('Could not find .map(.gz)? file. Use --map-file.') |
506 | 744 |
507 metadata = None | 745 tool_prefix = lazy_paths.VerifyToolPrefix() |
508 if elf_path: | 746 output_directory = None |
509 logging.debug('Constructing metadata') | 747 if not args.no_source_paths: |
510 git_rev = _DetectGitRevision(os.path.dirname(elf_path)) | 748 output_directory = lazy_paths.VerifyOutputDirectory() |
511 architecture = _ArchFromElf(elf_path, lazy_paths.tool_prefix) | |
512 build_id = BuildIdFromElf(elf_path, lazy_paths.tool_prefix) | |
513 timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime( | |
514 elf_path)) | |
515 timestamp = calendar.timegm(timestamp_obj.timetuple()) | |
516 gn_args = _ParseGnArgs(os.path.join(lazy_paths.output_directory, 'args.gn')) | |
517 | 749 |
518 def relative_to_out(path): | 750 metadata = CreateMetadata(map_path, elf_path, apk_path, tool_prefix, |
519 return os.path.relpath(path, lazy_paths.VerifyOutputDirectory()) | 751 output_directory) |
520 | 752 if apk_path and elf_path: |
521 metadata = { | 753 # Extraction takes around 1 second, so do it in parallel. |
522 models.METADATA_GIT_REVISION: git_rev, | 754 apk_elf_result = concurrent.ForkAndCall( |
523 models.METADATA_MAP_FILENAME: relative_to_out(map_path), | 755 _ElfInfoFromApk, (apk_path, apk_so_path, tool_prefix)) |
524 models.METADATA_ELF_ARCHITECTURE: architecture, | |
525 models.METADATA_ELF_FILENAME: relative_to_out(elf_path), | |
526 models.METADATA_ELF_MTIME: timestamp, | |
527 models.METADATA_ELF_BUILD_ID: build_id, | |
528 models.METADATA_GN_ARGS: gn_args, | |
529 } | |
530 | |
531 if apk_path: | |
532 metadata[models.METADATA_APK_FILENAME] = relative_to_out(apk_path) | |
533 # Extraction takes around 1 second, so do it in parallel. | |
534 apk_elf_result = helpers.ForkAndCall( | |
535 _ElfInfoFromApk, apk_path, apk_so_path, lazy_paths.tool_prefix) | |
536 | 756 |
537 size_info = CreateSizeInfo( | 757 size_info = CreateSizeInfo( |
538 map_path, lazy_paths, no_source_paths=args.no_source_paths, raw_only=True) | 758 map_path, elf_path, tool_prefix, output_directory, raw_only=True) |
539 | 759 |
540 if metadata: | 760 if metadata: |
541 size_info.metadata = metadata | 761 size_info.metadata = metadata |
542 logging.debug('Validating section sizes') | |
543 elf_section_sizes = _SectionSizesFromElf(elf_path, lazy_paths.tool_prefix) | |
544 for k, v in elf_section_sizes.iteritems(): | |
545 assert v == size_info.section_sizes.get(k), ( | |
546 'ELF file and .map file do not match.') | |
547 | 762 |
548 if apk_path: | 763 if apk_path: |
549 logging.debug('Extracting section sizes from .so within .apk') | 764 logging.debug('Extracting section sizes from .so within .apk') |
550 unstripped_section_sizes = size_info.section_sizes | 765 unstripped_section_sizes = size_info.section_sizes |
551 apk_build_id, size_info.section_sizes = apk_elf_result.get() | 766 apk_build_id, size_info.section_sizes = apk_elf_result.get() |
552 assert apk_build_id == build_id, ( | 767 assert apk_build_id == metadata[models.METADATA_ELF_BUILD_ID], ( |
553 'BuildID for %s within %s did not match the one at %s' % | 768 'BuildID for %s within %s did not match the one at %s' % |
554 (apk_so_path, apk_path, elf_path)) | 769 (apk_so_path, apk_path, elf_path)) |
555 | 770 |
556 packed_section_name = None | 771 packed_section_name = None |
772 architecture = metadata[models.METADATA_ELF_ARCHITECTURE] | |
557 if architecture == 'ARM': | 773 if architecture == 'ARM': |
558 packed_section_name = '.rel.dyn' | 774 packed_section_name = '.rel.dyn' |
559 elif architecture == 'AArch64': | 775 elif architecture == 'AArch64': |
560 packed_section_name = '.rela.dyn' | 776 packed_section_name = '.rela.dyn' |
561 | 777 |
562 if packed_section_name: | 778 if packed_section_name: |
563 logging.debug('Recording size of unpacked relocations') | 779 logging.debug('Recording size of unpacked relocations') |
564 if packed_section_name not in size_info.section_sizes: | 780 if packed_section_name not in size_info.section_sizes: |
565 logging.warning('Packed section not present: %s', packed_section_name) | 781 logging.warning('Packed section not present: %s', packed_section_name) |
566 else: | 782 else: |
567 size_info.section_sizes['%s (unpacked)' % packed_section_name] = ( | 783 size_info.section_sizes['%s (unpacked)' % packed_section_name] = ( |
568 unstripped_section_sizes.get(packed_section_name)) | 784 unstripped_section_sizes.get(packed_section_name)) |
569 | 785 |
570 logging.info('Recording metadata: \n %s', | 786 logging.info('Recording metadata: \n %s', |
571 '\n '.join(describe.DescribeMetadata(size_info.metadata))) | 787 '\n '.join(describe.DescribeMetadata(size_info.metadata))) |
572 logging.info('Saving result to %s', args.size_file) | 788 logging.info('Saving result to %s', args.size_file) |
573 file_format.SaveSizeInfo(size_info, args.size_file) | 789 file_format.SaveSizeInfo(size_info, args.size_file) |
574 logging.info('Done') | 790 logging.info('Done') |