Chromium Code Reviews

Side by Side Diff: tools/binary_size/map2size.py

Issue 2813963002: //tools/binary_size: Consolidate most tools into "supersize" command (Closed)
Patch Set: Fix readme formatting. Make archive's --output-file a positional arg (Created 3 years, 8 months ago)
#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Main Python API for analyzing binary size."""

import argparse
import calendar
import collections
import datetime
import gzip
import logging
import os
import re
import subprocess
import sys

import describe
import file_format
import function_signature
import helpers
import linker_map_parser
import models
import ninja_parser
import paths


def _OpenMaybeGz(path, mode=None):
  """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""
  if path.endswith('.gz'):
    if mode and 'w' in mode:
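      # The third positional argument is compresslevel=1, which favors
      # write speed over compression ratio.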
      return gzip.GzipFile(path, mode, 1)
    return gzip.open(path, mode)
  return open(path, mode or 'r')


def _UnmangleRemainingSymbols(symbols, tool_prefix):
  """Uses c++filt to unmangle any symbols that need it."""
  to_process = [s for s in symbols if s.name.startswith('_Z')]
  if not to_process:
    return

  logging.info('Unmangling %d names', len(to_process))
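  # Pipe all mangled names through a single c++filt process (one name per
  # line) rather than spawning a subprocess per symbol.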
  proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE)
  stdout = proc.communicate('\n'.join(s.name for s in to_process))[0]
  assert proc.returncode == 0

  for i, line in enumerate(stdout.splitlines()):
    to_process[i].name = line


def _NormalizeNames(symbols):
  """Ensures that all names are formatted in a useful way.

  This includes:
    - Assigning of |full_name|.
    - Stripping of return types in |full_name| and |name| (for functions).
    - Stripping parameters from |name|.
    - Moving "vtable for" and the like to be suffixes rather than prefixes.
  """
  found_prefixes = set()
  for symbol in symbols:
    if symbol.name.startswith('*'):
      # See comment in _CalculatePadding() about when this can happen.
      continue

    # E.g.: vtable for FOO
    idx = symbol.name.find(' for ', 0, 30)
    if idx != -1:
      found_prefixes.add(symbol.name[:idx + 4])
      symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'

    # E.g.: virtual thunk to FOO
    idx = symbol.name.find(' to ', 0, 30)
    if idx != -1:
      found_prefixes.add(symbol.name[:idx + 3])
      symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'

    # Strip out return type, and identify where parameter list starts.
    if symbol.section == 't':
      symbol.full_name, symbol.name = function_signature.Parse(symbol.name)

    # Remove anonymous namespaces (they just harm clustering).
    non_anonymous = symbol.name.replace('(anonymous namespace)::', '')
    if symbol.name != non_anonymous:
      symbol.is_anonymous = True
      symbol.name = non_anonymous
      symbol.full_name = symbol.full_name.replace(
          '(anonymous namespace)::', '')

    if symbol.section != 't' and '(' in symbol.name:
      # Pretty rare. Example:
      # blink::CSSValueKeywordsHash::findValueImpl(char const*)::value_word_list
      symbol.full_name = symbol.name
      symbol.name = re.sub(r'\(.*\)', '', symbol.full_name)

    # Don't bother storing both if they are the same.
    if symbol.full_name == symbol.name:
      symbol.full_name = ''

  logging.debug('Found name prefixes of: %r', found_prefixes)


def _NormalizeObjectPaths(symbols):
  """Ensures that all paths are formatted in a useful way."""
  for symbol in symbols:
    path = symbol.object_path
    if path.startswith('obj/'):
      # Convert obj/third_party/... -> third_party/...
      path = path[4:]
    elif path.startswith('../../'):
      # Convert ../../third_party/... -> third_party/...
      path = path[6:]
    if path.endswith(')'):
      # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o
      start_idx = path.index('(')
      path = os.path.join(path[:start_idx], path[start_idx + 1:-1])
    symbol.object_path = path


def _NormalizeSourcePath(path):
  if path.startswith('gen/'):
    # Convert gen/third_party/... -> third_party/...
    return path[4:]
  if path.startswith('../../'):
    # Convert ../../third_party/... -> third_party/...
    return path[6:]
  return path


def _ExtractSourcePaths(symbols, output_directory):
  """Fills in the .source_path attribute of all symbols.

  Returns True if source paths were found for all symbols.
  """
  all_found = True
  mapper = ninja_parser.SourceFileMapper(output_directory)

  for symbol in symbols:
    object_path = symbol.object_path
    if symbol.source_path or not object_path:
      continue
    # We don't have source info for prebuilt .a files.
    if not object_path.startswith('..'):
      source_path = mapper.FindSourceForPath(object_path)
      if source_path:
        symbol.source_path = _NormalizeSourcePath(source_path)
      else:
        all_found = False
        logging.warning('Could not find source path for %s', object_path)
  logging.debug('Parsed %d .ninja files.', mapper.GetParsedFileCount())
  return all_found


def _CalculatePadding(symbols):
  """Populates the |padding| field based on symbol addresses.

  Symbols must already be sorted by |address|.
  """
  seen_sections = []
  for i, symbol in enumerate(symbols[1:]):
    prev_symbol = symbols[i]
    if prev_symbol.section_name != symbol.section_name:
      assert symbol.section_name not in seen_sections, (
          'Input symbols must be sorted by section, then address.')
      seen_sections.append(symbol.section_name)
      continue
    if symbol.address <= 0 or prev_symbol.address <= 0:
      continue
    # Padding-only symbols happen for ** symbol gaps.
    prev_is_padding_only = prev_symbol.size_without_padding == 0
    if symbol.address == prev_symbol.address and not prev_is_padding_only:
      assert False, 'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol)
    # Even with symbols at the same address removed, overlaps can still
    # happen. In this case, padding will be negative (and this is fine).
    padding = symbol.address - prev_symbol.end_address
    # These thresholds were found by manually auditing arm32 Chrome.
    # E.g.: Set them to 0 and see what warnings get logged.
    # TODO(agrieve): See if these thresholds make sense for architectures
    #     other than arm32.
    if not symbol.name.startswith('*') and (
        symbol.section in 'rd' and padding >= 256 or
        symbol.section in 't' and padding >= 64):
      # For nm data, this is caused by data that has no associated symbol.
      # The linker map file lists them with no name, but with a file.
      # Example:
      #   .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
      # Whereas most look like:
      #   .data.MANGLED_NAME...
      logging.debug('Large padding of %d between:\n  A) %r\n  B) %r',
                    padding, prev_symbol, symbol)
      continue
    symbol.padding = padding
    symbol.size += padding
    assert symbol.size >= 0, (
        'Symbol has negative size (likely not sorted properly): '
        '%r\nprev symbol: %r' % (symbol, prev_symbol))


def _ClusterSymbols(symbols):
  """Returns a new list of symbols with some symbols moved into groups.

  Groups include:
   * Symbols that have [clone] in their name (created by compiler optimization).
   * Star symbols (such as "** merge strings", and "** symbol gap")
  """
  # http://unix.stackexchange.com/questions/223013/function-symbol-gets-part-suffix-after-compilation
  # Example name suffixes:
  #     [clone .part.322]
  #     [clone .isra.322]
  #     [clone .constprop.1064]
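  # E.g. (illustrative): "foo [clone .part.1]" and "foo [clone .part.2]"
  # are clustered into a single SymbolGroup named "foo", along with the
  # plain "foo" symbol when one exists.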

  # Step 1: Create name map, find clones, collect star syms into replacements.
  logging.debug('Creating name -> symbol map')
  clone_indices = []
  indices_by_full_name = {}
  # (name, full_name) -> [(index, sym),...]
  replacements_by_name = collections.defaultdict(list)
  for i, symbol in enumerate(symbols):
    if symbol.name.startswith('**'):
      # "symbol gap 3" -> "symbol gaps"
      name = re.sub(r'\s+\d+$', 's', symbol.name)
      replacements_by_name[(name, None)].append((i, symbol))
    elif symbol.full_name:
      if symbol.full_name.endswith(']') and ' [clone ' in symbol.full_name:
        clone_indices.append(i)
      else:
        indices_by_full_name[symbol.full_name] = i

  # Step 2: Collect same-named clone symbols.
  logging.debug('Grouping all clones')
  group_names_by_index = {}
  for i in clone_indices:
    symbol = symbols[i]
    # Multiple attributes could exist, so search from left-to-right.
    stripped_name = symbol.name[:symbol.name.index(' [clone ')]
    stripped_full_name = symbol.full_name[:symbol.full_name.index(' [clone ')]
    name_tup = (stripped_name, stripped_full_name)
    replacement_list = replacements_by_name[name_tup]

    if not replacement_list:
      # First occurrence, check for non-clone symbol.
      # Note: |indices_by_full_name| is keyed by full_name, so look up with
      # the stripped full_name rather than the stripped (parameterless) name.
      non_clone_idx = indices_by_full_name.get(stripped_full_name)
      if non_clone_idx is not None:
        non_clone_symbol = symbols[non_clone_idx]
        replacement_list.append((non_clone_idx, non_clone_symbol))
        group_names_by_index[non_clone_idx] = stripped_name

    replacement_list.append((i, symbol))
    group_names_by_index[i] = stripped_name

  # Step 3: Undo clustering when length=1.
  # Removing these groups means Diff() logic must know about [clone] suffix.
  to_clear = []
  for name_tup, replacement_list in replacements_by_name.iteritems():
    if len(replacement_list) == 1:
      to_clear.append(name_tup)
  for name_tup in to_clear:
    del replacements_by_name[name_tup]

  # Step 4: Replace first symbol from each cluster with a SymbolGroup.
  before_symbol_count = sum(len(x) for x in replacements_by_name.itervalues())
  logging.debug('Creating %d symbol groups from %d symbols. %d clones had only '
                'one symbol.', len(replacements_by_name), before_symbol_count,
                len(to_clear))

  len_delta = len(replacements_by_name) - before_symbol_count
  grouped_symbols = [None] * (len(symbols) + len_delta)
  dest_index = 0
  src_index = 0
  seen_names = set()
  replacement_names_by_index = {}
  for name_tup, replacement_list in replacements_by_name.iteritems():
    for tup in replacement_list:
      replacement_names_by_index[tup[0]] = name_tup

  sorted_items = replacement_names_by_index.items()
  sorted_items.sort(key=lambda tup: tup[0])
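  # Walk the original list in index order, copying runs of ungrouped symbols
  # verbatim and emitting each SymbolGroup in place of its first member.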
  for index, name_tup in sorted_items:
    count = index - src_index
    grouped_symbols[dest_index:dest_index + count] = (
        symbols[src_index:src_index + count])
    src_index = index + 1
    dest_index += count
    if name_tup not in seen_names:
      seen_names.add(name_tup)
      group_symbols = [tup[1] for tup in replacements_by_name[name_tup]]
      grouped_symbols[dest_index] = models.SymbolGroup(
          group_symbols, name=name_tup[0], full_name=name_tup[1],
          section_name=group_symbols[0].section_name)
      dest_index += 1

  assert len(grouped_symbols[dest_index:None]) == len(symbols[src_index:None])
  grouped_symbols[dest_index:None] = symbols[src_index:None]
  logging.debug('Finished making groups.')
  return grouped_symbols


def LoadAndPostProcessSizeInfo(path):
  """Returns a SizeInfo for the given |path|."""
  logging.debug('Loading results from: %s', path)
  size_info = file_format.LoadSizeInfo(path)
  _PostProcessSizeInfo(size_info)
  return size_info


def _PostProcessSizeInfo(size_info):
  logging.info('Normalizing symbol names')
  _NormalizeNames(size_info.raw_symbols)
  logging.info('Calculating padding')
  _CalculatePadding(size_info.raw_symbols)
  logging.info('Grouping decomposed functions')
  size_info.symbols = models.SymbolGroup(
      _ClusterSymbols(size_info.raw_symbols))
  logging.info('Processed %d symbols', len(size_info.raw_symbols))


def CreateSizeInfo(map_path, lazy_paths=None, no_source_paths=False,
                   raw_only=False):
  """Creates a SizeInfo from the given map file."""
  if not no_source_paths:
    # output_directory needed for source file information.
    lazy_paths.VerifyOutputDirectory()
  # tool_prefix needed for c++filt.
  lazy_paths.VerifyToolPrefix()

  with _OpenMaybeGz(map_path) as map_file:
    section_sizes, raw_symbols = (
        linker_map_parser.MapFileParser().Parse(map_file))

  if not no_source_paths:
    logging.info('Extracting source paths from .ninja files')
    all_found = _ExtractSourcePaths(raw_symbols, lazy_paths.output_directory)
    assert all_found, (
        'One or more source file paths could not be found. Likely caused by '
        '.ninja files being generated at a different time than the .map file.')
  # The map file for some reason doesn't contain unmangled names for all
  # symbols; finish the job here (unmangling logs its own progress).
  _UnmangleRemainingSymbols(raw_symbols, lazy_paths.tool_prefix)
  logging.info('Normalizing object paths')
  _NormalizeObjectPaths(raw_symbols)
  size_info = models.SizeInfo(section_sizes, raw_symbols)

  # Name normalization is not strictly required, but makes for smaller files.
  if raw_only:
    logging.info('Normalizing symbol names')
    _NormalizeNames(size_info.raw_symbols)
  else:
    _PostProcessSizeInfo(size_info)

  if logging.getLogger().isEnabledFor(logging.DEBUG):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  return size_info


def _DetectGitRevision(directory):
  try:
    git_rev = subprocess.check_output(
        ['git', '-C', directory, 'rev-parse', 'HEAD'])
    return git_rev.rstrip()
  except Exception:
    logging.warning('Failed to detect git revision for file metadata.')
    return None


def BuildIdFromElf(elf_path, tool_prefix):
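  # `readelf -n` prints the ELF notes; the GNU build-id appears as a
  # "Build ID: <hex>" line.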
  args = [tool_prefix + 'readelf', '-n', elf_path]
  stdout = subprocess.check_output(args)
  match = re.search(r'Build ID: (\w+)', stdout)
  assert match, 'Build ID not found from running: ' + ' '.join(args)
  return match.group(1)


def _SectionSizesFromElf(elf_path, tool_prefix):
  args = [tool_prefix + 'readelf', '-S', '--wide', elf_path]
  stdout = subprocess.check_output(args)
  section_sizes = {}
  # Matches [ 2] .hash HASH 00000000006681f0 0001f0 003154 04 A 3 0 8
  for match in re.finditer(r'\[[\s\d]+\] (\..*)$', stdout, re.MULTILINE):
    items = match.group(1).split()
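    # items[4] is the Size column (hex), e.g. '003154' in the comment above.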
    section_sizes[items[0]] = int(items[4], 16)
  return section_sizes


def _ParseGnArgs(args_path):
  """Returns a list of normalized "key=value" strings."""
  args = {}
  with open(args_path) as f:
    for l in f:
      # Strips #s even if within string literal. Not a problem in practice.
      parts = l.split('#')[0].split('=')
      if len(parts) != 2:
        continue
      args[parts[0].strip()] = parts[1].strip()
  return ["%s=%s" % x for x in sorted(args.iteritems())]


def main(argv):
  parser = argparse.ArgumentParser(argv)
  parser.add_argument('--elf-file', required=True,
                      help='Path to input ELF file. Currently used for '
                           'capturing metadata. Pass "" to skip metadata '
                           'collection.')
  parser.add_argument('--map-file',
                      help='Path to input .map(.gz) file. Defaults to '
                           '{{elf_file}}.map(.gz)?')
  parser.add_argument('--output-file', required=True,
                      help='Path to output .size file.')
  parser.add_argument('--no-source-paths', action='store_true',
                      help='Do not use .ninja files to map '
                           'object_path -> source_path')
  paths.AddOptions(parser)
  args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
  if not args.output_file.endswith('.size'):
    parser.error('output_file must end with .size')

  if args.map_file:
    if (not args.map_file.endswith('.map')
        and not args.map_file.endswith('.map.gz')):
      parser.error('Expected --map-file to end with .map or .map.gz')
    map_file_path = args.map_file
  else:
    map_file_path = args.elf_file + '.map'
    if not os.path.exists(map_file_path):
      map_file_path += '.gz'
      if not os.path.exists(map_file_path):
        parser.error('Could not find .map(.gz)? file. Use --map-file.')

  lazy_paths = paths.LazyPaths(args=args, input_file=args.elf_file)
  metadata = None
  if args.elf_file:
    logging.debug('Constructing metadata')
    git_rev = _DetectGitRevision(os.path.dirname(args.elf_file))
    build_id = BuildIdFromElf(args.elf_file, lazy_paths.tool_prefix)
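    # The ELF's mtime becomes the metadata timestamp; calendar.timegm()
    # treats the UTC struct_time as seconds since the epoch.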
    timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime(
        args.elf_file))
    timestamp = calendar.timegm(timestamp_obj.timetuple())
    gn_args = _ParseGnArgs(os.path.join(lazy_paths.output_directory, 'args.gn'))

    def relative_to_out(path):
      return os.path.relpath(path, lazy_paths.VerifyOutputDirectory())

    metadata = {
        models.METADATA_GIT_REVISION: git_rev,
        models.METADATA_MAP_FILENAME: relative_to_out(map_file_path),
        models.METADATA_ELF_FILENAME: relative_to_out(args.elf_file),
        models.METADATA_ELF_MTIME: timestamp,
        models.METADATA_ELF_BUILD_ID: build_id,
        models.METADATA_GN_ARGS: gn_args,
    }

  size_info = CreateSizeInfo(map_file_path, lazy_paths,
                             no_source_paths=args.no_source_paths,
                             raw_only=True)

  if metadata:
    size_info.metadata = metadata
    logging.debug('Validating section sizes')
    elf_section_sizes = _SectionSizesFromElf(args.elf_file,
                                             lazy_paths.tool_prefix)
    for k, v in elf_section_sizes.iteritems():
      assert v == size_info.section_sizes.get(k), (
          'ELF file and .map file do not match.')

    logging.info('Recording metadata: \n  %s',
                 '\n  '.join(describe.DescribeMetadata(size_info.metadata)))

  logging.info('Saving result to %s', args.output_file)
  file_format.SaveSizeInfo(size_info, args.output_file)
  logging.info('Done')


if __name__ == '__main__':
  sys.exit(main(sys.argv))