Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(345)

Side by Side Diff: tools/binary_size/analyze.py

Issue 2778963003: Revert of V2 of //tools/binary_size rewrite (diffs). (Closed)
Patch Set: Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/binary_size/README.md ('k') | tools/binary_size/binary_size_utils.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright 2017 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Main Python API for analyzing binary size."""
7
8 import argparse
9 import ast
10 import distutils.spawn
11 import gzip
12 import logging
13 import os
14 import re
15 import subprocess
16
17 import function_signature
18 import helpers
19 import mapfileparser
20 import symbols
21
22
23 # File format version for .size files.
24 _SERIALIZATION_VERSION = 1
25
26
27 def _OpenMaybeGz(path, mode=None):
28 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""
29 if path.endswith('.gz'):
30 if mode and 'w' in mode:
31 return gzip.GzipFile(path, mode, 1)
32 return gzip.open(path, mode)
33 return open(path, mode or 'r')
34
35
36 def _EndsWithMaybeGz(path, suffix):
37 return path.endswith(suffix) or path.endswith(suffix + '.gz')
38
39
40 def _IterLines(s):
41 prev_idx = -1
42 while True:
43 idx = s.find('\n', prev_idx + 1)
44 if idx == -1:
45 return
46 yield s[prev_idx + 1:idx]
47 prev_idx = idx
48
49
def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
  """Uses c++filt to unmangle any symbols that need it."""
  mangled_syms = [s for s in symbol_group
                  if s.name and s.name.startswith('_Z')]
  if not mangled_syms:
    return

  logging.info('Unmangling %d names', len(mangled_syms))
  cppfilt = subprocess.Popen([tool_prefix + 'c++filt'],
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)
  # Feed every mangled name at once; c++filt echoes one demangled name per
  # input line, in order.
  stdout = cppfilt.communicate('\n'.join(s.name for s in mangled_syms))[0]
  assert cppfilt.returncode == 0

  for idx, demangled in enumerate(_IterLines(stdout)):
    mangled_syms[idx].name = demangled
64
65
66 def _NormalizeNames(symbol_group):
67 """Ensures that all names are formatted in a useful way.
68
69 This includes:
70 - Assigning of |function_signature| (for functions).
71 - Stripping of return types in |function_signature| and |name|.
72 - Stripping parameters from |name|.
73 - Moving "vtable for" and the like to be suffixes rather than prefixes.
74 """
75 found_prefixes = set()
76 for symbol in symbol_group:
77 if not symbol.name or symbol.name.startswith('*'):
78 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this
79 # can happen.
80 continue
81
82 # E.g.: vtable for FOO
83 idx = symbol.name.find(' for ', 0, 30)
84 if idx != -1:
85 found_prefixes.add(symbol.name[:idx + 4])
86 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'
87
88 # E.g.: virtual thunk to FOO
89 idx = symbol.name.find(' to ', 0, 30)
90 if idx != -1:
91 found_prefixes.add(symbol.name[:idx + 3])
92 symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'
93
94 # Strip out return type, and identify where parameter list starts.
95 if symbol.section == 't':
96 symbol.function_signature, symbol.name = (
97 function_signature.Parse(symbol.name))
98
99 # Remove anonymous namespaces (they just harm clustering).
100 symbol.name = symbol.name.replace('(anonymous namespace)::', '')
101
102 logging.debug('Found name prefixes of: %r', found_prefixes)
103
104
105 def _NormalizeObjectPaths(symbol_group):
106 """Ensures that all paths are formatted in a useful way."""
107 for symbol in symbol_group:
108 if symbol.path:
109 if symbol.path.startswith('obj/'):
110 # Convert obj/third_party/... -> third_party/...
111 symbol.path = symbol.path[4:]
112 elif symbol.path.startswith('../../'):
113 # Convert ../../third_party/... -> third_party/...
114 symbol.path = symbol.path[6:]
115 if symbol.path.endswith(')'):
116 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o
117 start_idx = symbol.path.index('(')
118 paren_path = symbol.path[start_idx + 1:-1]
119 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path
120
121
122 def _RemoveDuplicatesAndCalculatePadding(symbol_group):
123 """Removes symbols at the same address and calculates the |padding| field.
124
125 Symbols must already be sorted by |address|.
126 """
127 i = 0
128 to_remove = set()
129 all_symbols = symbol_group.symbols
130 for i in xrange(len(all_symbols)):
131 prev_symbol = all_symbols[i - 1]
132 symbol = all_symbols[i]
133 if prev_symbol.section_name is not symbol.section_name:
134 continue
135 if symbol.address > 0 and prev_symbol.address > 0:
136 # Fold symbols that are at the same address (happens in nm output).
137 if symbol.address == prev_symbol.address:
138 symbol.size = max(prev_symbol.size, symbol.size)
139 to_remove.add(i)
140 continue
141 # Even with symbols at the same address removed, overlaps can still
142 # happen. In this case, padding will be negative (and this is fine).
143 padding = symbol.address - prev_symbol.end_address
144 # These thresholds were found by manually auditing arm32 Chrome.
145 # E.g.: Set them to 0 and see what warnings get logged.
146 # TODO(agrieve): See if these thresholds make sense for architectures
147 # other than arm32.
148 if (symbol.section in 'rd' and padding >= 256 or
149 symbol.section in 't' and padding >= 64):
150 # For nm data, this is caused by data that has no associated symbol.
151 # The linker map file lists them with no name, but with a file.
152 # Example:
153 # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
154 # Where as most look like:
155 # .data.MANGLED_NAME...
156 logging.debug('Large padding of %d between:\n A) %r\n B) %r' % (
157 padding, prev_symbol, symbol))
158 continue
159 symbol.padding = padding
160 symbol.size += padding
161 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol
162 # Map files have no overlaps, so worth special-casing the no-op case.
163 if to_remove:
164 logging.info('Removing %d overlapping symbols', len(to_remove))
165 symbol_group.symbols = (
166 [s for i, s in enumerate(all_symbols) if i not in to_remove])
167
168
def _PrintStats(result, write_func):
  """Writes a human-readable accounting of how accurate |result| is.

  For every section, compares the sum of symbol sizes against the section's
  actual size, then repeats the comparison with merge-section ("*") and
  anonymous symbols excluded.
  """
  for section in symbols.SECTION_TO_SECTION_NAME:
    if section == 'd':
      # All .data* sections are lumped together under 'd'.
      expected_size = sum(v for k, v in result.section_sizes.iteritems()
                          if k.startswith('.data'))
    else:
      section_name = symbols.SECTION_TO_SECTION_NAME[section]
      expected_size = result.section_sizes[section_name]

    def describe(group):
      # Formats one accounting line for |group| against |expected_size|.
      percent = 100.0 * group.size / expected_size
      return ('Section %s has %.1f%% of %d bytes accounted for from '
              '%d symbols. %d bytes are unaccounted for. Padding '
              'accounts for %d bytes\n' % (
                  section, percent, group.size, len(group),
                  expected_size - group.size, group.padding))

    in_section = result.symbol_group.WhereInSection(section)
    write_func(describe(in_section))

    star_syms = in_section.WhereNameMatches(r'^\*')
    attributed_syms = star_syms.Inverted().WhereHasAnyAttribution()
    anonymous_syms = attributed_syms.Inverted()
    if star_syms or anonymous_syms:
      missing_size = star_syms.size + anonymous_syms.size
      write_func(('+ Without %d merge sections and %d anonymous entries ('
                  'accounting for %d bytes):\n') % (
                      len(star_syms), len(anonymous_syms), missing_size))
      write_func('+ ' + describe(attributed_syms))
202
203
def _SaveResult(result, file_obj):
  """Serializes |result| to |file_obj| in the .size text format.

  Layout: version line, repr() of section_sizes, symbol count, then for each
  symbol a tab-separated line -- preceded by a bare section-name line
  whenever the section changes.
  """
  file_obj.write('%d\n' % _SERIALIZATION_VERSION)
  file_obj.write('%r\n' % result.section_sizes)
  file_obj.write('%d\n' % len(result.symbol_group))
  last_section_name = None
  # Store symbol fields as tab-separated.
  # Store only non-derived fields.
  for sym in result.symbol_group:
    # Emit the section name only when it changes to keep the file small.
    if sym.section_name != last_section_name:
      file_obj.write('%s\n' % sym.section_name)
      last_section_name = sym.section_name
    # Don't write padding nor name since these are derived values.
    file_obj.write('%x\t%x\t%s\t%s\n' % (
        sym.address, sym.size_without_padding,
        sym.function_signature or sym.name or '',
        sym.path or ''))
222
223
def _LoadResults(file_obj):
  """Loads a result from the given file.

  Inverse of _SaveResult(): reads the version header, the section-size dict,
  the symbol count, then the per-symbol lines, and finally recomputes the
  derived fields (padding and function signatures).
  """
  lines = iter(file_obj)
  actual_version = int(next(lines))
  assert actual_version == _SERIALIZATION_VERSION, (
      'Version mismatch. Need to write some upgrade code.')

  # section_sizes was written via repr() of a plain dict, so literal_eval
  # can safely parse it back (no eval of arbitrary code).
  section_sizes = ast.literal_eval(next(lines))
  num_syms = int(next(lines))
  symbol_list = [None] * num_syms
  section_name = None
  for i in xrange(num_syms):
    # [:-1] strips the trailing newline.
    line = next(lines)[:-1]
    if '\t' not in line:
      # A line without tabs marks the start of a new section; the actual
      # symbol line follows it. intern() (Python 2 builtin) makes the many
      # repeated section-name strings share one object.
      section_name = intern(line)
      line = next(lines)[:-1]
    # __new__ bypasses Symbol.__init__; every field is assigned below.
    new_sym = symbols.Symbol.__new__(symbols.Symbol)
    parts = line.split('\t')
    new_sym.section_name = section_name
    new_sym.address = int(parts[0], 16)
    new_sym.size = int(parts[1], 16)
    new_sym.name = parts[2] or None
    new_sym.path = parts[3] or None
    new_sym.padding = 0  # Derived
    new_sym.function_signature = None  # Derived
    symbol_list[i] = new_sym

  # Recompute derived values (padding and function names).
  result = mapfileparser.ParseResult(symbol_list, section_sizes)
  logging.info('Calculating padding')
  _RemoveDuplicatesAndCalculatePadding(result.symbol_group)
  logging.info('Deriving signatures')
  # Re-parse out function parameters.
  _NormalizeNames(result.symbol_group.WhereInSection('t'))
  return result
259
260
def AddOptions(parser):
  """Adds the command-line arguments consumed by Analyze() to |parser|."""
  parser.add_argument(
      'input_file',
      help='Path to input file. Can be a linker .map file, an '
           'unstripped binary, or a saved result from '
           'analyze.py')
  parser.add_argument(
      '--tool-prefix', default='',
      help='Path prefix for c++filt.')
  parser.add_argument(
      '--output-directory',
      help='Path to the root build directory.')
270
271
272 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None):
273 """Calls Analyze with values from args."""
274 if not output_directory:
275 abs_path = os.path.abspath(input_file)
276 release_idx = abs_path.find('Release')
277 if release_idx != -1:
278 output_directory = abs_path[:release_idx] + 'Release'
279 output_directory = os.path.relpath(abs_path[:release_idx] + '/Release')
280 logging.debug('Detected --output-directory=%s', output_directory)
281
282 if not tool_prefix and output_directory:
283 # Auto-detect from build_vars.txt
284 build_vars_path = os.path.join(output_directory, 'build_vars.txt')
285 if os.path.exists(build_vars_path):
286 with open(build_vars_path) as f:
287 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l)
288 logging.debug('Found --tool-prefix from build_vars.txt')
289 tool_prefix = build_vars['android_tool_prefix']
290
291 if os.path.sep not in tool_prefix:
292 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')
293 else:
294 full_path = tool_prefix + 'c++filt'
295
296 if not os.path.isfile(full_path):
297 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)
298 logging.info('Using --tool-prefix=%s', tool_prefix)
299 return tool_prefix
300
301
def AnalyzeWithArgs(args):
  """Runs Analyze() using values from parsed command-line |args|."""
  return Analyze(args.input_file,
                 output_directory=args.output_directory,
                 tool_prefix=args.tool_prefix)
304
305
def Analyze(path, output_directory=None, tool_prefix=''):
  """Analyzes a linker .map file, or loads a previously saved .size file.

  Args:
    path: A .map / .map.gz file to analyze, or a .size / .size.gz file to
        load.
    output_directory: Root build directory (auto-detected when omitted).
    tool_prefix: Path prefix for c++filt (auto-detected when omitted).

  Returns:
    The analysis result (with normalized names, paths, and padding).

  Raises:
    Exception: If |path| is neither a .map nor a .size file.
  """
  is_size_file = _EndsWithMaybeGz(path, '.size')
  if not is_size_file and not _EndsWithMaybeGz(path, '.map'):
    raise Exception('Expected input to be a .map or a .size')

  if is_size_file:
    logging.info('Loading cached results.')
    with _OpenMaybeGz(path) as f:
      result = _LoadResults(f)
  else:
    # Verify tool_prefix early.
    tool_prefix = _DetectToolPrefix(tool_prefix, path, output_directory)

    with _OpenMaybeGz(path) as map_file:
      result = mapfileparser.MapFileParser().Parse(map_file)

    # Map file for some reason doesn't unmangle all names.
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(result.symbol_group)
    # Unmangle prints its own log statement.
    _UnmangleRemainingSymbols(result.symbol_group, tool_prefix)
    logging.info('Normalizing names')
    _NormalizeNames(result.symbol_group)
    logging.info('Normalizing paths')
    _NormalizeObjectPaths(result.symbol_group)

  if logging.getLogger().isEnabledFor(logging.INFO):
    _PrintStats(result, lambda l: logging.info(l.rstrip()))
  logging.info('Finished analyzing %d symbols', len(result.symbol_group))
  return result
335
336
def main():
  """Command-line entry point: analyzes the input and saves a .size file."""
  parser = argparse.ArgumentParser()
  parser.add_argument('--output', required=True,
                      help='Path to store results. Must end in .size or '
                           '.size.gz')
  AddOptions(parser)
  args = helpers.AddCommonOptionsAndParseArgs(parser)

  out_path = args.output
  if not _EndsWithMaybeGz(out_path, '.size'):
    raise Exception('--output must end with .size or .size.gz')

  result = AnalyzeWithArgs(args)
  logging.info('Saving result to %s', out_path)
  out_file = _OpenMaybeGz(out_path, 'wb')
  try:
    _SaveResult(result, out_file)
  finally:
    out_file.close()

  logging.info('Done')
353
354
# Allow direct invocation as a script (see AddOptions for arguments).
if __name__ == '__main__':
  main()
OLDNEW
« no previous file with comments | « tools/binary_size/README.md ('k') | tools/binary_size/binary_size_utils.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698