Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Side by Side Diff: tools/binary_size/map2size.py

Issue 2785483002: Reland of V2 of //tools/binary_size rewrite (diffs). (Closed)
Patch Set: add missing name= Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/binary_size/linker_map_parser.py ('k') | tools/binary_size/mapfileparser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright 2017 The Chromium Authors. All rights reserved. 2 # Copyright 2017 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Main Python API for analyzing binary size.""" 6 """Main Python API for analyzing binary size."""
7 7
8 import argparse 8 import argparse
9 import ast
10 import distutils.spawn 9 import distutils.spawn
11 import gzip
12 import logging 10 import logging
13 import os 11 import os
14 import re
15 import subprocess 12 import subprocess
13 import sys
16 14
15 import describe
16 import file_format
17 import function_signature 17 import function_signature
18 import helpers 18 import helpers
19 import mapfileparser 19 import linker_map_parser
20 import symbols 20 import models
21
22
23 # File format version for .size files.
24 _SERIALIZATION_VERSION = 1
25
26
27 def _OpenMaybeGz(path, mode=None):
28 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""
29 if path.endswith('.gz'):
30 if mode and 'w' in mode:
31 return gzip.GzipFile(path, mode, 1)
32 return gzip.open(path, mode)
33 return open(path, mode or 'r')
34
35
36 def _EndsWithMaybeGz(path, suffix):
37 return path.endswith(suffix) or path.endswith(suffix + '.gz')
38 21
39 22
40 def _IterLines(s): 23 def _IterLines(s):
41 prev_idx = -1 24 prev_idx = -1
42 while True: 25 while True:
43 idx = s.find('\n', prev_idx + 1) 26 idx = s.find('\n', prev_idx + 1)
44 if idx == -1: 27 if idx == -1:
45 return 28 return
46 yield s[prev_idx + 1:idx] 29 yield s[prev_idx + 1:idx]
47 prev_idx = idx 30 prev_idx = idx
48 31
49 32
def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
  """Uses c++filt to unmangle any symbols that need it."""
  mangled = [s for s in symbol_group if s.name.startswith('_Z')]
  if not mangled:
    return

  logging.info('Unmangling %d names', len(mangled))
  proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE)
  stdout = proc.communicate('\n'.join(s.name for s in mangled))[0]
  assert proc.returncode == 0

  # c++filt emits exactly one demangled name per input line, in order.
  for i, line in enumerate(_IterLines(stdout)):
    mangled[i].name = line
64 47
65 48
66 def _NormalizeNames(symbol_group): 49 def _NormalizeNames(symbol_group):
67 """Ensures that all names are formatted in a useful way. 50 """Ensures that all names are formatted in a useful way.
68 51
69 This includes: 52 This includes:
70 - Assigning of |function_signature| (for functions). 53 - Assigning of |function_signature| (for functions).
71 - Stripping of return types in |function_signature| and |name|. 54 - Stripping of return types in |function_signature| and |name|.
72 - Stripping parameters from |name|. 55 - Stripping parameters from |name|.
73 - Moving "vtable for" and the like to be suffixes rather than prefixes. 56 - Moving "vtable for" and the like to be suffixes rather than prefixes.
74 """ 57 """
75 found_prefixes = set() 58 found_prefixes = set()
76 for symbol in symbol_group: 59 for symbol in symbol_group:
77 if not symbol.name or symbol.name.startswith('*'): 60 if symbol.name.startswith('*'):
78 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this 61 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this
79 # can happen. 62 # can happen.
80 continue 63 continue
81 64
82 # E.g.: vtable for FOO 65 # E.g.: vtable for FOO
83 idx = symbol.name.find(' for ', 0, 30) 66 idx = symbol.name.find(' for ', 0, 30)
84 if idx != -1: 67 if idx != -1:
85 found_prefixes.add(symbol.name[:idx + 4]) 68 found_prefixes.add(symbol.name[:idx + 4])
86 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']' 69 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'
87 70
(...skipping 10 matching lines...) Expand all
98 81
99 # Remove anonymous namespaces (they just harm clustering). 82 # Remove anonymous namespaces (they just harm clustering).
100 symbol.name = symbol.name.replace('(anonymous namespace)::', '') 83 symbol.name = symbol.name.replace('(anonymous namespace)::', '')
101 84
102 logging.debug('Found name prefixes of: %r', found_prefixes) 85 logging.debug('Found name prefixes of: %r', found_prefixes)
103 86
104 87
105 def _NormalizeObjectPaths(symbol_group): 88 def _NormalizeObjectPaths(symbol_group):
106 """Ensures that all paths are formatted in a useful way.""" 89 """Ensures that all paths are formatted in a useful way."""
107 for symbol in symbol_group: 90 for symbol in symbol_group:
108 if symbol.path: 91 if symbol.path.startswith('obj/'):
109 if symbol.path.startswith('obj/'): 92 # Convert obj/third_party/... -> third_party/...
110 # Convert obj/third_party/... -> third_party/... 93 symbol.path = symbol.path[4:]
111 symbol.path = symbol.path[4:] 94 elif symbol.path.startswith('../../'):
112 elif symbol.path.startswith('../../'): 95 # Convert ../../third_party/... -> third_party/...
113 # Convert ../../third_party/... -> third_party/... 96 symbol.path = symbol.path[6:]
114 symbol.path = symbol.path[6:] 97 if symbol.path.endswith(')'):
115 if symbol.path.endswith(')'): 98 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o
116 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o 99 start_idx = symbol.path.index('(')
117 start_idx = symbol.path.index('(') 100 paren_path = symbol.path[start_idx + 1:-1]
118 paren_path = symbol.path[start_idx + 1:-1] 101 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path
119 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path
120 102
121 103
122 def _RemoveDuplicatesAndCalculatePadding(symbol_group): 104 def _RemoveDuplicatesAndCalculatePadding(symbol_group):
123 """Removes symbols at the same address and calculates the |padding| field. 105 """Removes symbols at the same address and calculates the |padding| field.
124 106
125 Symbols must already be sorted by |address|. 107 Symbols must already be sorted by |address|.
126 """ 108 """
127 i = 0 109 i = 0
128 to_remove = set() 110 to_remove = set()
129 all_symbols = symbol_group.symbols 111 all_symbols = symbol_group.symbols
130 for i in xrange(len(all_symbols)): 112 for i in xrange(len(all_symbols)):
131 prev_symbol = all_symbols[i - 1] 113 prev_symbol = all_symbols[i - 1]
132 symbol = all_symbols[i] 114 symbol = all_symbols[i]
133 if prev_symbol.section_name is not symbol.section_name: 115 if prev_symbol.section_name != symbol.section_name:
134 continue 116 continue
135 if symbol.address > 0 and prev_symbol.address > 0: 117 if symbol.address > 0 and prev_symbol.address > 0:
136 # Fold symbols that are at the same address (happens in nm output). 118 # Fold symbols that are at the same address (happens in nm output).
137 if symbol.address == prev_symbol.address: 119 if symbol.address == prev_symbol.address:
138 symbol.size = max(prev_symbol.size, symbol.size) 120 symbol.size = max(prev_symbol.size, symbol.size)
139 to_remove.add(i) 121 to_remove.add(i)
140 continue 122 continue
141 # Even with symbols at the same address removed, overlaps can still 123 # Even with symbols at the same address removed, overlaps can still
142 # happen. In this case, padding will be negative (and this is fine). 124 # happen. In this case, padding will be negative (and this is fine).
143 padding = symbol.address - prev_symbol.end_address 125 padding = symbol.address - prev_symbol.end_address
(...skipping 15 matching lines...) Expand all
159 symbol.padding = padding 141 symbol.padding = padding
160 symbol.size += padding 142 symbol.size += padding
161 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol 143 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol
162 # Map files have no overlaps, so worth special-casing the no-op case. 144 # Map files have no overlaps, so worth special-casing the no-op case.
163 if to_remove: 145 if to_remove:
164 logging.info('Removing %d overlapping symbols', len(to_remove)) 146 logging.info('Removing %d overlapping symbols', len(to_remove))
165 symbol_group.symbols = ( 147 symbol_group.symbols = (
166 [s for i, s in enumerate(all_symbols) if i not in to_remove]) 148 [s for i, s in enumerate(all_symbols) if i not in to_remove])
167 149
168 150
def _PrintStats(result, write_func):
  """Writes a coverage summary of |result| (one stat block per section).

  For each known section, reports how many of the section's bytes are
  accounted for by symbols, then repeats the stat with merge ('*') and
  anonymous symbols excluded when any exist.
  """
  for section in symbols.SECTION_TO_SECTION_NAME:
    if section == 'd':
      # The 'd' bucket aggregates every .data* section.
      expected_size = sum(v for k, v in result.section_sizes.iteritems()
                          if k.startswith('.data'))
    else:
      expected_size = result.section_sizes[
          symbols.SECTION_TO_SECTION_NAME[section]]

    def describe_group(group):
      pct = 100.0 * group.size / expected_size
      return ('Section %s has %.1f%% of %d bytes accounted for from '
              '%d symbols. %d bytes are unaccounted for. Padding '
              'accounts for %d bytes\n' % (
                  section, pct, group.size, len(group),
                  expected_size - group.size, group.padding))

    in_section = result.symbol_group.WhereInSection(section)
    write_func(describe_group(in_section))

    star_syms = in_section.WhereNameMatches(r'^\*')
    attributed_syms = star_syms.Inverted().WhereHasAnyAttribution()
    anonymous_syms = attributed_syms.Inverted()
    if star_syms or anonymous_syms:
      missing_size = star_syms.size + anonymous_syms.size
      write_func(('+ Without %d merge sections and %d anonymous entries ('
                  'accounting for %d bytes):\n') % (
                      len(star_syms), len(anonymous_syms), missing_size))
      write_func('+ ' + describe_group(attributed_syms))
203
def _SaveResult(result, file_obj):
  """Saves the result to the given file object."""
  # Store one bucket per line.
  file_obj.write('%d\n' % _SERIALIZATION_VERSION)
  file_obj.write('%r\n' % result.section_sizes)
  file_obj.write('%d\n' % len(result.symbol_group))
  # Symbols are grouped by section: a bare section-name line introduces each
  # run, followed by one tab-separated symbol per line. Only non-derived
  # fields are stored (padding and parsed names are recomputed on load).
  last_section = None
  for symbol in result.symbol_group:
    if symbol.section_name != last_section:
      file_obj.write('%s\n' % symbol.section_name)
      last_section = symbol.section_name
    file_obj.write('%x\t%x\t%s\t%s\n' % (
        symbol.address, symbol.size_without_padding,
        symbol.function_signature or symbol.name or '',
        symbol.path or ''))
222
223
def _LoadResults(file_obj):
  """Loads a result from the given file."""
  lines = iter(file_obj)
  version = int(next(lines))
  assert version == _SERIALIZATION_VERSION, (
      'Version mismatch. Need to write some upgrade code.')

  section_sizes = ast.literal_eval(next(lines))
  num_syms = int(next(lines))
  symbol_list = [None] * num_syms
  section_name = None
  for i in xrange(num_syms):
    line = next(lines)[:-1]
    if '\t' not in line:
      # A tab-less line announces the section for the symbols that follow.
      section_name = intern(line)
      line = next(lines)[:-1]
    fields = line.split('\t')
    # Bypass __init__: only the stored (non-derived) fields are filled in.
    sym = symbols.Symbol.__new__(symbols.Symbol)
    sym.section_name = section_name
    sym.address = int(fields[0], 16)
    sym.size = int(fields[1], 16)
    sym.name = fields[2] or None
    sym.path = fields[3] or None
    sym.padding = 0  # Derived
    sym.function_signature = None  # Derived
    symbol_list[i] = sym

  # Recompute derived values (padding and function names).
  result = mapfileparser.ParseResult(symbol_list, section_sizes)
  logging.info('Calculating padding')
  _RemoveDuplicatesAndCalculatePadding(result.symbol_group)
  logging.info('Deriving signatures')
  # Re-parse out function parameters.
  _NormalizeNames(result.symbol_group.WhereInSection('t'))
  return result
259
260
def AddOptions(parser):
  """Registers the analysis-related command-line flags on |parser|."""
  parser.add_argument('--tool-prefix', default='',
                      help='Path prefix for c++filt.')
  parser.add_argument('--output-directory',
                      help='Path to the root build directory.')
270 156
271 157
272 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None): 158 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None):
273 """Calls Analyze with values from args.""" 159 """Calls Analyze with values from args."""
274 if not output_directory: 160 if not output_directory:
275 abs_path = os.path.abspath(input_file) 161 abs_path = os.path.abspath(input_file)
276 release_idx = abs_path.find('Release') 162 release_idx = abs_path.find('Release')
277 if release_idx != -1: 163 if release_idx != -1:
278 output_directory = abs_path[:release_idx] + 'Release' 164 output_directory = abs_path[:release_idx] + 'Release'
279 output_directory = os.path.relpath(abs_path[:release_idx] + '/Release') 165 output_directory = os.path.relpath(abs_path[:release_idx] + '/Release')
280 logging.debug('Detected --output-directory=%s', output_directory) 166 logging.debug('Detected --output-directory=%s', output_directory)
281 167
282 if not tool_prefix and output_directory: 168 if not tool_prefix and output_directory:
283 # Auto-detect from build_vars.txt 169 # Auto-detect from build_vars.txt
284 build_vars_path = os.path.join(output_directory, 'build_vars.txt') 170 build_vars_path = os.path.join(output_directory, 'build_vars.txt')
285 if os.path.exists(build_vars_path): 171 if os.path.exists(build_vars_path):
286 with open(build_vars_path) as f: 172 with open(build_vars_path) as f:
287 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l) 173 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l)
288 logging.debug('Found --tool-prefix from build_vars.txt') 174 logging.debug('Found --tool-prefix from build_vars.txt')
289 tool_prefix = build_vars['android_tool_prefix'] 175 tool_prefix = os.path.join(output_directory,
176 build_vars['android_tool_prefix'])
290 177
291 if os.path.sep not in tool_prefix: 178 if os.path.sep not in tool_prefix:
292 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt') 179 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')
293 else: 180 else:
294 full_path = tool_prefix + 'c++filt' 181 full_path = tool_prefix + 'c++filt'
295 182
296 if not os.path.isfile(full_path): 183 if not os.path.isfile(full_path):
297 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path) 184 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)
298 logging.info('Using --tool-prefix=%s', tool_prefix) 185 logging.info('Using --tool-prefix=%s', tool_prefix)
299 return tool_prefix 186 return tool_prefix
300 187
301 188
def AnalyzeWithArgs(args, input_path):
  """Convenience wrapper: runs Analyze() using parsed command-line |args|."""
  return Analyze(input_path, output_directory=args.output_directory,
                 tool_prefix=args.tool_prefix)
304 191
305 192
def Analyze(path, output_directory=None, tool_prefix=''):
  """Analyzes |path| and returns its size information.

  |path| may be a (possibly gzipped) .size file produced earlier, or a
  (possibly gzipped) linker .map file; anything else raises.
  """
  if file_format.EndsWithMaybeGz(path, '.size'):
    logging.debug('Loading results from: %s', path)
    size_info = file_format.LoadSizeInfo(path)
    # Recompute derived values (padding and function names).
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    logging.info('Deriving signatures')
    # Re-parse out function parameters.
    _NormalizeNames(size_info.symbols.WhereInSection('t'))
    return size_info

  if not file_format.EndsWithMaybeGz(path, '.map'):
    raise Exception('Expected input to be a .map or a .size')

  # Verify tool_prefix early.
  tool_prefix = _DetectToolPrefix(tool_prefix, path, output_directory)

  with file_format.OpenMaybeGz(path) as map_file:
    size_info = linker_map_parser.MapFileParser().Parse(map_file)

  # Map file for some reason doesn't unmangle all names.
  logging.info('Calculating padding')
  _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
  # Unmangle prints its own log statement.
  _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)
  # Resolve paths prints its own log statement.
  logging.info('Normalizing names')
  _NormalizeNames(size_info.symbols)
  logging.info('Normalizing paths')
  _NormalizeObjectPaths(size_info.symbols)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Finished analyzing %d symbols', len(size_info.symbols))
  return size_info
335 229
336 230
def main(argv):
  """Command-line entry point: parses a .map file and writes a .size file."""
  # Fix: ArgumentParser's first positional parameter is |prog|; passing the
  # whole argv list to the constructor is wrong. The args are handed to the
  # parser via AddCommonOptionsAndParseArgs() below instead.
  parser = argparse.ArgumentParser()
  parser.add_argument('input_file', help='Path to input .map file.')
  parser.add_argument('output_file', help='Path to output .size(.gz) file.')
  AddOptions(parser)
  args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
  if not file_format.EndsWithMaybeGz(args.output_file, '.size'):
    parser.error('output_file must end with .size or .size.gz')

  size_info = AnalyzeWithArgs(args, args.input_file)
  logging.info('Saving result to %s', args.output_file)
  file_format.SaveSizeInfo(size_info, args.output_file)

  logging.info('Done')


if __name__ == '__main__':
  sys.exit(main(sys.argv))
OLDNEW
« no previous file with comments | « tools/binary_size/linker_map_parser.py ('k') | tools/binary_size/mapfileparser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698