Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(131)

Side by Side Diff: tools/binary_size/map2size.py

Issue 2769933002: V2 of //tools/binary_size rewrite (diffs). (Closed)
Patch Set: self-review (created 3 years, 9 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright 2017 The Chromium Authors. All rights reserved. 2 # Copyright 2017 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Main Python API for analyzing binary size.""" 6 """Main Python API for analyzing binary size."""
7 7
8 import argparse 8 import argparse
9 import ast
10 import distutils.spawn 9 import distutils.spawn
11 import gzip
12 import logging 10 import logging
13 import os 11 import os
14 import re
15 import subprocess 12 import subprocess
13 import sys
16 14
15 import file_format
17 import function_signature 16 import function_signature
18 import helpers 17 import helpers
19 import mapfileparser 18 import linker_map_parser
20 import symbols 19 import models
21
22
23 # File format version for .size files.
24 _SERIALIZATION_VERSION = 1
25
26
27 def _OpenMaybeGz(path, mode=None):
28 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""
29 if path.endswith('.gz'):
30 if mode and 'w' in mode:
31 return gzip.GzipFile(path, mode, 1)
32 return gzip.open(path, mode)
33 return open(path, mode or 'r')
34
35
36 def _EndsWithMaybeGz(path, suffix):
37 return path.endswith(suffix) or path.endswith(suffix + '.gz')
38 20
39 21
40 def _IterLines(s): 22 def _IterLines(s):
41 prev_idx = -1 23 prev_idx = -1
42 while True: 24 while True:
43 idx = s.find('\n', prev_idx + 1) 25 idx = s.find('\n', prev_idx + 1)
44 if idx == -1: 26 if idx == -1:
45 return 27 return
46 yield s[prev_idx + 1:idx] 28 yield s[prev_idx + 1:idx]
47 prev_idx = idx 29 prev_idx = idx
48 30
49 31
50 def _UnmangleRemainingSymbols(symbol_group, tool_prefix): 32 def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
51 """Uses c++filt to unmangle any symbols that need it.""" 33 """Uses c++filt to unmangle any symbols that need it."""
52 to_process = [s for s in symbol_group if s.name and s.name.startswith('_Z')] 34 to_process = [s for s in symbol_group if s.name.startswith('_Z')]
53 if not to_process: 35 if not to_process:
54 return 36 return
55 37
56 logging.info('Unmangling %d names', len(to_process)) 38 logging.info('Unmangling %d names', len(to_process))
57 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE, 39 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,
58 stdout=subprocess.PIPE) 40 stdout=subprocess.PIPE)
59 stdout = proc.communicate('\n'.join(s.name for s in to_process))[0] 41 stdout = proc.communicate('\n'.join(s.name for s in to_process))[0]
60 assert proc.returncode == 0 42 assert proc.returncode == 0
61 43
62 for i, line in enumerate(_IterLines(stdout)): 44 for i, line in enumerate(_IterLines(stdout)):
63 to_process[i].name = line 45 to_process[i].name = line
64 46
65 47
66 def _NormalizeNames(symbol_group): 48 def _NormalizeNames(symbol_group):
67 """Ensures that all names are formatted in a useful way. 49 """Ensures that all names are formatted in a useful way.
68 50
69 This includes: 51 This includes:
70 - Assigning of |function_signature| (for functions). 52 - Assigning of |function_signature| (for functions).
71 - Stripping of return types in |function_signature| and |name|. 53 - Stripping of return types in |function_signature| and |name|.
72 - Stripping parameters from |name|. 54 - Stripping parameters from |name|.
73 - Moving "vtable for" and the like to be suffixes rather than prefixes. 55 - Moving "vtable for" and the like to be suffixes rather than prefixes.
74 """ 56 """
75 found_prefixes = set() 57 found_prefixes = set()
76 for symbol in symbol_group: 58 for symbol in symbol_group:
77 if not symbol.name or symbol.name.startswith('*'): 59 if symbol.name.startswith('*'):
78 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this 60 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this
79 # can happen. 61 # can happen.
80 continue 62 continue
81 63
82 # E.g.: vtable for FOO 64 # E.g.: vtable for FOO
83 idx = symbol.name.find(' for ', 0, 30) 65 idx = symbol.name.find(' for ', 0, 30)
84 if idx != -1: 66 if idx != -1:
85 found_prefixes.add(symbol.name[:idx + 4]) 67 found_prefixes.add(symbol.name[:idx + 4])
86 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']' 68 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'
87 69
(...skipping 10 matching lines...) Expand all
98 80
99 # Remove anonymous namespaces (they just harm clustering). 81 # Remove anonymous namespaces (they just harm clustering).
100 symbol.name = symbol.name.replace('(anonymous namespace)::', '') 82 symbol.name = symbol.name.replace('(anonymous namespace)::', '')
101 83
102 logging.debug('Found name prefixes of: %r', found_prefixes) 84 logging.debug('Found name prefixes of: %r', found_prefixes)
103 85
104 86
105 def _NormalizeObjectPaths(symbol_group): 87 def _NormalizeObjectPaths(symbol_group):
106 """Ensures that all paths are formatted in a useful way.""" 88 """Ensures that all paths are formatted in a useful way."""
107 for symbol in symbol_group: 89 for symbol in symbol_group:
108 if symbol.path: 90 if symbol.path.startswith('obj/'):
109 if symbol.path.startswith('obj/'): 91 # Convert obj/third_party/... -> third_party/...
110 # Convert obj/third_party/... -> third_party/... 92 symbol.path = symbol.path[4:]
111 symbol.path = symbol.path[4:] 93 elif symbol.path.startswith('../../'):
112 elif symbol.path.startswith('../../'): 94 # Convert ../../third_party/... -> third_party/...
113 # Convert ../../third_party/... -> third_party/... 95 symbol.path = symbol.path[6:]
114 symbol.path = symbol.path[6:] 96 if symbol.path.endswith(')'):
115 if symbol.path.endswith(')'): 97 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o
116 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o 98 start_idx = symbol.path.index('(')
117 start_idx = symbol.path.index('(') 99 paren_path = symbol.path[start_idx + 1:-1]
118 paren_path = symbol.path[start_idx + 1:-1] 100 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path
119 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path
120 101
121 102
122 def _RemoveDuplicatesAndCalculatePadding(symbol_group): 103 def _RemoveDuplicatesAndCalculatePadding(symbol_group):
123 """Removes symbols at the same address and calculates the |padding| field. 104 """Removes symbols at the same address and calculates the |padding| field.
124 105
125 Symbols must already be sorted by |address|. 106 Symbols must already be sorted by |address|.
126 """ 107 """
127 i = 0 108 i = 0
128 to_remove = set() 109 to_remove = set()
129 all_symbols = symbol_group.symbols 110 all_symbols = symbol_group.symbols
(...skipping 29 matching lines...) Expand all
159 symbol.padding = padding 140 symbol.padding = padding
160 symbol.size += padding 141 symbol.size += padding
161 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol 142 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol
162 # Map files have no overlaps, so worth special-casing the no-op case. 143 # Map files have no overlaps, so worth special-casing the no-op case.
163 if to_remove: 144 if to_remove:
164 logging.info('Removing %d overlapping symbols', len(to_remove)) 145 logging.info('Removing %d overlapping symbols', len(to_remove))
165 symbol_group.symbols = ( 146 symbol_group.symbols = (
166 [s for i, s in enumerate(all_symbols) if i not in to_remove]) 147 [s for i, s in enumerate(all_symbols) if i not in to_remove])
167 148
168 149
169 def _PrintStats(result, write_func): 150 def _PrintStats(size_info, write_func):
170 """Prints out how accurate |result| is.""" 151 """Prints out how accurate |size_info| is."""
171 for section in symbols.SECTION_TO_SECTION_NAME: 152 for section in models.SECTION_TO_SECTION_NAME:
172 if section == 'd': 153 if section == 'd':
173 expected_size = sum(v for k, v in result.section_sizes.iteritems() 154 expected_size = sum(v for k, v in size_info.section_sizes.iteritems()
174 if k.startswith('.data')) 155 if k.startswith('.data'))
175 else: 156 else:
176 expected_size = result.section_sizes[ 157 expected_size = size_info.section_sizes[
177 symbols.SECTION_TO_SECTION_NAME[section]] 158 models.SECTION_TO_SECTION_NAME[section]]
178 159
179 def one_stat(group): 160 def one_stat(group):
180 template = ('Section %s has %.1f%% of %d bytes accounted for from ' 161 template = ('Section %s has %.1f%% of %d bytes accounted for from '
181 '%d symbols. %d bytes are unaccounted for. Padding ' 162 '%d symbols. %d bytes are unaccounted for. Padding '
182 'accounts for %d bytes\n') 163 'accounts for %d bytes\n')
183 actual_size = group.size 164 actual_size = group.size_without_padding if group.IsBss() else group.size
184 count = len(group) 165 count = len(group)
185 padding = group.padding 166 padding = group.padding
186 size_percent = 100.0 * actual_size / expected_size 167 size_percent = 100.0 * actual_size / expected_size
187 return (template % (section, size_percent, actual_size, count, 168 return (template % (section, size_percent, actual_size, count,
188 expected_size - actual_size, padding)) 169 expected_size - actual_size, padding))
189 170
190 in_section = result.symbol_group.WhereInSection(section) 171 in_section = size_info.symbols.WhereInSection(section)
191 write_func(one_stat(in_section)) 172 write_func(one_stat(in_section))
192 173
193 star_syms = in_section.WhereNameMatches(r'^\*') 174 star_syms = in_section.WhereNameMatches(r'^\*')
194 attributed_syms = star_syms.Inverted().WhereHasAnyAttribution() 175 attributed_syms = star_syms.Inverted().WhereHasAnyAttribution()
195 anonymous_syms = attributed_syms.Inverted() 176 anonymous_syms = attributed_syms.Inverted()
196 if star_syms or anonymous_syms: 177 if star_syms or anonymous_syms:
197 missing_size = star_syms.size + anonymous_syms.size 178 missing_size = star_syms.size + anonymous_syms.size
198 write_func(('+ Without %d merge sections and %d anonymous entries (' 179 write_func(('+ Without %d merge sections and %d anonymous entries ('
199 'accounting for %d bytes):\n') % ( 180 'accounting for %d bytes):\n') % (
200 len(star_syms), len(anonymous_syms), missing_size)) 181 len(star_syms), len(anonymous_syms), missing_size))
201 write_func('+ ' + one_stat(attributed_syms)) 182 write_func('+ ' + one_stat(attributed_syms))
202 183
203 184
204 def _SaveResult(result, file_obj):
205 """Saves the result to the given file object."""
206 # Store one bucket per line.
207 file_obj.write('%d\n' % _SERIALIZATION_VERSION)
208 file_obj.write('%r\n' % result.section_sizes)
209 file_obj.write('%d\n' % len(result.symbol_group))
210 prev_section_name = None
211 # Store symbol fields as tab-separated.
212 # Store only non-derived fields.
213 for symbol in result.symbol_group:
214 if symbol.section_name != prev_section_name:
215 file_obj.write('%s\n' % symbol.section_name)
216 prev_section_name = symbol.section_name
217 # Don't write padding nor name since these are derived values.
218 file_obj.write('%x\t%x\t%s\t%s\n' % (
219 symbol.address, symbol.size_without_padding,
220 symbol.function_signature or symbol.name or '',
221 symbol.path or ''))
222
223
224 def _LoadResults(file_obj):
225 """Loads a result from the given file."""
226 lines = iter(file_obj)
227 actual_version = int(next(lines))
228 assert actual_version == _SERIALIZATION_VERSION, (
229 'Version mismatch. Need to write some upgrade code.')
230
231 section_sizes = ast.literal_eval(next(lines))
232 num_syms = int(next(lines))
233 symbol_list = [None] * num_syms
234 section_name = None
235 for i in xrange(num_syms):
236 line = next(lines)[:-1]
237 if '\t' not in line:
238 section_name = intern(line)
239 line = next(lines)[:-1]
240 new_sym = symbols.Symbol.__new__(symbols.Symbol)
241 parts = line.split('\t')
242 new_sym.section_name = section_name
243 new_sym.address = int(parts[0], 16)
244 new_sym.size = int(parts[1], 16)
245 new_sym.name = parts[2] or None
246 new_sym.path = parts[3] or None
247 new_sym.padding = 0 # Derived
248 new_sym.function_signature = None # Derived
249 symbol_list[i] = new_sym
250
251 # Recompute derived values (padding and function names).
252 result = mapfileparser.ParseResult(symbol_list, section_sizes)
253 logging.info('Calculating padding')
254 _RemoveDuplicatesAndCalculatePadding(result.symbol_group)
255 logging.info('Deriving signatures')
256 # Re-parse out function parameters.
257 _NormalizeNames(result.symbol_group.WhereInSection('t'))
258 return result
259
260
261 def AddOptions(parser): 185 def AddOptions(parser):
262 parser.add_argument('input_file',
263 help='Path to input file. Can be a linker .map file, an '
264 'unstripped binary, or a saved result from '
265 'analyze.py')
266 parser.add_argument('--tool-prefix', default='', 186 parser.add_argument('--tool-prefix', default='',
267 help='Path prefix for c++filt.') 187 help='Path prefix for c++filt.')
268 parser.add_argument('--output-directory', 188 parser.add_argument('--output-directory',
269 help='Path to the root build directory.') 189 help='Path to the root build directory.')
270 190
271 191
272 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None): 192 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None):
273 """Calls Analyze with values from args.""" 193 """Calls Analyze with values from args."""
274 if not output_directory: 194 if not output_directory:
275 abs_path = os.path.abspath(input_file) 195 abs_path = os.path.abspath(input_file)
(...skipping 16 matching lines...) Expand all
292 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt') 212 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')
293 else: 213 else:
294 full_path = tool_prefix + 'c++filt' 214 full_path = tool_prefix + 'c++filt'
295 215
296 if not os.path.isfile(full_path): 216 if not os.path.isfile(full_path):
297 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path) 217 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)
298 logging.info('Using --tool-prefix=%s', tool_prefix) 218 logging.info('Using --tool-prefix=%s', tool_prefix)
299 return tool_prefix 219 return tool_prefix
300 220
301 221
302 def AnalyzeWithArgs(args): 222 def AnalyzeWithArgs(args, input_path):
303 return Analyze(args.input_file, args.output_directory, args.tool_prefix) 223 return Analyze(input_path, args.output_directory, args.tool_prefix)
304 224
305 225
306 def Analyze(path, output_directory=None, tool_prefix=''): 226 def Analyze(path, output_directory=None, tool_prefix=''):
307 if _EndsWithMaybeGz(path, '.size'): 227 if file_format.EndsWithMaybeGz(path, '.size'):
308 logging.info('Loading cached results.') 228 logging.debug('Loading results from: %s', path)
309 with _OpenMaybeGz(path) as f: 229 size_info = file_format.LoadSizeInfo(path)
310 result = _LoadResults(f) 230 # Recompute derived values (padding and function names).
311 elif not _EndsWithMaybeGz(path, '.map'): 231 logging.info('Calculating padding')
232 _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
233 logging.info('Deriving signatures')
234 # Re-parse out function parameters.
235 _NormalizeNames(size_info.symbols.WhereInSection('t'))
236 return size_info
237 elif not file_format.EndsWithMaybeGz(path, '.map'):
312 raise Exception('Expected input to be a .map or a .size') 238 raise Exception('Expected input to be a .map or a .size')
313 else: 239 else:
314 # Verify tool_prefix early. 240 # Verify tool_prefix early.
315 tool_prefix = _DetectToolPrefix(tool_prefix, path, output_directory) 241 tool_prefix = _DetectToolPrefix(tool_prefix, path, output_directory)
316 242
317 with _OpenMaybeGz(path) as map_file: 243 with file_format.OpenMaybeGz(path) as map_file:
318 result = mapfileparser.MapFileParser().Parse(map_file) 244 size_info = linker_map_parser.MapFileParser().Parse(map_file)
319 245
320 # Map file for some reason doesn't unmangle all names. 246 # Map file for some reason doesn't unmangle all names.
321 logging.info('Calculating padding') 247 logging.info('Calculating padding')
322 _RemoveDuplicatesAndCalculatePadding(result.symbol_group) 248 _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
323 # Unmangle prints its own log statement. 249 # Unmangle prints its own log statement.
324 _UnmangleRemainingSymbols(result.symbol_group, tool_prefix) 250 _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)
325 # Resolve paths prints its own log statement. 251 # Resolve paths prints its own log statement.
326 logging.info('Normalizing names') 252 logging.info('Normalizing names')
327 _NormalizeNames(result.symbol_group) 253 _NormalizeNames(size_info.symbols)
328 logging.info('Normalizing paths') 254 logging.info('Normalizing paths')
329 _NormalizeObjectPaths(result.symbol_group) 255 _NormalizeObjectPaths(size_info.symbols)
330 256
331 if logging.getLogger().isEnabledFor(logging.INFO): 257 if logging.getLogger().isEnabledFor(logging.INFO):
332 _PrintStats(result, lambda l: logging.info(l.rstrip())) 258 _PrintStats(size_info, lambda l: logging.info(l.rstrip()))
333 logging.info('Finished analyzing %d symbols', len(result.symbol_group)) 259 logging.info('Finished analyzing %d symbols', len(size_info.symbols))
334 return result 260 return size_info
335 261
336 262
337 def main(): 263 def main(argv):
338 parser = argparse.ArgumentParser() 264 parser = argparse.ArgumentParser(argv)
339 parser.add_argument('--output', required=True, 265 parser.add_argument('input_file', help='Path to input .map file.')
340 help='Path to store results. Must end in .size or ' 266 parser.add_argument('output_file', help='Path to output .size(.gz) file.')
341 '.size.gz')
342 AddOptions(parser) 267 AddOptions(parser)
343 args = helpers.AddCommonOptionsAndParseArgs(parser) 268 args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
344 if not _EndsWithMaybeGz(args.output, '.size'): 269 if not file_format.EndsWithMaybeGz(args.output_file, '.size'):
345 raise Exception('--output must end with .size or .size.gz') 270 parser.error('output_file must end with .size or .size.gz')
346 271
347 result = AnalyzeWithArgs(args) 272 size_info = AnalyzeWithArgs(args, args.input_file)
348 logging.info('Saving result to %s', args.output) 273 logging.info('Saving result to %s', args.output_file)
349 with _OpenMaybeGz(args.output, 'wb') as f: 274 file_format.SaveSizeInfo(size_info, args.output_file)
350 _SaveResult(result, f)
351 275
352 logging.info('Done') 276 logging.info('Done')
353 277
354 278
355 if __name__ == '__main__': 279 if __name__ == '__main__':
356 main() 280 sys.exit(main(sys.argv))
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698