| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright 2017 The Chromium Authors. All rights reserved. | 2 # Copyright 2017 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Main Python API for analyzing binary size.""" | 6 """Main Python API for analyzing binary size.""" |
| 7 | 7 |
| 8 import argparse | 8 import argparse |
| 9 import ast | |
| 10 import distutils.spawn | 9 import distutils.spawn |
| 11 import gzip | |
| 12 import logging | 10 import logging |
| 13 import os | 11 import os |
| 14 import re | |
| 15 import subprocess | 12 import subprocess |
| 13 import sys |
| 16 | 14 |
| 15 import file_format |
| 17 import function_signature | 16 import function_signature |
| 18 import helpers | 17 import helpers |
| 19 import mapfileparser | 18 import linker_map_parser |
| 20 import symbols | 19 import models |
| 21 | |
| 22 | |
| 23 # File format version for .size files. | |
| 24 _SERIALIZATION_VERSION = 1 | |
| 25 | |
| 26 | |
| 27 def _OpenMaybeGz(path, mode=None): | |
| 28 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" | |
| 29 if path.endswith('.gz'): | |
| 30 if mode and 'w' in mode: | |
| 31 return gzip.GzipFile(path, mode, 1) | |
| 32 return gzip.open(path, mode) | |
| 33 return open(path, mode or 'r') | |
| 34 | |
| 35 | |
| 36 def _EndsWithMaybeGz(path, suffix): | |
| 37 return path.endswith(suffix) or path.endswith(suffix + '.gz') | |
| 38 | 20 |
| 39 | 21 |
| 40 def _IterLines(s): | 22 def _IterLines(s): |
| 41 prev_idx = -1 | 23 prev_idx = -1 |
| 42 while True: | 24 while True: |
| 43 idx = s.find('\n', prev_idx + 1) | 25 idx = s.find('\n', prev_idx + 1) |
| 44 if idx == -1: | 26 if idx == -1: |
| 45 return | 27 return |
| 46 yield s[prev_idx + 1:idx] | 28 yield s[prev_idx + 1:idx] |
| 47 prev_idx = idx | 29 prev_idx = idx |
| 48 | 30 |
| 49 | 31 |
| 50 def _UnmangleRemainingSymbols(symbol_group, tool_prefix): | 32 def _UnmangleRemainingSymbols(symbol_group, tool_prefix): |
| 51 """Uses c++filt to unmangle any symbols that need it.""" | 33 """Uses c++filt to unmangle any symbols that need it.""" |
| 52 to_process = [s for s in symbol_group if s.name and s.name.startswith('_Z')] | 34 to_process = [s for s in symbol_group if s.name.startswith('_Z')] |
| 53 if not to_process: | 35 if not to_process: |
| 54 return | 36 return |
| 55 | 37 |
| 56 logging.info('Unmangling %d names', len(to_process)) | 38 logging.info('Unmangling %d names', len(to_process)) |
| 57 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE, | 39 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE, |
| 58 stdout=subprocess.PIPE) | 40 stdout=subprocess.PIPE) |
| 59 stdout = proc.communicate('\n'.join(s.name for s in to_process))[0] | 41 stdout = proc.communicate('\n'.join(s.name for s in to_process))[0] |
| 60 assert proc.returncode == 0 | 42 assert proc.returncode == 0 |
| 61 | 43 |
| 62 for i, line in enumerate(_IterLines(stdout)): | 44 for i, line in enumerate(_IterLines(stdout)): |
| 63 to_process[i].name = line | 45 to_process[i].name = line |
| 64 | 46 |
| 65 | 47 |
| 66 def _NormalizeNames(symbol_group): | 48 def _NormalizeNames(symbol_group): |
| 67 """Ensures that all names are formatted in a useful way. | 49 """Ensures that all names are formatted in a useful way. |
| 68 | 50 |
| 69 This includes: | 51 This includes: |
| 70 - Assigning of |function_signature| (for functions). | 52 - Assigning of |function_signature| (for functions). |
| 71 - Stripping of return types in |function_signature| and |name|. | 53 - Stripping of return types in |function_signature| and |name|. |
| 72 - Stripping parameters from |name|. | 54 - Stripping parameters from |name|. |
| 73 - Moving "vtable for" and the like to be suffixes rather than prefixes. | 55 - Moving "vtable for" and the like to be suffixes rather than prefixes. |
| 74 """ | 56 """ |
| 75 found_prefixes = set() | 57 found_prefixes = set() |
| 76 for symbol in symbol_group: | 58 for symbol in symbol_group: |
| 77 if not symbol.name or symbol.name.startswith('*'): | 59 if symbol.name.startswith('*'): |
| 78 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this | 60 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this |
| 79 # can happen. | 61 # can happen. |
| 80 continue | 62 continue |
| 81 | 63 |
| 82 # E.g.: vtable for FOO | 64 # E.g.: vtable for FOO |
| 83 idx = symbol.name.find(' for ', 0, 30) | 65 idx = symbol.name.find(' for ', 0, 30) |
| 84 if idx != -1: | 66 if idx != -1: |
| 85 found_prefixes.add(symbol.name[:idx + 4]) | 67 found_prefixes.add(symbol.name[:idx + 4]) |
| 86 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']' | 68 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']' |
| 87 | 69 |
| (...skipping 10 matching lines...) Expand all Loading... |
| 98 | 80 |
| 99 # Remove anonymous namespaces (they just harm clustering). | 81 # Remove anonymous namespaces (they just harm clustering). |
| 100 symbol.name = symbol.name.replace('(anonymous namespace)::', '') | 82 symbol.name = symbol.name.replace('(anonymous namespace)::', '') |
| 101 | 83 |
| 102 logging.debug('Found name prefixes of: %r', found_prefixes) | 84 logging.debug('Found name prefixes of: %r', found_prefixes) |
| 103 | 85 |
| 104 | 86 |
| 105 def _NormalizeObjectPaths(symbol_group): | 87 def _NormalizeObjectPaths(symbol_group): |
| 106 """Ensures that all paths are formatted in a useful way.""" | 88 """Ensures that all paths are formatted in a useful way.""" |
| 107 for symbol in symbol_group: | 89 for symbol in symbol_group: |
| 108 if symbol.path: | 90 if symbol.path.startswith('obj/'): |
| 109 if symbol.path.startswith('obj/'): | 91 # Convert obj/third_party/... -> third_party/... |
| 110 # Convert obj/third_party/... -> third_party/... | 92 symbol.path = symbol.path[4:] |
| 111 symbol.path = symbol.path[4:] | 93 elif symbol.path.startswith('../../'): |
| 112 elif symbol.path.startswith('../../'): | 94 # Convert ../../third_party/... -> third_party/... |
| 113 # Convert ../../third_party/... -> third_party/... | 95 symbol.path = symbol.path[6:] |
| 114 symbol.path = symbol.path[6:] | 96 if symbol.path.endswith(')'): |
| 115 if symbol.path.endswith(')'): | 97 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o |
| 116 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o | 98 start_idx = symbol.path.index('(') |
| 117 start_idx = symbol.path.index('(') | 99 paren_path = symbol.path[start_idx + 1:-1] |
| 118 paren_path = symbol.path[start_idx + 1:-1] | 100 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path |
| 119 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path | |
| 120 | 101 |
| 121 | 102 |
| 122 def _RemoveDuplicatesAndCalculatePadding(symbol_group): | 103 def _RemoveDuplicatesAndCalculatePadding(symbol_group): |
| 123 """Removes symbols at the same address and calculates the |padding| field. | 104 """Removes symbols at the same address and calculates the |padding| field. |
| 124 | 105 |
| 125 Symbols must already be sorted by |address|. | 106 Symbols must already be sorted by |address|. |
| 126 """ | 107 """ |
| 127 i = 0 | 108 i = 0 |
| 128 to_remove = set() | 109 to_remove = set() |
| 129 all_symbols = symbol_group.symbols | 110 all_symbols = symbol_group.symbols |
| (...skipping 29 matching lines...) Expand all Loading... |
| 159 symbol.padding = padding | 140 symbol.padding = padding |
| 160 symbol.size += padding | 141 symbol.size += padding |
| 161 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol | 142 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol |
| 162 # Map files have no overlaps, so worth special-casing the no-op case. | 143 # Map files have no overlaps, so worth special-casing the no-op case. |
| 163 if to_remove: | 144 if to_remove: |
| 164 logging.info('Removing %d overlapping symbols', len(to_remove)) | 145 logging.info('Removing %d overlapping symbols', len(to_remove)) |
| 165 symbol_group.symbols = ( | 146 symbol_group.symbols = ( |
| 166 [s for i, s in enumerate(all_symbols) if i not in to_remove]) | 147 [s for i, s in enumerate(all_symbols) if i not in to_remove]) |
| 167 | 148 |
| 168 | 149 |
| 169 def _PrintStats(result, write_func): | 150 def PrintStats(size_info, write_func): |
| 170 """Prints out how accurate |result| is.""" | 151 """Prints out how accurate |size_info| is.""" |
| 171 for section in symbols.SECTION_TO_SECTION_NAME: | 152 for section in models.SECTION_TO_SECTION_NAME: |
| 172 if section == 'd': | 153 if section == 'd': |
| 173 expected_size = sum(v for k, v in result.section_sizes.iteritems() | 154 expected_size = sum(v for k, v in size_info.section_sizes.iteritems() |
| 174 if k.startswith('.data')) | 155 if k.startswith('.data')) |
| 175 else: | 156 else: |
| 176 expected_size = result.section_sizes[ | 157 expected_size = size_info.section_sizes[ |
| 177 symbols.SECTION_TO_SECTION_NAME[section]] | 158 models.SECTION_TO_SECTION_NAME[section]] |
| 178 | 159 |
| 179 def one_stat(group): | 160 def one_stat(group): |
| 180 template = ('Section %s has %.1f%% of %d bytes accounted for from ' | 161 template = ('Section %s has %.1f%% of %d bytes accounted for from ' |
| 181 '%d symbols. %d bytes are unaccounted for. Padding ' | 162 '%d symbols. %d bytes are unaccounted for. Padding ' |
| 182 'accounts for %d bytes\n') | 163 'accounts for %d bytes\n') |
| 183 actual_size = group.size | 164 actual_size = group.size_without_padding if group.IsBss() else group.size |
| 184 count = len(group) | 165 count = len(group) |
| 185 padding = group.padding | 166 padding = group.padding |
| 186 size_percent = 100.0 * actual_size / expected_size | 167 size_percent = 100.0 * actual_size / expected_size |
| 187 return (template % (section, size_percent, actual_size, count, | 168 return (template % (section, size_percent, actual_size, count, |
| 188 expected_size - actual_size, padding)) | 169 expected_size - actual_size, padding)) |
| 189 | 170 |
| 190 in_section = result.symbol_group.WhereInSection(section) | 171 in_section = size_info.symbols.WhereInSection(section) |
| 191 write_func(one_stat(in_section)) | 172 write_func(one_stat(in_section)) |
| 192 | 173 |
| 193 star_syms = in_section.WhereNameMatches(r'^\*') | 174 star_syms = in_section.WhereNameMatches(r'^\*') |
| 194 attributed_syms = star_syms.Inverted().WhereHasAnyAttribution() | 175 attributed_syms = star_syms.Inverted().WhereHasAnyAttribution() |
| 195 anonymous_syms = attributed_syms.Inverted() | 176 anonymous_syms = attributed_syms.Inverted() |
| 196 if star_syms or anonymous_syms: | 177 if star_syms or anonymous_syms: |
| 197 missing_size = star_syms.size + anonymous_syms.size | 178 missing_size = star_syms.size + anonymous_syms.size |
| 198 write_func(('+ Without %d merge sections and %d anonymous entries (' | 179 write_func(('+ Without %d merge sections and %d anonymous entries (' |
| 199 'accounting for %d bytes):\n') % ( | 180 'accounting for %d bytes):\n') % ( |
| 200 len(star_syms), len(anonymous_syms), missing_size)) | 181 len(star_syms), len(anonymous_syms), missing_size)) |
| 201 write_func('+ ' + one_stat(attributed_syms)) | 182 write_func('+ ' + one_stat(attributed_syms)) |
| 202 | 183 |
| 203 | 184 |
| 204 def _SaveResult(result, file_obj): | |
| 205 """Saves the result to the given file object.""" | |
| 206 # Store one bucket per line. | |
| 207 file_obj.write('%d\n' % _SERIALIZATION_VERSION) | |
| 208 file_obj.write('%r\n' % result.section_sizes) | |
| 209 file_obj.write('%d\n' % len(result.symbol_group)) | |
| 210 prev_section_name = None | |
| 211 # Store symbol fields as tab-separated. | |
| 212 # Store only non-derived fields. | |
| 213 for symbol in result.symbol_group: | |
| 214 if symbol.section_name != prev_section_name: | |
| 215 file_obj.write('%s\n' % symbol.section_name) | |
| 216 prev_section_name = symbol.section_name | |
| 217 # Don't write padding nor name since these are derived values. | |
| 218 file_obj.write('%x\t%x\t%s\t%s\n' % ( | |
| 219 symbol.address, symbol.size_without_padding, | |
| 220 symbol.function_signature or symbol.name or '', | |
| 221 symbol.path or '')) | |
| 222 | |
| 223 | |
| 224 def _LoadResults(file_obj): | |
| 225 """Loads a result from the given file.""" | |
| 226 lines = iter(file_obj) | |
| 227 actual_version = int(next(lines)) | |
| 228 assert actual_version == _SERIALIZATION_VERSION, ( | |
| 229 'Version mismatch. Need to write some upgrade code.') | |
| 230 | |
| 231 section_sizes = ast.literal_eval(next(lines)) | |
| 232 num_syms = int(next(lines)) | |
| 233 symbol_list = [None] * num_syms | |
| 234 section_name = None | |
| 235 for i in xrange(num_syms): | |
| 236 line = next(lines)[:-1] | |
| 237 if '\t' not in line: | |
| 238 section_name = intern(line) | |
| 239 line = next(lines)[:-1] | |
| 240 new_sym = symbols.Symbol.__new__(symbols.Symbol) | |
| 241 parts = line.split('\t') | |
| 242 new_sym.section_name = section_name | |
| 243 new_sym.address = int(parts[0], 16) | |
| 244 new_sym.size = int(parts[1], 16) | |
| 245 new_sym.name = parts[2] or None | |
| 246 new_sym.path = parts[3] or None | |
| 247 new_sym.padding = 0 # Derived | |
| 248 new_sym.function_signature = None # Derived | |
| 249 symbol_list[i] = new_sym | |
| 250 | |
| 251 # Recompute derived values (padding and function names). | |
| 252 result = mapfileparser.ParseResult(symbol_list, section_sizes) | |
| 253 logging.info('Calculating padding') | |
| 254 _RemoveDuplicatesAndCalculatePadding(result.symbol_group) | |
| 255 logging.info('Deriving signatures') | |
| 256 # Re-parse out function parameters. | |
| 257 _NormalizeNames(result.symbol_group.WhereInSection('t')) | |
| 258 return result | |
| 259 | |
| 260 | |
| 261 def AddOptions(parser): | 185 def AddOptions(parser): |
| 262 parser.add_argument('input_file', | |
| 263 help='Path to input file. Can be a linker .map file, an ' | |
| 264 'unstripped binary, or a saved result from ' | |
| 265 'analyze.py') | |
| 266 parser.add_argument('--tool-prefix', default='', | 186 parser.add_argument('--tool-prefix', default='', |
| 267 help='Path prefix for c++filt.') | 187 help='Path prefix for c++filt.') |
| 268 parser.add_argument('--output-directory', | 188 parser.add_argument('--output-directory', |
| 269 help='Path to the root build directory.') | 189 help='Path to the root build directory.') |
| 270 | 190 |
| 271 | 191 |
| 272 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None): | 192 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None): |
| 273 """Calls Analyze with values from args.""" | 193 """Calls Analyze with values from args.""" |
| 274 if not output_directory: | 194 if not output_directory: |
| 275 abs_path = os.path.abspath(input_file) | 195 abs_path = os.path.abspath(input_file) |
| (...skipping 16 matching lines...) Expand all Loading... |
| 292 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt') | 212 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt') |
| 293 else: | 213 else: |
| 294 full_path = tool_prefix + 'c++filt' | 214 full_path = tool_prefix + 'c++filt' |
| 295 | 215 |
| 296 if not os.path.isfile(full_path): | 216 if not os.path.isfile(full_path): |
| 297 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path) | 217 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path) |
| 298 logging.info('Using --tool-prefix=%s', tool_prefix) | 218 logging.info('Using --tool-prefix=%s', tool_prefix) |
| 299 return tool_prefix | 219 return tool_prefix |
| 300 | 220 |
| 301 | 221 |
| 302 def AnalyzeWithArgs(args): | 222 def AnalyzeWithArgs(args, input_path): |
| 303 return Analyze(args.input_file, args.output_directory, args.tool_prefix) | 223 return Analyze(input_path, args.output_directory, args.tool_prefix) |
| 304 | 224 |
| 305 | 225 |
| 306 def Analyze(path, output_directory=None, tool_prefix=''): | 226 def Analyze(path, output_directory=None, tool_prefix=''): |
| 307 if _EndsWithMaybeGz(path, '.size'): | 227 if file_format.EndsWithMaybeGz(path, '.size'): |
| 308 logging.info('Loading cached results.') | 228 logging.debug('Loading from .map file.') |
| 309 with _OpenMaybeGz(path) as f: | 229 size_info = file_format.LoadSizeInfo(path) |
| 310 result = _LoadResults(f) | 230 # Recompute derived values (padding and function names). |
| 311 elif not _EndsWithMaybeGz(path, '.map'): | 231 logging.info('Calculating padding') |
| 232 _RemoveDuplicatesAndCalculatePadding(size_info.symbols) |
| 233 logging.info('Deriving signatures') |
| 234 # Re-parse out function parameters. |
| 235 _NormalizeNames(size_info.symbols.WhereInSection('t')) |
| 236 return size_info |
| 237 elif not file_format.EndsWithMaybeGz(path, '.map'): |
| 312 raise Exception('Expected input to be a .map or a .size') | 238 raise Exception('Expected input to be a .map or a .size') |
| 313 else: | 239 else: |
| 314 # Verify tool_prefix early. | 240 # Verify tool_prefix early. |
| 315 tool_prefix = _DetectToolPrefix(tool_prefix, path, output_directory) | 241 tool_prefix = _DetectToolPrefix(tool_prefix, path, output_directory) |
| 316 | 242 |
| 317 with _OpenMaybeGz(path) as map_file: | 243 with file_format.OpenMaybeGz(path) as map_file: |
| 318 result = mapfileparser.MapFileParser().Parse(map_file) | 244 size_info = linker_map_parser.MapFileParser().Parse(map_file) |
| 319 | 245 |
| 320 # Map file for some reason doesn't unmangle all names. | 246 # Map file for some reason doesn't unmangle all names. |
| 321 logging.info('Calculating padding') | 247 logging.info('Calculating padding') |
| 322 _RemoveDuplicatesAndCalculatePadding(result.symbol_group) | 248 _RemoveDuplicatesAndCalculatePadding(size_info.symbols) |
| 323 # Unmangle prints its own log statement. | 249 # Unmangle prints its own log statement. |
| 324 _UnmangleRemainingSymbols(result.symbol_group, tool_prefix) | 250 _UnmangleRemainingSymbols(size_info.symbols, tool_prefix) |
| 325 # Resolve paths prints its own log statement. | 251 # Resolve paths prints its own log statement. |
| 326 logging.info('Normalizing names') | 252 logging.info('Normalizing names') |
| 327 _NormalizeNames(result.symbol_group) | 253 _NormalizeNames(size_info.symbols) |
| 328 logging.info('Normalizing paths') | 254 logging.info('Normalizing paths') |
| 329 _NormalizeObjectPaths(result.symbol_group) | 255 _NormalizeObjectPaths(size_info.symbols) |
| 330 | 256 |
| 331 if logging.getLogger().isEnabledFor(logging.INFO): | 257 if logging.getLogger().isEnabledFor(logging.INFO): |
| 332 _PrintStats(result, lambda l: logging.info(l.rstrip())) | 258 PrintStats(size_info, lambda l: logging.info(l.rstrip())) |
| 333 logging.info('Finished analyzing %d symbols', len(result.symbol_group)) | 259 logging.info('Finished analyzing %d symbols', len(size_info.symbols)) |
| 334 return result | 260 return size_info |
| 335 | 261 |
| 336 | 262 |
| 337 def main(): | 263 def main(argv): |
| 338 parser = argparse.ArgumentParser() | 264 parser = argparse.ArgumentParser(argv) |
| 339 parser.add_argument('--output', required=True, | 265 parser.add_argument('input_file', help='Path to input .map file.') |
| 340 help='Path to store results. Must end in .size or ' | 266 parser.add_argument('output_file', help='Path to output .size(.gz) file.') |
| 341 '.size.gz') | |
| 342 AddOptions(parser) | 267 AddOptions(parser) |
| 343 args = helpers.AddCommonOptionsAndParseArgs(parser) | 268 args = helpers.AddCommonOptionsAndParseArgs(parser, argv) |
| 344 if not _EndsWithMaybeGz(args.output, '.size'): | 269 if not file_format.EndsWithMaybeGz(args.output_file, '.size'): |
| 345 raise Exception('--output must end with .size or .size.gz') | 270 parser.error('output_file must end with .size or .size.gz') |
| 346 | 271 |
| 347 result = AnalyzeWithArgs(args) | 272 size_info = AnalyzeWithArgs(args, args.input_file) |
| 348 logging.info('Saving result to %s', args.output) | 273 logging.info('Saving result to %s', args.output_file) |
| 349 with _OpenMaybeGz(args.output, 'wb') as f: | 274 file_format.SaveSizeInfo(size_info, args.output_file) |
| 350 _SaveResult(result, f) | |
| 351 | 275 |
| 352 logging.info('Done') | 276 logging.info('Done') |
| 353 | 277 |
| 354 | 278 |
| 355 if __name__ == '__main__': | 279 if __name__ == '__main__': |
| 356 main() | 280 sys.exit(main(sys.argv)) |
| OLD | NEW |