Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright 2017 The Chromium Authors. All rights reserved. | 2 # Copyright 2017 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Main Python API for analyzing binary size.""" | 6 """Main Python API for analyzing binary size.""" |
| 7 | 7 |
| 8 import argparse | 8 import argparse |
| 9 import calendar | |
| 9 import datetime | 10 import datetime |
| 10 import distutils.spawn | |
| 11 import gzip | 11 import gzip |
| 12 import logging | 12 import logging |
| 13 import os | 13 import os |
| 14 import re | 14 import re |
| 15 import subprocess | 15 import subprocess |
| 16 import sys | 16 import sys |
| 17 | 17 |
| 18 import describe | 18 import describe |
| 19 import file_format | 19 import file_format |
| 20 import function_signature | 20 import function_signature |
| 21 import helpers | 21 import helpers |
| 22 import linker_map_parser | 22 import linker_map_parser |
| 23 import models | 23 import models |
| 24 import ninja_parser | 24 import ninja_parser |
| 25 import paths | |
| 25 | 26 |
| 26 | 27 |
| 27 def _OpenMaybeGz(path, mode=None): | 28 def _OpenMaybeGz(path, mode=None): |
| 28 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" | 29 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" |
| 29 if path.endswith('.gz'): | 30 if path.endswith('.gz'): |
| 30 if mode and 'w' in mode: | 31 if mode and 'w' in mode: |
| 31 return gzip.GzipFile(path, mode, 1) | 32 return gzip.GzipFile(path, mode, 1) |
| 32 return gzip.open(path, mode) | 33 return gzip.open(path, mode) |
| 33 return open(path, mode or 'r') | 34 return open(path, mode or 'r') |
| 34 | 35 |
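
A minimal usage sketch of `_OpenMaybeGz` (file names hypothetical); both branches yield a line-iterable file object:

```python
with _OpenMaybeGz('chrome.map') as f:      # falls through to plain open()
    header = f.readline()
with _OpenMaybeGz('chrome.map.gz') as f:   # routed to gzip.open()
    header = f.readline()
```
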
| (...skipping 88 matching lines...) | |
| 123 if path.startswith('gen/'): | 124 if path.startswith('gen/'): |
| 124 # Convert gen/third_party/... -> third_party/... | 125 # Convert gen/third_party/... -> third_party/... |
| 125 return path[4:] | 126 return path[4:] |
| 126 if path.startswith('../../'): | 127 if path.startswith('../../'): |
| 127 # Convert ../../third_party/... -> third_party/... | 128 # Convert ../../third_party/... -> third_party/... |
| 128 return path[6:] | 129 return path[6:] |
| 129 return path | 130 return path |
| 130 | 131 |
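
The two prefix rewrites above, as a quick sketch (paths hypothetical):

```python
assert _NormalizeSourcePath('gen/third_party/icu/icu.cc') == 'third_party/icu/icu.cc'
assert _NormalizeSourcePath('../../base/logging.cc') == 'base/logging.cc'
assert _NormalizeSourcePath('base/logging.cc') == 'base/logging.cc'  # unchanged
```
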
| 131 | 132 |
| 132 def _ExtractSourcePaths(symbol_group, output_directory): | 133 def _ExtractSourcePaths(symbol_group, output_directory): |
| 133 """Fills in the .source_path attribute of all symbols.""" | 134 """Fills in the .source_path attribute of all symbols. |
| 135 | |
| 136 Returns True if all source paths were found. | |
| 137 """ | |
| 138 all_found = True | |
| 134 mapper = ninja_parser.SourceFileMapper(output_directory) | 139 mapper = ninja_parser.SourceFileMapper(output_directory) |
| 135 | 140 |
| 136 for symbol in symbol_group: | 141 for symbol in symbol_group: |
| 137 object_path = symbol.object_path | 142 object_path = symbol.object_path |
| 138 if symbol.source_path or not object_path: | 143 if symbol.source_path or not object_path: |
| 139 continue | 144 continue |
| 140 # We don't have source info for prebuilt .a files. | 145 # We don't have source info for prebuilt .a files. |
| 141 if not object_path.startswith('..'): | 146 if not object_path.startswith('..'): |
| 142 source_path = mapper.FindSourceForPath(object_path) | 147 source_path = mapper.FindSourceForPath(object_path) |
| 143 if source_path: | 148 if source_path: |
| 144 symbol.source_path = _NormalizeSourcePath(source_path) | 149 symbol.source_path = _NormalizeSourcePath(source_path) |
| 145 else: | 150 else: |
| 151 all_found = False | |
| 146 logging.warning('Could not find source path for %s', object_path) | 152 logging.warning('Could not find source path for %s', object_path) |
| 147 logging.debug('Parsed %d .ninja files.', mapper.GetParsedFileCount()) | 153 logging.debug('Parsed %d .ninja files.', mapper.GetParsedFileCount()) |
| 154 return all_found | |
| 148 | 155 |
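
The new return value lets callers fail fast when .ninja rules are stale; a hedged sketch of a call site (everything beyond the names in this file is hypothetical):

```python
# size_info comes from an earlier map-file parse; 'out/Release' is illustrative.
all_found = _ExtractSourcePaths(size_info.symbols, 'out/Release')
if not all_found:
    logging.warning('Stale .ninja files? Some source paths were unresolved.')
```
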
| 149 | 156 |
| 150 def _RemoveDuplicatesAndCalculatePadding(symbol_group): | 157 def _RemoveDuplicatesAndCalculatePadding(symbol_group): |
| 151 """Removes symbols at the same address and calculates the |padding| field. | 158 """Removes symbols at the same address and calculates the |padding| field. |
| 152 | 159 |
| 153 Symbols must already be sorted by |address|. | 160 Symbols must already be sorted by |address|. |
| 154 """ | 161 """ |
| 155 to_remove = [] | 162 to_remove = [] |
| 156 seen_sections = [] | 163 seen_sections = [] |
| 157 for i, symbol in enumerate(symbol_group[1:]): | 164 for i, symbol in enumerate(symbol_group[1:]): |
| (...skipping 34 matching lines...) | |
| 192 symbol.size += padding | 199 symbol.size += padding |
| 193 assert symbol.size >= 0, ( | 200 assert symbol.size >= 0, ( |
| 194 'Symbol has negative size (likely not sorted properly): ' | 201 'Symbol has negative size (likely not sorted properly): ' |
| 195 '%r\nprev symbol: %r' % (symbol, prev_symbol)) | 202 '%r\nprev symbol: %r' % (symbol, prev_symbol)) |
| 196 # Map files have no overlaps, so worth special-casing the no-op case. | 203 # Map files have no overlaps, so worth special-casing the no-op case. |
| 197 if to_remove: | 204 if to_remove: |
| 198 logging.info('Removing %d overlapping symbols', len(to_remove)) | 205 logging.info('Removing %d overlapping symbols', len(to_remove)) |
| 199 symbol_group -= models.SymbolGroup(to_remove) | 206 symbol_group -= models.SymbolGroup(to_remove) |
| 200 | 207 |
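
A worked example of the padding rule (numbers hypothetical): padding is the gap between where the previous symbol ends and where the next one starts, and it is folded into the next symbol's size:

```python
prev_address, prev_size = 0x1000, 0x18   # previous symbol ends at 0x1018
address = 0x1020                         # next symbol starts here
padding = address - (prev_address + prev_size)
assert padding == 8                      # symbol.size grows by these 8 bytes
```
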
| 201 | 208 |
| 202 def AddOptions(parser): | 209 def Analyze(path, lazy_paths=None): |
| 203 parser.add_argument('--tool-prefix', default='', | 210 """Returns a SizeInfo for the given |path|. |
| 204 help='Path prefix for c++filt.') | |
| 205 parser.add_argument('--output-directory', | |
| 206 help='Path to the root build directory.') | |
| 207 | 211 |
| 208 | 212 Args: |
| 209 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None): | 213 path: Can be a .size file, or a .map(.gz). If the latter, then lazy_paths |
| 210 """Detects values for --tool-prefix and --output-directory.""" | 214 must be provided as well. |
| 211 if not output_directory: | 215 """ |
| 212 abs_path = os.path.abspath(input_file) | |
| 213 release_idx = abs_path.find('Release') | |
| 214 if release_idx != -1: | |
| 215 output_directory = abs_path[:release_idx] + 'Release' | |
| 216 output_directory = os.path.relpath(abs_path[:release_idx] + '/Release') | |
| 217 logging.debug('Detected --output-directory=%s', output_directory) | |
| 218 | |
| 219 if not tool_prefix and output_directory: | |
| 220 # Auto-detect from build_vars.txt | |
| 221 build_vars_path = os.path.join(output_directory, 'build_vars.txt') | |
| 222 if os.path.exists(build_vars_path): | |
| 223 with open(build_vars_path) as f: | |
| 224 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l) | |
| 225 logging.debug('Found --tool-prefix from build_vars.txt') | |
| 226 tool_prefix = os.path.join(output_directory, | |
| 227 build_vars['android_tool_prefix']) | |
| 228 | |
| 229 if os.path.sep not in tool_prefix: | |
| 230 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt') | |
| 231 else: | |
| 232 full_path = tool_prefix + 'c++filt' | |
| 233 | |
| 234 if not full_path or not os.path.isfile(full_path): | |
| 235 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path) | |
| 236 if not output_directory or not os.path.isdir(output_directory): | |
| 237 raise Exception('Bad --output-directory. Path not found: %s' % | |
| 238 output_directory) | |
| 239 logging.info('Using --output-directory=%s', output_directory) | |
| 240 logging.info('Using --tool-prefix=%s', tool_prefix) | |
| 241 return output_directory, tool_prefix | |
| 242 | |
| 243 | |
| 244 def AnalyzeWithArgs(args, input_path): | |
| 245 return Analyze(input_path, args.output_directory, args.tool_prefix) | |
| 246 | |
| 247 | |
| 248 def Analyze(path, output_directory=None, tool_prefix=''): | |
| 249 if path.endswith('.size'): | 216 if path.endswith('.size'): |
| 250 logging.debug('Loading results from: %s', path) | 217 logging.debug('Loading results from: %s', path) |
| 251 size_info = file_format.LoadSizeInfo(path) | 218 size_info = file_format.LoadSizeInfo(path) |
| 252 # Recompute derived values (padding and function names). | 219 # Recompute derived values (padding and function names). |
| 253 logging.info('Calculating padding') | 220 logging.info('Calculating padding') |
| 254 _RemoveDuplicatesAndCalculatePadding(size_info.symbols) | 221 _RemoveDuplicatesAndCalculatePadding(size_info.symbols) |
| 255 logging.info('Deriving signatures') | 222 logging.info('Deriving signatures') |
| 256 # Re-parse out function parameters. | 223 # Re-parse out function parameters. |
| 257 _NormalizeNames(size_info.symbols) | 224 _NormalizeNames(size_info.symbols) |
| 258 return size_info | 225 return size_info |
| 259 elif not path.endswith('.map') and not path.endswith('.map.gz'): | 226 elif not path.endswith('.map') and not path.endswith('.map.gz'): |
| 260 raise Exception('Expected input to be a .map or a .size') | 227 raise Exception('Expected input to be a .map or a .size') |
| 261 else: | 228 else: |
| 262 # Verify tool_prefix early. | 229 # Verify tool_prefix early. |
estevenson 2017/04/06 14:14:01: nit: remove this comment?
agrieve 2017/04/06 14:53:54: Done.
| 263 output_directory, tool_prefix = ( | 230 # output_directory needed for source file information. |
| 264 _DetectToolPrefix(tool_prefix, path, output_directory)) | 231 lazy_paths.VerifyOutputDirectory() |
| 232 # tool_prefix needed for c++filt. | |
| 233 lazy_paths.VerifyToolPrefix() | |
| 265 | 234 |
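
For reviewers unfamiliar with the new `paths` module: the call sites here rely only on roughly this shape (a sketch under assumptions, not the real implementation):

```python
class LazyPaths(object):
    """Sketch of the contract; the real class lives in the new paths module."""
    def __init__(self, args=None, input_file=None):
        self.output_directory = getattr(args, 'output_directory', None)
        self.tool_prefix = getattr(args, 'tool_prefix', '')

    def VerifyOutputDirectory(self):
        assert self.output_directory and os.path.isdir(self.output_directory)
        return self.output_directory

    def VerifyToolPrefix(self):
        assert self.tool_prefix is not None
        return self.tool_prefix
```
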
| 266 with _OpenMaybeGz(path) as map_file: | 235 with _OpenMaybeGz(path) as map_file: |
| 267 section_sizes, symbols = linker_map_parser.MapFileParser().Parse(map_file) | 236 section_sizes, symbols = linker_map_parser.MapFileParser().Parse(map_file) |
| 268 timestamp = datetime.datetime.utcfromtimestamp(os.path.getmtime(path)) | 237 size_info = models.SizeInfo(section_sizes, models.SymbolGroup(symbols)) |
| 269 size_info = models.SizeInfo(section_sizes, models.SymbolGroup(symbols), | |
| 270 timestamp=timestamp) | |
| 271 | 238 |
| 272 # Map file for some reason doesn't unmangle all names. | 239 # Map file for some reason doesn't unmangle all names. |
| 273 logging.info('Calculating padding') | 240 logging.info('Calculating padding') |
| 274 _RemoveDuplicatesAndCalculatePadding(size_info.symbols) | 241 _RemoveDuplicatesAndCalculatePadding(size_info.symbols) |
| 275 # Unmangle prints its own log statement. | 242 # Unmangle prints its own log statement. |
| 276 _UnmangleRemainingSymbols(size_info.symbols, tool_prefix) | 243 _UnmangleRemainingSymbols(size_info.symbols, lazy_paths.tool_prefix) |
| 277 logging.info('Extracting source paths from .ninja files') | 244 logging.info('Extracting source paths from .ninja files') |
| 278 _ExtractSourcePaths(size_info.symbols, output_directory) | 245 all_found = _ExtractSourcePaths(size_info.symbols, |
| 246 lazy_paths.output_directory) | |
| 247 assert all_found, ( | |
| 248 'One or more source file paths could not be found. Likely caused by ' | |
| 249 '.ninja files being generated at a different time than the .map file.') | |
| 279 # Resolve paths prints its own log statement. | 250 # Resolve paths prints its own log statement. |
| 280 logging.info('Normalizing names') | 251 logging.info('Normalizing names') |
| 281 _NormalizeNames(size_info.symbols) | 252 _NormalizeNames(size_info.symbols) |
| 282 logging.info('Normalizing paths') | 253 logging.info('Normalizing paths') |
| 283 _NormalizeObjectPaths(size_info.symbols) | 254 _NormalizeObjectPaths(size_info.symbols) |
| 284 | 255 |
| 285 if logging.getLogger().isEnabledFor(logging.INFO): | 256 if logging.getLogger().isEnabledFor(logging.INFO): |
| 286 for line in describe.DescribeSizeInfoCoverage(size_info): | 257 for line in describe.DescribeSizeInfoCoverage(size_info): |
| 287 logging.info(line) | 258 logging.info(line) |
| 288 logging.info('Finished analyzing %d symbols', len(size_info.symbols)) | 259 logging.info('Finished analyzing %d symbols', len(size_info.symbols)) |
| 289 return size_info | 260 return size_info |
| 290 | 261 |
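
The two supported entry points, as hypothetical call sites:

```python
size_info = Analyze('chrome.size')                 # reload cached results
size_info = Analyze('chrome.map.gz', lazy_paths)   # fresh analysis; paths required
```
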
| 291 | 262 |
| 292 def _DetectGitRevision(path): | 263 def _DetectGitRevision(directory): |
| 293 try: | 264 try: |
| 294 git_rev = subprocess.check_output( | 265 git_rev = subprocess.check_output( |
| 295 ['git', '-C', os.path.dirname(path), 'rev-parse', 'HEAD']) | 266 ['git', '-C', directory, 'rev-parse', 'HEAD']) |
| 296 return git_rev.rstrip() | 267 return git_rev.rstrip() |
| 297 except Exception: | 268 except Exception: |
| 298 logging.warning('Failed to detect git revision for file metadata.') | 269 logging.warning('Failed to detect git revision for file metadata.') |
| 299 return None | 270 return None |
| 300 | 271 |
| 301 | 272 |
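
`_DetectGitRevision` just shells out to git; a sketch (checkout path hypothetical):

```python
rev = _DetectGitRevision('/path/to/chromium/src')  # None if git fails
# Equivalent shell: git -C /path/to/chromium/src rev-parse HEAD
```
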
| 273 def BuildIdFromElf(elf_path, tool_prefix): | |
| 274 args = [tool_prefix + 'readelf', '-n', elf_path] | |
| 275 stdout = subprocess.check_output(args) | |
| 276 match = re.search(r'Build ID: (\w+)', stdout) | |
| 277 assert match, 'Build ID not found from running: ' + ' '.join(args) | |
| 278 return match.group(1) | |
| 279 | |
| 280 | |
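
`readelf -n` prints the notes sections, one line of which carries the build ID; the regex pulls out the hex string (sample line abridged and hypothetical):

```python
sample = '    Build ID: 5f0ec91d2a34a8f7c21b'   # abridged readelf -n output
assert re.search(r'Build ID: (\w+)', sample).group(1) == '5f0ec91d2a34a8f7c21b'
```
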
| 281 def _SectionSizesFromElf(elf_path, tool_prefix): | |
| 282 args = [tool_prefix + 'readelf', '-S', '--wide', elf_path] | |
| 283 stdout = subprocess.check_output(args) | |
| 284 section_sizes = {} | |
| 285 # Matches [ 2] .hash HASH 00000000006681f0 0001f0 003154 04 A 3 0 8 | |
| 286 for match in re.finditer(r'\[[\s\d]+\] (\..*)$', stdout, re.MULTILINE): | |
| 287 items = match.group(1).split() | |
| 288 section_sizes[items[0]] = int(items[4], 16) | |
| 289 return section_sizes | |
| 290 | |
| 291 | |
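
Tracing the regex against the sample line in the comment above: group(1) is everything from the section name onward, and column 4 is the size in hex:

```python
line = '  [ 2] .hash  HASH  00000000006681f0 0001f0 003154 04  A  3  0  8'
items = re.search(r'\[[\s\d]+\] (\..*)$', line).group(1).split()
assert items[0] == '.hash' and int(items[4], 16) == 0x3154  # 12628 bytes
```
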
| 302 def main(argv): | 292 def main(argv): |
| 303 parser = argparse.ArgumentParser(argv) | 293 parser = argparse.ArgumentParser(argv) |
| 304 parser.add_argument('input_file', help='Path to input .map file.') | 294 parser.add_argument('elf_file', help='Path to input ELF file.') |
| 305 parser.add_argument('output_file', help='Path to output .size(.gz) file.') | 295 parser.add_argument('output_file', help='Path to output .size(.gz) file.') |
| 306 AddOptions(parser) | 296 parser.add_argument('--map-file', |
| 297 help='Path to input .map(.gz) file. Defaults to ' | |
| 298 '{{elf_file}}.map(.gz)?') | |
| 299 paths.AddOptions(parser) | |
| 307 args = helpers.AddCommonOptionsAndParseArgs(parser, argv) | 300 args = helpers.AddCommonOptionsAndParseArgs(parser, argv) |
| 308 if not args.output_file.endswith('.size'): | 301 if not args.output_file.endswith('.size'): |
| 309 parser.error('output_file must end with .size') | 302 parser.error('output_file must end with .size') |
| 310 | 303 |
| 311 size_info = AnalyzeWithArgs(args, args.input_file) | 304 if args.map_file: |
| 312 if not args.input_file.endswith('.size'): | 305 map_file_path = args.map_file |
| 313 git_rev = _DetectGitRevision(args.input_file) | 306 elif args.elf_file.endswith('.size'): |
| 314 size_info.tag = 'Filename=%s git_rev=%s' % ( | 307 # Allow a .size file to be passed as input as well. Useful for measuring |
| 315 os.path.basename(args.input_file), git_rev) | 308 # serialization speed. |
| 309 pass | |
| 310 else: | |
| 311 map_file_path = args.elf_file + '.map' | |
| 312 if not os.path.exists(map_file_path): | |
| 313 map_file_path += '.gz' | |
| 314 if not os.path.exists(map_file_path): | |
| 315 parser.error('Could not find .map(.gz)? file. Use --map-file.') | |
| 316 | |
| 317 lazy_paths = paths.LazyPaths(args=args, input_file=args.elf_file) | |
| 318 metadata = None | |
| 319 if args.elf_file and not args.elf_file.endswith('.size'): | |
| 320 logging.debug('Constructing metadata') | |
| 321 git_rev = _DetectGitRevision(os.path.dirname(args.elf_file)) | |
| 322 build_id = BuildIdFromElf(args.elf_file, lazy_paths.tool_prefix) | |
| 323 timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime( | |
| 324 args.elf_file)) | |
| 325 timestamp = calendar.timegm(timestamp_obj.timetuple()) | |
| 326 | |
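
The mtime round-trips through a UTC datetime, so the stored value is integral epoch seconds; for example:

```python
import calendar, datetime
dt = datetime.datetime.utcfromtimestamp(1491486841)   # hypothetical mtime
assert calendar.timegm(dt.timetuple()) == 1491486841  # exact for whole seconds
```
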
| 327 def relative_to_out(path): | |
estevenson 2017/04/06 14:14:01: Is this the preferred naming convention for inner ...
agrieve 2017/04/06 14:53:54: I believe so. I say this only because that's what ...
| 328 return os.path.relpath(path, lazy_paths.VerifyOutputDirectory()) | |
| 329 | |
| 330 metadata = { | |
| 331 models.METADATA_GIT_REVISION: git_rev, | |
| 332 models.METADATA_MAP_FILENAME: relative_to_out(map_file_path), | |
| 333 models.METADATA_ELF_FILENAME: relative_to_out(args.elf_file), | |
| 334 models.METADATA_ELF_MTIME: timestamp, | |
| 335 models.METADATA_ELF_BUILD_ID: build_id, | |
| 336 } | |
| 337 | |
| 338 size_info = Analyze(map_file_path, lazy_paths) | |
| 339 | |
| 340 if metadata: | |
| 341 logging.debug('Validating section sizes') | |
| 342 elf_section_sizes = _SectionSizesFromElf(args.elf_file, | |
| 343 lazy_paths.tool_prefix) | |
| 344 for k, v in elf_section_sizes.iteritems(): | |
| 345 assert v == size_info.section_sizes.get(k), ( | |
| 346 'ELF file and .map file do not match.') | |
| 347 | |
| 348 size_info.metadata = metadata | |
| 349 | |
| 316 logging.info('Recording metadata: %s', | 350 logging.info('Recording metadata: %s', |
| 317 describe.DescribeSizeInfoMetadata(size_info)) | 351 describe.DescribeSizeInfoMetadata(size_info)) |
| 318 logging.info('Saving result to %s', args.output_file) | 352 logging.info('Saving result to %s', args.output_file) |
| 319 file_format.SaveSizeInfo(size_info, args.output_file) | 353 file_format.SaveSizeInfo(size_info, args.output_file) |
| 320 | |
| 321 logging.info('Done') | 354 logging.info('Done') |
| 322 | 355 |
| 323 | 356 |
| 324 if __name__ == '__main__': | 357 if __name__ == '__main__': |
| 325 sys.exit(main(sys.argv)) | 358 sys.exit(main(sys.argv)) |
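
A hypothetical invocation (the script and file names are illustrative); thanks to the `.map(.gz)?` fallback in `main()`, `--map-file` is only needed for non-default locations:

```python
# python analyze_binary_size.py out/Release/libchrome.so chrome.size \
#     --map-file out/Release/libchrome.so.map.gz
```
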