| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright 2017 The Chromium Authors. All rights reserved. | 2 # Copyright 2017 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Main Python API for analyzing binary size.""" | 6 """Main Python API for analyzing binary size.""" |
| 7 | 7 |
| 8 import argparse | 8 import argparse |
| 9 import distutils.spawn | 9 import distutils.spawn |
| 10 import gzip |
| 10 import logging | 11 import logging |
| 11 import os | 12 import os |
| 13 import re |
| 12 import subprocess | 14 import subprocess |
| 13 import sys | 15 import sys |
| 14 | 16 |
| 15 import describe | 17 import describe |
| 16 import file_format | 18 import file_format |
| 17 import function_signature | 19 import function_signature |
| 18 import helpers | 20 import helpers |
| 19 import linker_map_parser | 21 import linker_map_parser |
| 20 import models | 22 import models |
| 23 import ninja_parser |
| 21 | 24 |
| 22 | 25 |
def _OpenMaybeGz(path, mode=None):
    """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.

    Args:
      path: Path of the file to open.
      mode: Optional mode string. When omitted (or empty), gzip files are
          opened with 'rb' and plain files with 'r'.

    Returns:
      The opened file object.
    """
    if not path.endswith('.gz'):
        return open(path, mode or 'r')
    if mode and 'w' in mode:
        # Third positional argument is compresslevel; 1 is the fastest level.
        return gzip.GzipFile(path, mode, 1)
    # Python 3's gzip.open() rejects mode=None, so pass its documented
    # default explicitly instead of forwarding None.
    return gzip.open(path, mode or 'rb')
| 31 | 33 |
| 32 | 34 |
def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
    """Demangles, in place, every symbol whose name still starts with "_Z".

    The mangled names are piped through `<tool_prefix>c++filt` (one per line)
    and each symbol's |name| is overwritten with the matching output line.
    Does nothing when no symbol needs demangling.
    """
    mangled = [sym for sym in symbol_group if sym.name.startswith('_Z')]
    if not mangled:
        return

    logging.info('Unmangling %d names', len(mangled))
    command = [tool_prefix + 'c++filt']
    proc = subprocess.Popen(command, stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    stdin_data = '\n'.join(sym.name for sym in mangled)
    stdout, _ = proc.communicate(stdin_data)
    assert proc.returncode == 0

    # Output lines are paired with the input names by position.
    for position, demangled in enumerate(stdout.splitlines()):
        mangled[position].name = demangled
| 47 | 49 |
| 48 | 50 |
def _NormalizeNames(symbol_group):
    """Rewrites symbol names in place so they are formatted in a useful way.

    For every symbol whose name does not start with '*':
      - A leading "<prefix> for " or "<prefix> to " (e.g. "vtable for",
        "virtual thunk to") is moved to a trailing " [<prefix>]".
      - Section 't' symbols get |full_name| and |name| from
        function_signature.Parse() (strips the return type and locates the
        parameter list).
      - "(anonymous namespace)::" is removed from |name| and |full_name|, and
        |is_anonymous| is set when it was present.
      - Non-'t' symbols whose name contains "(" keep that name as |full_name|
        and get a |name| with the parenthesized part removed.
    """
    anonymous_ns = '(anonymous namespace)::'
    found_prefixes = set()
    for symbol in symbol_group:
        if symbol.name.startswith('*'):
            # See comment in _RemoveDuplicatesAndCalculatePadding() about when
            # this can happen.
            continue

        # E.g.: "vtable for FOO", then "virtual thunk to FOO". Only the first
        # 30 characters are searched for the separator.
        for separator in (' for ', ' to '):
            pos = symbol.name.find(separator, 0, 30)
            if pos == -1:
                continue
            # Recorded prefix keeps the separator word but not its last space.
            found_prefixes.add(symbol.name[:pos + len(separator) - 1])
            prefix = symbol.name[:pos]
            remainder = symbol.name[pos + len(separator):]
            symbol.name = remainder + ' [' + prefix + ']'

        is_text = symbol.section == 't'
        if is_text:
            parsed = function_signature.Parse(symbol.name)
            symbol.full_name, symbol.name = parsed

        # Anonymous namespaces just harm clustering, so drop them.
        without_anonymous = symbol.name.replace(anonymous_ns, '')
        if without_anonymous != symbol.name:
            symbol.is_anonymous = True
            symbol.name = without_anonymous
            symbol.full_name = symbol.full_name.replace(anonymous_ns, '')

        if not is_text and '(' in symbol.name:
            # Pretty rare. Example:
            # blink::CSSValueKeywordsHash::findValueImpl(char const*)::value_word_list
            symbol.full_name = symbol.name
            symbol.name = re.sub(r'\(.*\)', '', symbol.full_name)

    logging.debug('Found name prefixes of: %r', found_prefixes)
| 86 | 98 |
| 87 | 99 |
def _NormalizeObjectPaths(symbol_group):
    """Rewrites each symbol's |object_path| in place into a normalized form."""
    for symbol in symbol_group:
        normalized = symbol.object_path

        # At most one leading prefix is dropped:
        #   obj/third_party/...   -> third_party/...
        #   ../../third_party/... -> third_party/...
        for prefix in ('obj/', '../../'):
            if normalized.startswith(prefix):
                normalized = normalized[len(prefix):]
                break

        if normalized.endswith(')'):
            # Convert foo/bar.a(baz.o) -> foo/bar.a/(baz.o)
            paren_at = normalized.index('(')
            archive = normalized[:paren_at]
            member = normalized[paren_at:]
            normalized = os.path.join(archive, member)

        symbol.object_path = normalized
| 115 |
| 116 |
def _NormalizeSourcePath(path):
    """Returns |path| with one leading "gen/" or "../../" removed, if present.

    Examples:
      gen/third_party/...   -> third_party/...
      ../../third_party/... -> third_party/...
    """
    for prefix in ('gen/', '../../'):
        if path.startswith(prefix):
            return path[len(prefix):]
    return path
| 125 |
| 126 |
def _ExtractSourcePaths(symbol_group, output_directory):
    """Fills in the .source_path attribute of all symbols.

    Symbols that already have a |source_path|, or that have no |object_path|,
    are left alone. The lookup goes through a ninja_parser.SourceFileMapper
    created for |output_directory|.
    """
    mapper = ninja_parser.SourceFileMapper(output_directory)

    for symbol in symbol_group:
        object_path = symbol.object_path
        if symbol.source_path or not object_path:
            continue
        if object_path.startswith('..'):
            # We don't have source info for prebuilt .a files.
            continue
        source_path = mapper.FindSourceForPath(object_path)
        if not source_path:
            logging.warning('Could not find source path for %s', object_path)
            continue
        symbol.source_path = _NormalizeSourcePath(source_path)

    logging.debug('Parsed %d .ninja files.', mapper.GetParsedFileCount())
| 102 | 143 |
| 103 | 144 |
def _RemoveDuplicatesAndCalculatePadding(symbol_group):
    """Removes symbols at the same address and calculates the |padding| field.

    Symbols must already be sorted by |address|.

    Each symbol is compared with the one before it. Pairs in different
    sections, or where either address is not positive, are skipped. For the
    rest:
      - Same address: the later symbol's size becomes the larger of the two
        sizes and the later symbol is dropped from the group.
      - Otherwise the gap to the previous symbol's |end_address| is stored as
        |padding| and added to |size|, unless the gap reaches the per-section
        threshold, in which case it is only logged.

    Args:
      symbol_group: Object whose |symbols| list is processed. The attribute is
          reassigned to a filtered list when any symbol is dropped.
    """
    to_remove = set()
    all_symbols = symbol_group.symbols
    for index, symbol in enumerate(all_symbols[1:], 1):
        prev_symbol = all_symbols[index - 1]
        if prev_symbol.section_name != symbol.section_name:
            continue
        if symbol.address > 0 and prev_symbol.address > 0:
            # Fold symbols that are at the same address (happens in nm
            # output).
            if symbol.address == prev_symbol.address:
                symbol.size = max(prev_symbol.size, symbol.size)
                to_remove.add(index)
                continue
            # Even with symbols at the same address removed, overlaps can
            # still happen. In this case, padding will be negative (and this
            # is fine).
            padding = symbol.address - prev_symbol.end_address
            # These thresholds were found by manually auditing arm32 Chrome.
            # E.g.: Set them to 0 and see what warnings get logged.
            # TODO(agrieve): See if these thresholds make sense for
            #     architectures other than arm32.
            if (symbol.section in 'rd' and padding >= 256 or
                    symbol.section in 't' and padding >= 64):
                # For nm data, this is caused by data that has no associated
                # symbol. The linker map file lists them with no name, but
                # with a file. Example:
                #   .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
                # Where as most look like:
                #   .data.MANGLED_NAME...
                # Arguments are passed lazily so the symbol reprs are only
                # built when debug logging is actually enabled.
                logging.debug('Large padding of %d between:\n A) %r\n B) %r',
                              padding, prev_symbol, symbol)
                continue
            symbol.padding = padding
            symbol.size += padding
            assert symbol.size >= 0, 'Symbol has negative size: ' + (
                '%r\nprev symbol: %r' % (symbol, prev_symbol))
    # Map files have no overlaps, so worth special-casing the no-op case.
    if to_remove:
        logging.info('Removing %d overlapping symbols', len(to_remove))
        symbol_group.symbols = (
            [s for i, s in enumerate(all_symbols) if i not in to_remove])
| 149 | 189 |
| 150 | 190 |
| 151 def AddOptions(parser): | 191 def AddOptions(parser): |
| 152 parser.add_argument('--tool-prefix', default='', | 192 parser.add_argument('--tool-prefix', default='', |
| 153 help='Path prefix for c++filt.') | 193 help='Path prefix for c++filt.') |
| (...skipping 19 matching lines...) Expand all Loading... |
| 173 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l) | 213 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l) |
| 174 logging.debug('Found --tool-prefix from build_vars.txt') | 214 logging.debug('Found --tool-prefix from build_vars.txt') |
| 175 tool_prefix = os.path.join(output_directory, | 215 tool_prefix = os.path.join(output_directory, |
| 176 build_vars['android_tool_prefix']) | 216 build_vars['android_tool_prefix']) |
| 177 | 217 |
| 178 if os.path.sep not in tool_prefix: | 218 if os.path.sep not in tool_prefix: |
| 179 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt') | 219 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt') |
| 180 else: | 220 else: |
| 181 full_path = tool_prefix + 'c++filt' | 221 full_path = tool_prefix + 'c++filt' |
| 182 | 222 |
| 183 if not os.path.isfile(full_path): | 223 if not full_path or not os.path.isfile(full_path): |
| 184 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path) | 224 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path) |
| 225 if not output_directory or not os.path.isdir(output_directory): |
| 226 raise Exception('Bad --output-directory. Path not found: %s' % |
| 227 output_directory) |
| 228 logging.info('Using --output-directory=%s', output_directory) |
| 185 logging.info('Using --tool-prefix=%s', tool_prefix) | 229 logging.info('Using --tool-prefix=%s', tool_prefix) |
| 186 return tool_prefix | 230 return output_directory, tool_prefix |
| 187 | 231 |
| 188 | 232 |
def AnalyzeWithArgs(args, input_path):
    """Runs Analyze() on |input_path| using the options stored on |args|.

    Reads |args.output_directory| and |args.tool_prefix| and returns whatever
    Analyze() returns.
    """
    output_directory = args.output_directory
    tool_prefix = args.tool_prefix
    return Analyze(input_path, output_directory, tool_prefix)
| 191 | 235 |
| 192 | 236 |
def Analyze(path, output_directory=None, tool_prefix=''):
    """Produces a size_info from a .size file or a linker .map(.gz) file.

    Args:
      path: Input path. Must end in ".size", ".map" or ".map.gz".
      output_directory: Passed to _DetectToolPrefix(), which returns the value
          actually used for the source-path lookup.
      tool_prefix: Passed to _DetectToolPrefix(), which returns the value
          actually used to locate c++filt.

    Returns:
      The size_info loaded from the .size file or parsed from the map file,
      with padding and names recomputed.

    Raises:
      Exception: If |path| has none of the accepted suffixes.
    """
    if path.endswith('.size'):
        logging.debug('Loading results from: %s', path)
        size_info = file_format.LoadSizeInfo(path)
        # Derived values (padding and function names) are recomputed here.
        logging.info('Calculating padding')
        _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
        logging.info('Deriving signatures')
        # Re-parse out function parameters.
        _NormalizeNames(size_info.symbols)
        return size_info

    if not path.endswith(('.map', '.map.gz')):
        raise Exception('Expected input to be a .map or a .size')

    # Verify tool_prefix early, before the map file is parsed.
    detected = _DetectToolPrefix(tool_prefix, path, output_directory)
    output_directory, tool_prefix = detected

    with _OpenMaybeGz(path) as map_file:
        parser = linker_map_parser.MapFileParser()
        size_info = parser.Parse(map_file)

    symbols = size_info.symbols
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(symbols)
    # Map file for some reason doesn't unmangle all names.
    # Unmangle prints its own log statement.
    _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)
    logging.info('Extracting source paths from .ninja files')
    _ExtractSourcePaths(size_info.symbols, output_directory)
    logging.info('Normalizing names')
    _NormalizeNames(size_info.symbols)
    logging.info('Normalizing paths')
    _NormalizeObjectPaths(size_info.symbols)

    root_logger = logging.getLogger()
    if root_logger.isEnabledFor(logging.INFO):
        for line in describe.DescribeSizeInfoCoverage(size_info):
            logging.info(line)
    logging.info('Finished analyzing %d symbols', len(size_info.symbols))
    return size_info
| 229 | 276 |
| 230 | 277 |
def main(argv):
    """Command-line entry point: analyzes an input file and saves a .size file.

    Args:
      argv: Argument list handed to helpers.AddCommonOptionsAndParseArgs().
          The script guard passes sys.argv.

    Exits through parser.error() when the output path does not end in ".size".
    """
    # NOTE: ArgumentParser's first positional parameter is |prog|, not the
    # argument list. Passing argv there garbled usage and error messages, so
    # the parser is built with its defaults instead.
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', help='Path to input .map file.')
    parser.add_argument('output_file', help='Path to output .size(.gz) file.')
    AddOptions(parser)
    args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
    if not args.output_file.endswith('.size'):
        parser.error('output_file must end with .size')

    size_info = AnalyzeWithArgs(args, args.input_file)
    logging.info('Saving result to %s', args.output_file)
    file_format.SaveSizeInfo(size_info, args.output_file)

    logging.info('Done')
| 245 | 292 |
| 246 | 293 |
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)
| OLD | NEW |