Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(410)

Side by Side Diff: tools/binary_size/map2size.py

Issue 2775173005: FREEZE.unindexed (Closed)
Patch Set: ps2 Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/binary_size/linker_map_parser.py ('k') | tools/binary_size/models.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright 2017 The Chromium Authors. All rights reserved. 2 # Copyright 2017 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Main Python API for analyzing binary size.""" 6 """Main Python API for analyzing binary size."""
7 7
8 import argparse 8 import argparse
9 import distutils.spawn 9 import distutils.spawn
10 import gzip
10 import logging 11 import logging
11 import os 12 import os
13 import re
12 import subprocess 14 import subprocess
13 import sys 15 import sys
14 16
15 import describe 17 import describe
16 import file_format 18 import file_format
17 import function_signature 19 import function_signature
18 import helpers 20 import helpers
19 import linker_map_parser 21 import linker_map_parser
20 import models 22 import models
23 import ninja_parser
21 24
22 25
23 def _IterLines(s): 26 def _OpenMaybeGz(path, mode=None):
24 prev_idx = -1 27 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""
25 while True: 28 if path.endswith('.gz'):
26 idx = s.find('\n', prev_idx + 1) 29 if mode and 'w' in mode:
27 if idx == -1: 30 return gzip.GzipFile(path, mode, 1)
28 return 31 return gzip.open(path, mode)
29 yield s[prev_idx + 1:idx] 32 return open(path, mode or 'r')
30 prev_idx = idx
31 33
32 34
33 def _UnmangleRemainingSymbols(symbol_group, tool_prefix): 35 def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
34 """Uses c++filt to unmangle any symbols that need it.""" 36 """Uses c++filt to unmangle any symbols that need it."""
35 to_process = [s for s in symbol_group if s.name.startswith('_Z')] 37 to_process = [s for s in symbol_group if s.name.startswith('_Z')]
36 if not to_process: 38 if not to_process:
37 return 39 return
38 40
39 logging.info('Unmangling %d names', len(to_process)) 41 logging.info('Unmangling %d names', len(to_process))
40 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE, 42 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,
41 stdout=subprocess.PIPE) 43 stdout=subprocess.PIPE)
42 stdout = proc.communicate('\n'.join(s.name for s in to_process))[0] 44 stdout = proc.communicate('\n'.join(s.name for s in to_process))[0]
43 assert proc.returncode == 0 45 assert proc.returncode == 0
44 46
45 for i, line in enumerate(_IterLines(stdout)): 47 for i, line in enumerate(stdout.splitlines()):
46 to_process[i].name = line 48 to_process[i].name = line
47 49
48 50
49 def _NormalizeNames(symbol_group): 51 def _NormalizeNames(symbol_group):
50 """Ensures that all names are formatted in a useful way. 52 """Ensures that all names are formatted in a useful way.
51 53
52 This includes: 54 This includes:
53 - Assigning of |function_signature| (for functions). 55 - Assigning of |full_name|.
54 - Stripping of return types in |function_signature| and |name|. 56 - Stripping of return types in |full_name| and |name| (for functions).
55 - Stripping parameters from |name|. 57 - Stripping parameters from |name|.
56 - Moving "vtable for" and the like to be suffixes rather than prefixes. 58 - Moving "vtable for" and the like to be suffixes rather than prefixes.
57 """ 59 """
58 found_prefixes = set() 60 found_prefixes = set()
59 for symbol in symbol_group: 61 for symbol in symbol_group:
60 if symbol.name.startswith('*'): 62 if symbol.name.startswith('*'):
61 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this 63 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this
62 # can happen. 64 # can happen.
63 continue 65 continue
64 66
65 # E.g.: vtable for FOO 67 # E.g.: vtable for FOO
66 idx = symbol.name.find(' for ', 0, 30) 68 idx = symbol.name.find(' for ', 0, 30)
67 if idx != -1: 69 if idx != -1:
68 found_prefixes.add(symbol.name[:idx + 4]) 70 found_prefixes.add(symbol.name[:idx + 4])
69 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']' 71 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'
70 72
71 # E.g.: virtual thunk to FOO 73 # E.g.: virtual thunk to FOO
72 idx = symbol.name.find(' to ', 0, 30) 74 idx = symbol.name.find(' to ', 0, 30)
73 if idx != -1: 75 if idx != -1:
74 found_prefixes.add(symbol.name[:idx + 3]) 76 found_prefixes.add(symbol.name[:idx + 3])
75 symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']' 77 symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'
76 78
77 # Strip out return type, and identify where parameter list starts. 79 # Strip out return type, and identify where parameter list starts.
78 if symbol.section == 't': 80 if symbol.section == 't':
79 symbol.function_signature, symbol.name = ( 81 symbol.full_name, symbol.name = function_signature.Parse(symbol.name)
80 function_signature.Parse(symbol.name))
81 82
82 # Remove anonymous namespaces (they just harm clustering). 83 # Remove anonymous namespaces (they just harm clustering).
83 symbol.name = symbol.name.replace('(anonymous namespace)::', '') 84 non_anonymous = symbol.name.replace('(anonymous namespace)::', '')
85 if symbol.name != non_anonymous:
86 symbol.is_anonymous = True
87 symbol.name = non_anonymous
88 symbol.full_name = symbol.full_name.replace(
89 '(anonymous namespace)::', '')
90
91 if symbol.section != 't' and '(' in symbol.name:
92 # Pretty rare. Example:
93 # blink::CSSValueKeywordsHash::findValueImpl(char const*)::value_word_list
94 symbol.full_name = symbol.name
95 symbol.name = re.sub(r'\(.*\)', '', symbol.full_name)
84 96
85 logging.debug('Found name prefixes of: %r', found_prefixes) 97 logging.debug('Found name prefixes of: %r', found_prefixes)
86 98
87 99
88 def _NormalizeObjectPaths(symbol_group): 100 def _NormalizeObjectPaths(symbol_group):
89 """Ensures that all paths are formatted in a useful way.""" 101 """Ensures that all paths are formatted in a useful way."""
90 for symbol in symbol_group: 102 for symbol in symbol_group:
91 if symbol.path.startswith('obj/'): 103 path = symbol.object_path
104 if path.startswith('obj/'):
92 # Convert obj/third_party/... -> third_party/... 105 # Convert obj/third_party/... -> third_party/...
93 symbol.path = symbol.path[4:] 106 path = path[4:]
94 elif symbol.path.startswith('../../'): 107 elif path.startswith('../../'):
95 # Convert ../../third_party/... -> third_party/... 108 # Convert ../../third_party/... -> third_party/...
96 symbol.path = symbol.path[6:] 109 path = path[6:]
97 if symbol.path.endswith(')'): 110 if path.endswith(')'):
98 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o 111 # Convert foo/bar.a(baz.o) -> foo/bar.a/(baz.o)
99 start_idx = symbol.path.index('(') 112 start_idx = path.index('(')
100 paren_path = symbol.path[start_idx + 1:-1] 113 path = os.path.join(path[:start_idx], path[start_idx:])
101 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path 114 symbol.object_path = path
115
116
117 def _NormalizeSourcePath(path):
118 if path.startswith('gen/'):
119 # Convert gen/third_party/... -> third_party/...
120 return path[4:]
121 if path.startswith('../../'):
122 # Convert ../../third_party/... -> third_party/...
123 return path[6:]
124 return path
125
126
127 def _ExtractSourcePaths(symbol_group, output_directory):
128 """Fills in the .source_path attribute of all symbols."""
129 mapper = ninja_parser.SourceFileMapper(output_directory)
130
131 for symbol in symbol_group:
132 object_path = symbol.object_path
133 if symbol.source_path or not object_path:
134 continue
135 # We don't have source info for prebuilt .a files.
136 if not object_path.startswith('..'):
137 source_path = mapper.FindSourceForPath(object_path)
138 if source_path:
139 symbol.source_path = _NormalizeSourcePath(source_path)
140 else:
141 logging.warning('Could not find source path for %s', object_path)
142 logging.debug('Parsed %d .ninja files.', mapper.GetParsedFileCount())
102 143
103 144
104 def _RemoveDuplicatesAndCalculatePadding(symbol_group): 145 def _RemoveDuplicatesAndCalculatePadding(symbol_group):
105 """Removes symbols at the same address and calculates the |padding| field. 146 """Removes symbols at the same address and calculates the |padding| field.
106 147
107 Symbols must already be sorted by |address|. 148 Symbols must already be sorted by |address|.
108 """ 149 """
109 i = 0
110 to_remove = set() 150 to_remove = set()
111 all_symbols = symbol_group.symbols 151 all_symbols = symbol_group.symbols
112 for i in xrange(len(all_symbols)): 152 for i, symbol in enumerate(all_symbols[1:]):
113 prev_symbol = all_symbols[i - 1] 153 prev_symbol = all_symbols[i]
114 symbol = all_symbols[i]
115 if prev_symbol.section_name != symbol.section_name: 154 if prev_symbol.section_name != symbol.section_name:
116 continue 155 continue
117 if symbol.address > 0 and prev_symbol.address > 0: 156 if symbol.address > 0 and prev_symbol.address > 0:
118 # Fold symbols that are at the same address (happens in nm output). 157 # Fold symbols that are at the same address (happens in nm output).
119 if symbol.address == prev_symbol.address: 158 if symbol.address == prev_symbol.address:
120 symbol.size = max(prev_symbol.size, symbol.size) 159 symbol.size = max(prev_symbol.size, symbol.size)
121 to_remove.add(i) 160 to_remove.add(i + 1)
122 continue 161 continue
123 # Even with symbols at the same address removed, overlaps can still 162 # Even with symbols at the same address removed, overlaps can still
124 # happen. In this case, padding will be negative (and this is fine). 163 # happen. In this case, padding will be negative (and this is fine).
125 padding = symbol.address - prev_symbol.end_address 164 padding = symbol.address - prev_symbol.end_address
126 # These thresholds were found by manually auditing arm32 Chrome. 165 # These thresholds were found by manually auditing arm32 Chrome.
127 # E.g.: Set them to 0 and see what warnings get logged. 166 # E.g.: Set them to 0 and see what warnings get logged.
128 # TODO(agrieve): See if these thresholds make sense for architectures 167 # TODO(agrieve): See if these thresholds make sense for architectures
129 # other than arm32. 168 # other than arm32.
130 if (symbol.section in 'rd' and padding >= 256 or 169 if (symbol.section in 'rd' and padding >= 256 or
131 symbol.section in 't' and padding >= 64): 170 symbol.section in 't' and padding >= 64):
132 # For nm data, this is caused by data that has no associated symbol. 171 # For nm data, this is caused by data that has no associated symbol.
133 # The linker map file lists them with no name, but with a file. 172 # The linker map file lists them with no name, but with a file.
134 # Example: 173 # Example:
135 # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o 174 # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
136 # Whereas most look like: 175 # Whereas most look like:
137 # .data.MANGLED_NAME... 176 # .data.MANGLED_NAME...
138 logging.debug('Large padding of %d between:\n A) %r\n B) %r' % ( 177 logging.debug('Large padding of %d between:\n A) %r\n B) %r' % (
139 padding, prev_symbol, symbol)) 178 padding, prev_symbol, symbol))
140 continue 179 continue
141 symbol.padding = padding 180 symbol.padding = padding
142 symbol.size += padding 181 symbol.size += padding
143 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol 182 assert symbol.size >= 0, 'Symbol has negative size: ' + (
183 '%r\nprev symbol: %r' % (symbol, prev_symbol))
144 # Map files have no overlaps, so worth special-casing the no-op case. 184 # Map files have no overlaps, so worth special-casing the no-op case.
145 if to_remove: 185 if to_remove:
146 logging.info('Removing %d overlapping symbols', len(to_remove)) 186 logging.info('Removing %d overlapping symbols', len(to_remove))
147 symbol_group.symbols = ( 187 symbol_group.symbols = (
148 [s for i, s in enumerate(all_symbols) if i not in to_remove]) 188 [s for i, s in enumerate(all_symbols) if i not in to_remove])
149 189
150 190
151 def AddOptions(parser): 191 def AddOptions(parser):
152 parser.add_argument('--tool-prefix', default='', 192 parser.add_argument('--tool-prefix', default='',
153 help='Path prefix for c++filt.') 193 help='Path prefix for c++filt.')
(...skipping 19 matching lines...) Expand all
173 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l) 213 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l)
174 logging.debug('Found --tool-prefix from build_vars.txt') 214 logging.debug('Found --tool-prefix from build_vars.txt')
175 tool_prefix = os.path.join(output_directory, 215 tool_prefix = os.path.join(output_directory,
176 build_vars['android_tool_prefix']) 216 build_vars['android_tool_prefix'])
177 217
178 if os.path.sep not in tool_prefix: 218 if os.path.sep not in tool_prefix:
179 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt') 219 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')
180 else: 220 else:
181 full_path = tool_prefix + 'c++filt' 221 full_path = tool_prefix + 'c++filt'
182 222
183 if not os.path.isfile(full_path): 223 if not full_path or not os.path.isfile(full_path):
184 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path) 224 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)
225 if not output_directory or not os.path.isdir(output_directory):
226 raise Exception('Bad --output-directory. Path not found: %s' %
227 output_directory)
228 logging.info('Using --output-directory=%s', output_directory)
185 logging.info('Using --tool-prefix=%s', tool_prefix) 229 logging.info('Using --tool-prefix=%s', tool_prefix)
186 return tool_prefix 230 return output_directory, tool_prefix
187 231
188 232
189 def AnalyzeWithArgs(args, input_path): 233 def AnalyzeWithArgs(args, input_path):
190 return Analyze(input_path, args.output_directory, args.tool_prefix) 234 return Analyze(input_path, args.output_directory, args.tool_prefix)
191 235
192 236
193 def Analyze(path, output_directory=None, tool_prefix=''): 237 def Analyze(path, output_directory=None, tool_prefix=''):
194 if file_format.EndsWithMaybeGz(path, '.size'): 238 if path.endswith('.size'):
195 logging.debug('Loading results from: %s', path) 239 logging.debug('Loading results from: %s', path)
196 size_info = file_format.LoadSizeInfo(path) 240 size_info = file_format.LoadSizeInfo(path)
197 # Recompute derived values (padding and function names). 241 # Recompute derived values (padding and function names).
198 logging.info('Calculating padding') 242 logging.info('Calculating padding')
199 _RemoveDuplicatesAndCalculatePadding(size_info.symbols) 243 _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
200 logging.info('Deriving signatures') 244 logging.info('Deriving signatures')
201 # Re-parse out function parameters. 245 # Re-parse out function parameters.
202 _NormalizeNames(size_info.symbols.WhereInSection('t')) 246 _NormalizeNames(size_info.symbols)
203 return size_info 247 return size_info
204 elif not file_format.EndsWithMaybeGz(path, '.map'): 248 elif not path.endswith('.map') and not path.endswith('.map.gz'):
205 raise Exception('Expected input to be a .map or a .size') 249 raise Exception('Expected input to be a .map or a .size')
206 else: 250 else:
207 # Verify tool_prefix early. 251 # Verify tool_prefix early.
208 tool_prefix = _DetectToolPrefix(tool_prefix, path, output_directory) 252 output_directory, tool_prefix = (
253 _DetectToolPrefix(tool_prefix, path, output_directory))
209 254
210 with file_format.OpenMaybeGz(path) as map_file: 255 with _OpenMaybeGz(path) as map_file:
211 size_info = linker_map_parser.MapFileParser().Parse(map_file) 256 size_info = linker_map_parser.MapFileParser().Parse(map_file)
212 257
213 # Map file for some reason doesn't unmangle all names. 258 # Map file for some reason doesn't unmangle all names.
214 logging.info('Calculating padding') 259 logging.info('Calculating padding')
215 _RemoveDuplicatesAndCalculatePadding(size_info.symbols) 260 _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
216 # Unmangle prints its own log statement. 261 # Unmangle prints its own log statement.
217 _UnmangleRemainingSymbols(size_info.symbols, tool_prefix) 262 _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)
263 logging.info('Extracting source paths from .ninja files')
264 _ExtractSourcePaths(size_info.symbols, output_directory)
218 # Resolve paths prints its own log statement. 265 # Resolve paths prints its own log statement.
219 logging.info('Normalizing names') 266 logging.info('Normalizing names')
220 _NormalizeNames(size_info.symbols) 267 _NormalizeNames(size_info.symbols)
221 logging.info('Normalizing paths') 268 logging.info('Normalizing paths')
222 _NormalizeObjectPaths(size_info.symbols) 269 _NormalizeObjectPaths(size_info.symbols)
223 270
224 if logging.getLogger().isEnabledFor(logging.INFO): 271 if logging.getLogger().isEnabledFor(logging.INFO):
225 for line in describe.DescribeSizeInfoCoverage(size_info): 272 for line in describe.DescribeSizeInfoCoverage(size_info):
226 logging.info(line) 273 logging.info(line)
227 logging.info('Finished analyzing %d symbols', len(size_info.symbols)) 274 logging.info('Finished analyzing %d symbols', len(size_info.symbols))
228 return size_info 275 return size_info
229 276
230 277
231 def main(argv): 278 def main(argv):
232 parser = argparse.ArgumentParser(argv) 279 parser = argparse.ArgumentParser(argv)
233 parser.add_argument('input_file', help='Path to input .map file.') 280 parser.add_argument('input_file', help='Path to input .map file.')
234 parser.add_argument('output_file', help='Path to output .size(.gz) file.') 281 parser.add_argument('output_file', help='Path to output .size(.gz) file.')
235 AddOptions(parser) 282 AddOptions(parser)
236 args = helpers.AddCommonOptionsAndParseArgs(parser, argv) 283 args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
237 if not file_format.EndsWithMaybeGz(args.output_file, '.size'): 284 if not args.output_file.endswith('.size'):
238 parser.error('output_file must end with .size or .size.gz') 285 parser.error('output_file must end with .size')
239 286
240 size_info = AnalyzeWithArgs(args, args.input_file) 287 size_info = AnalyzeWithArgs(args, args.input_file)
241 logging.info('Saving result to %s', args.output_file) 288 logging.info('Saving result to %s', args.output_file)
242 file_format.SaveSizeInfo(size_info, args.output_file) 289 file_format.SaveSizeInfo(size_info, args.output_file)
243 290
244 logging.info('Done') 291 logging.info('Done')
245 292
246 293
247 if __name__ == '__main__': 294 if __name__ == '__main__':
248 sys.exit(main(sys.argv)) 295 sys.exit(main(sys.argv))
OLDNEW
« no previous file with comments | « tools/binary_size/linker_map_parser.py ('k') | tools/binary_size/models.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698