Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(235)

Side by Side Diff: tools/binary_size/map2size.py

Issue 2791433004: //tools/binary_size: source_path information, change file format, fixes (Closed)
Patch Set: fix comment for _DetectToolPrefix Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/binary_size/linker_map_parser.py ('k') | tools/binary_size/models.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright 2017 The Chromium Authors. All rights reserved. 2 # Copyright 2017 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Main Python API for analyzing binary size.""" 6 """Main Python API for analyzing binary size."""
7 7
8 import argparse 8 import argparse
9 import datetime
9 import distutils.spawn 10 import distutils.spawn
11 import gzip
10 import logging 12 import logging
11 import os 13 import os
14 import re
12 import subprocess 15 import subprocess
13 import sys 16 import sys
14 17
15 import describe 18 import describe
16 import file_format 19 import file_format
17 import function_signature 20 import function_signature
18 import helpers 21 import helpers
19 import linker_map_parser 22 import linker_map_parser
20 import models 23 import models
24 import ninja_parser
21 25
22 26
23 def _IterLines(s): 27 def _OpenMaybeGz(path, mode=None):
24 prev_idx = -1 28 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""
25 while True: 29 if path.endswith('.gz'):
26 idx = s.find('\n', prev_idx + 1) 30 if mode and 'w' in mode:
27 if idx == -1: 31 return gzip.GzipFile(path, mode, 1)
28 return 32 return gzip.open(path, mode)
29 yield s[prev_idx + 1:idx] 33 return open(path, mode or 'r')
30 prev_idx = idx
31 34
32 35
def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
  """Uses c++filt to unmangle any symbols that need it."""
  # Itanium-ABI mangled names all start with "_Z".
  mangled = [s for s in symbol_group if s.name.startswith('_Z')]
  if not mangled:
    return

  logging.info('Unmangling %d names', len(mangled))
  cppfilt = subprocess.Popen([tool_prefix + 'c++filt'],
                             stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  # One mangled name per line in; c++filt echoes one demangled name per line.
  stdout = cppfilt.communicate('\n'.join(s.name for s in mangled))[0]
  assert cppfilt.returncode == 0

  for idx, demangled in enumerate(stdout.splitlines()):
    mangled[idx].name = demangled
47 50
48 51
49 def _NormalizeNames(symbol_group): 52 def _NormalizeNames(symbol_group):
50 """Ensures that all names are formatted in a useful way. 53 """Ensures that all names are formatted in a useful way.
51 54
52 This includes: 55 This includes:
53 - Assigning of |function_signature| (for functions). 56 - Assigning of |full_name|.
54 - Stripping of return types in |function_signature| and |name|. 57 - Stripping of return types in |full_name| and |name| (for functions).
55 - Stripping parameters from |name|. 58 - Stripping parameters from |name|.
56 - Moving "vtable for" and the like to be suffixes rather than prefixes. 59 - Moving "vtable for" and the like to be suffixes rather than prefixes.
57 """ 60 """
58 found_prefixes = set() 61 found_prefixes = set()
59 for symbol in symbol_group: 62 for symbol in symbol_group:
60 if symbol.name.startswith('*'): 63 if symbol.name.startswith('*'):
61 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this 64 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this
62 # can happen. 65 # can happen.
63 continue 66 continue
64 67
65 # E.g.: vtable for FOO 68 # E.g.: vtable for FOO
66 idx = symbol.name.find(' for ', 0, 30) 69 idx = symbol.name.find(' for ', 0, 30)
67 if idx != -1: 70 if idx != -1:
68 found_prefixes.add(symbol.name[:idx + 4]) 71 found_prefixes.add(symbol.name[:idx + 4])
69 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']' 72 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'
70 73
71 # E.g.: virtual thunk to FOO 74 # E.g.: virtual thunk to FOO
72 idx = symbol.name.find(' to ', 0, 30) 75 idx = symbol.name.find(' to ', 0, 30)
73 if idx != -1: 76 if idx != -1:
74 found_prefixes.add(symbol.name[:idx + 3]) 77 found_prefixes.add(symbol.name[:idx + 3])
75 symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']' 78 symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'
76 79
77 # Strip out return type, and identify where parameter list starts. 80 # Strip out return type, and identify where parameter list starts.
78 if symbol.section == 't': 81 if symbol.section == 't':
79 symbol.function_signature, symbol.name = ( 82 symbol.full_name, symbol.name = function_signature.Parse(symbol.name)
80 function_signature.Parse(symbol.name))
81 83
82 # Remove anonymous namespaces (they just harm clustering). 84 # Remove anonymous namespaces (they just harm clustering).
83 symbol.name = symbol.name.replace('(anonymous namespace)::', '') 85 non_anonymous = symbol.name.replace('(anonymous namespace)::', '')
86 if symbol.name != non_anonymous:
87 symbol.is_anonymous = True
88 symbol.name = non_anonymous
89 symbol.full_name = symbol.full_name.replace(
90 '(anonymous namespace)::', '')
91
92 if symbol.section != 't' and '(' in symbol.name:
93 # Pretty rare. Example:
94 # blink::CSSValueKeywordsHash::findValueImpl(char const*)::value_word_list
95 symbol.full_name = symbol.name
96 symbol.name = re.sub(r'\(.*\)', '', symbol.full_name)
84 97
85 logging.debug('Found name prefixes of: %r', found_prefixes) 98 logging.debug('Found name prefixes of: %r', found_prefixes)
86 99
87 100
88 def _NormalizeObjectPaths(symbol_group): 101 def _NormalizeObjectPaths(symbol_group):
89 """Ensures that all paths are formatted in a useful way.""" 102 """Ensures that all paths are formatted in a useful way."""
90 for symbol in symbol_group: 103 for symbol in symbol_group:
91 if symbol.path.startswith('obj/'): 104 path = symbol.object_path
105 if path.startswith('obj/'):
92 # Convert obj/third_party/... -> third_party/... 106 # Convert obj/third_party/... -> third_party/...
93 symbol.path = symbol.path[4:] 107 path = path[4:]
94 elif symbol.path.startswith('../../'): 108 elif path.startswith('../../'):
95 # Convert ../../third_party/... -> third_party/... 109 # Convert ../../third_party/... -> third_party/...
96 symbol.path = symbol.path[6:] 110 path = path[6:]
97 if symbol.path.endswith(')'): 111 if path.endswith(')'):
98 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o 112 # Convert foo/bar.a(baz.o) -> foo/bar.a/(baz.o)
99 start_idx = symbol.path.index('(') 113 start_idx = path.index('(')
100 paren_path = symbol.path[start_idx + 1:-1] 114 path = os.path.join(path[:start_idx], path[start_idx:])
101 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path 115 symbol.object_path = path
116
117
118 def _NormalizeSourcePath(path):
119 if path.startswith('gen/'):
120 # Convert gen/third_party/... -> third_party/...
121 return path[4:]
122 if path.startswith('../../'):
123 # Convert ../../third_party/... -> third_party/...
124 return path[6:]
125 return path
126
127
def _ExtractSourcePaths(symbol_group, output_directory):
  """Fills in the .source_path attribute of all symbols."""
  mapper = ninja_parser.SourceFileMapper(output_directory)

  for symbol in symbol_group:
    obj_path = symbol.object_path
    if symbol.source_path or not obj_path:
      continue
    if obj_path.startswith('..'):
      # We don't have source info for prebuilt .a files.
      continue
    found = mapper.FindSourceForPath(obj_path)
    if found:
      symbol.source_path = _NormalizeSourcePath(found)
    else:
      logging.warning('Could not find source path for %s', obj_path)
  logging.debug('Parsed %d .ninja files.', mapper.GetParsedFileCount())
102 144
103 145
104 def _RemoveDuplicatesAndCalculatePadding(symbol_group): 146 def _RemoveDuplicatesAndCalculatePadding(symbol_group):
105 """Removes symbols at the same address and calculates the |padding| field. 147 """Removes symbols at the same address and calculates the |padding| field.
106 148
107 Symbols must already be sorted by |address|. 149 Symbols must already be sorted by |address|.
108 """ 150 """
109 i = 0
110 to_remove = set() 151 to_remove = set()
111 all_symbols = symbol_group.symbols 152 all_symbols = symbol_group.symbols
112 for i in xrange(len(all_symbols)): 153 for i, symbol in enumerate(all_symbols[1:]):
113 prev_symbol = all_symbols[i - 1] 154 prev_symbol = all_symbols[i]
114 symbol = all_symbols[i]
115 if prev_symbol.section_name != symbol.section_name: 155 if prev_symbol.section_name != symbol.section_name:
116 continue 156 continue
117 if symbol.address > 0 and prev_symbol.address > 0: 157 if symbol.address > 0 and prev_symbol.address > 0:
118 # Fold symbols that are at the same address (happens in nm output). 158 # Fold symbols that are at the same address (happens in nm output).
119 if symbol.address == prev_symbol.address: 159 if symbol.address == prev_symbol.address:
120 symbol.size = max(prev_symbol.size, symbol.size) 160 symbol.size = max(prev_symbol.size, symbol.size)
121 to_remove.add(i) 161 to_remove.add(i + 1)
122 continue 162 continue
123 # Even with symbols at the same address removed, overlaps can still 163 # Even with symbols at the same address removed, overlaps can still
124 # happen. In this case, padding will be negative (and this is fine). 164 # happen. In this case, padding will be negative (and this is fine).
125 padding = symbol.address - prev_symbol.end_address 165 padding = symbol.address - prev_symbol.end_address
126 # These thresholds were found by manually auditing arm32 Chrome. 166 # These thresholds were found by manually auditing arm32 Chrome.
127 # E.g.: Set them to 0 and see what warnings get logged. 167 # E.g.: Set them to 0 and see what warnings get logged.
128 # TODO(agrieve): See if these thresholds make sense for architectures 168 # TODO(agrieve): See if these thresholds make sense for architectures
129 # other than arm32. 169 # other than arm32.
130 if (symbol.section in 'rd' and padding >= 256 or 170 if (symbol.section in 'rd' and padding >= 256 or
131 symbol.section in 't' and padding >= 64): 171 symbol.section in 't' and padding >= 64):
132 # For nm data, this is caused by data that has no associated symbol. 172 # For nm data, this is caused by data that has no associated symbol.
133 # The linker map file lists them with no name, but with a file. 173 # The linker map file lists them with no name, but with a file.
134 # Example: 174 # Example:
135 # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o 175 # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
136 # Where as most look like: 176 # Where as most look like:
137 # .data.MANGLED_NAME... 177 # .data.MANGLED_NAME...
138 logging.debug('Large padding of %d between:\n A) %r\n B) %r' % ( 178 logging.debug('Large padding of %d between:\n A) %r\n B) %r' % (
139 padding, prev_symbol, symbol)) 179 padding, prev_symbol, symbol))
140 continue 180 continue
141 symbol.padding = padding 181 symbol.padding = padding
142 symbol.size += padding 182 symbol.size += padding
143 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol 183 assert symbol.size >= 0, 'Symbol has negative size: ' + (
184 '%r\nprev symbol: %r' % (symbol, prev_symbol))
144 # Map files have no overlaps, so worth special-casing the no-op case. 185 # Map files have no overlaps, so worth special-casing the no-op case.
145 if to_remove: 186 if to_remove:
146 logging.info('Removing %d overlapping symbols', len(to_remove)) 187 logging.info('Removing %d overlapping symbols', len(to_remove))
147 symbol_group.symbols = ( 188 symbol_group.symbols = (
148 [s for i, s in enumerate(all_symbols) if i not in to_remove]) 189 [s for i, s in enumerate(all_symbols) if i not in to_remove])
149 190
150 191
def AddOptions(parser):
  """Registers the analysis-related flags on |parser|."""
  parser.add_argument('--tool-prefix', default='',
                      help='Path prefix for c++filt.')
  parser.add_argument('--output-directory',
                      help='Path to the root build directory.')
156 197
157 198
158 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None): 199 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None):
159 """Calls Analyze with values from args.""" 200 """Detects values for --tool-prefix and --output-directory."""
160 if not output_directory: 201 if not output_directory:
161 abs_path = os.path.abspath(input_file) 202 abs_path = os.path.abspath(input_file)
162 release_idx = abs_path.find('Release') 203 release_idx = abs_path.find('Release')
163 if release_idx != -1: 204 if release_idx != -1:
164 output_directory = abs_path[:release_idx] + 'Release' 205 output_directory = abs_path[:release_idx] + 'Release'
165 output_directory = os.path.relpath(abs_path[:release_idx] + '/Release') 206 output_directory = os.path.relpath(abs_path[:release_idx] + '/Release')
166 logging.debug('Detected --output-directory=%s', output_directory) 207 logging.debug('Detected --output-directory=%s', output_directory)
167 208
168 if not tool_prefix and output_directory: 209 if not tool_prefix and output_directory:
169 # Auto-detect from build_vars.txt 210 # Auto-detect from build_vars.txt
170 build_vars_path = os.path.join(output_directory, 'build_vars.txt') 211 build_vars_path = os.path.join(output_directory, 'build_vars.txt')
171 if os.path.exists(build_vars_path): 212 if os.path.exists(build_vars_path):
172 with open(build_vars_path) as f: 213 with open(build_vars_path) as f:
173 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l) 214 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l)
174 logging.debug('Found --tool-prefix from build_vars.txt') 215 logging.debug('Found --tool-prefix from build_vars.txt')
175 tool_prefix = os.path.join(output_directory, 216 tool_prefix = os.path.join(output_directory,
176 build_vars['android_tool_prefix']) 217 build_vars['android_tool_prefix'])
177 218
178 if os.path.sep not in tool_prefix: 219 if os.path.sep not in tool_prefix:
179 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt') 220 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')
180 else: 221 else:
181 full_path = tool_prefix + 'c++filt' 222 full_path = tool_prefix + 'c++filt'
182 223
183 if not os.path.isfile(full_path): 224 if not full_path or not os.path.isfile(full_path):
184 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path) 225 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)
226 if not output_directory or not os.path.isdir(output_directory):
227 raise Exception('Bad --output-directory. Path not found: %s' %
228 output_directory)
229 logging.info('Using --output-directory=%s', output_directory)
185 logging.info('Using --tool-prefix=%s', tool_prefix) 230 logging.info('Using --tool-prefix=%s', tool_prefix)
186 return tool_prefix 231 return output_directory, tool_prefix
187 232
188 233
def AnalyzeWithArgs(args, input_path):
  """Runs Analyze() using values parsed from command-line |args|."""
  return Analyze(input_path, output_directory=args.output_directory,
                 tool_prefix=args.tool_prefix)
191 236
192 237
def Analyze(path, output_directory=None, tool_prefix=''):
  """Loads a .size file or parses a linker .map(.gz) into a SizeInfo.

  Args:
    path: Path to a ".size" results file, or a ".map"/".map.gz" linker map.
    output_directory: Root build directory; auto-detected when None.
    tool_prefix: Path prefix for c++filt; auto-detected when empty.

  Returns:
    A models.SizeInfo with derived fields (padding, names, source/object
    paths) populated.

  Raises:
    Exception: When |path| has an unsupported extension, or when tool
        prefix / output directory detection fails.
  """
  if path.endswith('.size'):
    logging.debug('Loading results from: %s', path)
    size_info = file_format.LoadSizeInfo(path)
    # Recompute derived values (padding and function names).
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    logging.info('Deriving signatures')
    # Re-parse out function parameters.
    _NormalizeNames(size_info.symbols)
    return size_info
  elif not path.endswith('.map') and not path.endswith('.map.gz'):
    raise Exception('Expected input to be a .map or a .size')
  else:
    # Verify tool_prefix early.
    output_directory, tool_prefix = (
        _DetectToolPrefix(tool_prefix, path, output_directory))

    with _OpenMaybeGz(path) as map_file:
      section_sizes, symbols = linker_map_parser.MapFileParser().Parse(map_file)
    # Record the map's mtime so diffs can tell which build came first.
    timestamp = datetime.datetime.utcfromtimestamp(os.path.getmtime(path))
    size_info = models.SizeInfo(section_sizes, models.SymbolGroup(symbols),
                                timestamp=timestamp)

    # Map file for some reason doesn't unmangle all names.
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    # Unmangle prints its own log statement.
    _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)
    logging.info('Extracting source paths from .ninja files')
    _ExtractSourcePaths(size_info.symbols, output_directory)
    # Resolve paths prints its own log statement.
    logging.info('Normalizing names')
    _NormalizeNames(size_info.symbols)
    logging.info('Normalizing paths')
    _NormalizeObjectPaths(size_info.symbols)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Finished analyzing %d symbols', len(size_info.symbols))
  return size_info
229 280
230 281
282 def _DetectGitRevision(path):
283 try:
284 git_rev = subprocess.check_output(
285 ['git', '-C', os.path.dirname(path), 'rev-parse', 'HEAD'])
286 return git_rev.rstrip()
287 except Exception:
288 logging.warning('Failed to detect git revision for file metadata.')
289 return None
290
291
def main(argv):
  """Command-line entry point: parse args, analyze, and save a .size file."""
  parser = argparse.ArgumentParser(argv)
  parser.add_argument('input_file', help='Path to input .map file.')
  parser.add_argument('output_file', help='Path to output .size(.gz) file.')
  AddOptions(parser)
  args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
  if not args.output_file.endswith('.size'):
    parser.error('output_file must end with .size')

  size_info = AnalyzeWithArgs(args, args.input_file)
  if not args.input_file.endswith('.size'):
    # Fresh analysis: record provenance so results can identify their input.
    revision = _DetectGitRevision(args.input_file)
    basename = os.path.basename(args.input_file)
    size_info.tag = 'Filename=%s git_rev=%s' % (basename, revision)
    logging.info('Recording metadata: %s',
                 describe.DescribeSizeInfoMetadata(size_info))
  logging.info('Saving result to %s', args.output_file)
  file_format.SaveSizeInfo(size_info, args.output_file)

  logging.info('Done')
245 312
246 313
# Script entry point: propagate main()'s return value as the exit code.
if __name__ == '__main__':
  sys.exit(main(sys.argv))
OLDNEW
« no previous file with comments | « tools/binary_size/linker_map_parser.py ('k') | tools/binary_size/models.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698