Chromium Code Reviews

Unified Diff: tools/binary_size/run_binary_size_analysis.py

Issue 258633003: Graphical version of the run_binary_size_analysis tool. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Made the code fully pylint clean. Created 6 years, 7 months ago
-#!/usr/bin/python
+#!/usr/bin/env python
 # Copyright 2014 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Generate a spatial analysis against an arbitrary library.

 To use, build the 'binary_size_tool' target. Then run this tool, passing
 in the location of the library to be analyzed along with any other options
 you desire.
 """

 import collections
-import fileinput
 import json
+import logging
+import multiprocessing
 import optparse
 import os
-import pprint
 import re
 import shutil
 import subprocess
 import sys
 import tempfile
+import time
+
+import binary_size_utils
+
+# This path change is not beautiful. Temporary (I hope) measure until
+# the chromium project has figured out a proper way to organize the
+# library of python tools. http://crbug.com/375725
+elf_symbolizer_path = os.path.abspath(os.path.join(
+    os.path.dirname(__file__),
+    '..',
+    '..',
+    'build',
+    'android',
+    'pylib'))
+sys.path.append(elf_symbolizer_path)
+import symbols.elf_symbolizer as elf_symbolizer


 # TODO(andrewhayden): Only used for legacy reports. Delete.
-def FormatBytes(bytes):
+def FormatBytes(byte_count):
   """Pretty-print a number of bytes."""
-  if bytes > 1e6:
-    bytes = bytes / 1.0e6
-    return '%.1fm' % bytes
-  if bytes > 1e3:
-    bytes = bytes / 1.0e3
-    return '%.1fk' % bytes
-  return str(bytes)
+  if byte_count > 1e6:
+    byte_count = byte_count / 1.0e6
+    return '%.1fm' % byte_count
+  if byte_count > 1e3:
+    byte_count = byte_count / 1.0e3
+    return '%.1fk' % byte_count
+  return str(byte_count)


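Note: FormatBytes only renames its parameter here; behavior is unchanged. A quick sanity check of what it produces (values chosen for illustration):

    FormatBytes(500)      # '500'
    FormatBytes(2048)     # '2.0k'
    FormatBytes(3500000)  # '3.5m'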
 # TODO(andrewhayden): Only used for legacy reports. Delete.
-def SymbolTypeToHuman(type):
+def SymbolTypeToHuman(symbol_type):
   """Convert a symbol type as printed by nm into a human-readable name."""
   return {'b': 'bss',
           'd': 'data',
           'r': 'read-only data',
           't': 'code',
           'w': 'weak symbol',
-          'v': 'weak symbol'}[type]
+          'v': 'weak symbol'}[symbol_type]
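The mapping mirrors nm's single-letter codes; note that any letter not in the dict raises KeyError. For illustration:

    SymbolTypeToHuman('t')  # 'code'
    SymbolTypeToHuman('r')  # 'read-only data'
    SymbolTypeToHuman('x')  # raises KeyError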
-
-
-def ParseNm(input):
-  """Parse nm output.
-
-  Argument: an iterable over lines of nm output.
-
-  Yields: (symbol name, symbol type, symbol size, source file path).
-  Path may be None if nm couldn't figure out the source file.
-  """
-
-  # Match lines with size, symbol, optional location, optional discriminator
-  sym_re = re.compile(r'^[0-9a-f]{8} '           # address (8 hex digits)
-                      '([0-9a-f]{8}) '           # size (8 hex digits)
-                      '(.) '                     # symbol type, one character
-                      '([^\t]+)'                 # symbol name, separated from next by tab
-                      '(?:\t(.*):[\d\?]+)?.*$')  # location
-  # Match lines with addr but no size.
-  addr_re = re.compile(r'^[0-9a-f]{8} (.) ([^\t]+)(?:\t.*)?$')
-  # Match lines that don't have an address at all -- typically external symbols.
-  noaddr_re = re.compile(r'^ {8} (.) (.*)$')
-
-  for line in input:
-    line = line.rstrip()
-    match = sym_re.match(line)
-    if match:
-      size, type, sym = match.groups()[0:3]
-      size = int(size, 16)
-      if type.lower() == 'b':
-        continue  # skip all BSS for now
-      path = match.group(4)
-      yield sym, type, size, path
-      continue
-    match = addr_re.match(line)
-    if match:
-      type, sym = match.groups()[0:2]
-      # No size == we don't care.
-      continue
-    match = noaddr_re.match(line)
-    if match:
-      type, sym = match.groups()
-      if type in ('U', 'w'):
-        # external or weak symbol
-        continue
-
-    print >>sys.stderr, 'unparsed:', repr(line)


 def _MkChild(node, name):
-  child = None
-  for test in node['children']:
-    if test['n'] == name:
-      child = test
-      break
-  if not child:
-    child = {'n': name, 'children': []}
-    node['children'].append(child)
+  child = node['children'].get(name)
+  if child is None:
+    child = {'n': name, 'children': {}}
+    node['children'][name] = child
   return child


+def MakeChildrenDictsIntoLists(node):
+  largest_list_len = 0
+  if 'children' in node:
+    largest_list_len = len(node['children'])
+    child_list = []
+    for child in node['children'].itervalues():
+      child_largest_list_len = MakeChildrenDictsIntoLists(child)
+      if child_largest_list_len > largest_list_len:
+        largest_list_len = child_largest_list_len
+      child_list.append(child)
+    node['children'] = child_list
+
+  return largest_list_len


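The switch from a list of children to a dict keyed by name makes each lookup in _MkChild O(1) instead of a linear scan per insertion; MakeChildrenDictsIntoLists then flattens the dicts back into the lists the JSON consumer expects, returning the largest fan-out it saw. A minimal sketch (node names are hypothetical):

    root = {'n': '/', 'children': {}}
    a = _MkChild(root, 'base')   # creates {'n': 'base', 'children': {}}
    b = _MkChild(root, 'base')   # dict hit: returns the existing node
    assert a is b
    MakeChildrenDictsIntoLists(root)  # every 'children' dict becomes a list
    assert isinstance(root['children'], list)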
 def MakeCompactTree(symbols):
-  result = {'n': '/', 'children': [], 'k': 'p', 'maxDepth': 0}
+  result = {'n': '/', 'children': {}, 'k': 'p', 'maxDepth': 0}
+  seen_symbol_with_path = False
   for symbol_name, symbol_type, symbol_size, file_path in symbols:

     if 'vtable for ' in symbol_name:
       symbol_type = '@'  # hack to categorize these separately
     # Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz']
     if file_path:
       file_path = os.path.normpath(file_path)
+      seen_symbol_with_path = True
     else:
       file_path = '(No Path)'

     if file_path.startswith('/'):
       file_path = file_path[1:]
     path_parts = file_path.split('/')

     # Find pre-existing node in tree, or update if it already exists
     node = result
     depth = 0
     while len(path_parts) > 0:
       path_part = path_parts.pop(0)
       if len(path_part) == 0:
         continue
       depth += 1
-      node = _MkChild(node, path_part);
+      node = _MkChild(node, path_part)
+      assert not 'k' in node or node['k'] == 'p'
       node['k'] = 'p'  # p for path

     # 'node' is now the file node. Find the symbol-type bucket.
     node['lastPathElement'] = True
     node = _MkChild(node, symbol_type)
+    assert not 'k' in node or node['k'] == 'b'
     node['t'] = symbol_type
     node['k'] = 'b'  # b for bucket
     depth += 1

     # 'node' is now the symbol-type bucket. Make the child entry.
     node = _MkChild(node, symbol_name)
-    if 'children' in node:  # Only possible if we're adding duplicate entries!!!
+    if 'children' in node:
+      if node['children']:
+        logging.warning('A container node used as symbol for %s.' % symbol_name)
+      # This is going to be used as a leaf so no use for child list.
       del node['children']
     node['value'] = symbol_size
     node['t'] = symbol_type
     node['k'] = 's'  # s for symbol
     depth += 1
-    result['maxDepth'] = max(result['maxDepth'], depth);
+    result['maxDepth'] = max(result['maxDepth'], depth)

+  if not seen_symbol_with_path:
+    logging.warning('Symbols lack paths. Data will not be structured.')
+
+  largest_list_len = MakeChildrenDictsIntoLists(result)
+
+  if largest_list_len > 1000:
+    logging.warning('There are sections with %d nodes. '
+                    'Results might be unusable.' % largest_list_len)
   return result


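End to end, MakeCompactTree turns flat (name, type, size, path) tuples into path nodes ('k': 'p'), per-type buckets ('k': 'b'), and symbol leaves ('k': 's') carrying 'value'. A small illustration with made-up symbols:

    symbols = [('Foo::bar()', 't', 48, '/src/foo.cc'),
               ('vtable for Foo', 'r', 16, '/src/foo.cc')]
    tree = MakeCompactTree(symbols)
    # tree['children'] is now a list holding one 'src' path node; under
    # 'foo.cc' the first symbol lands in the 't' bucket and the vtable in
    # the synthetic '@' bucket; tree['maxDepth'] == 4 here.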
 # TODO(andrewhayden): Only used for legacy reports. Delete.
 def TreeifySymbols(symbols):
   """Convert symbols into a path-based tree, calculating size information
   along the way.

   The result is a dictionary that contains two kinds of nodes:
   1. Leaf nodes, representing source code locations (e.g., c++ files)
      These nodes have the following dictionary entries:
        sizes: a dictionary whose keys are categories (such as code, data,
               vtable, etceteras) and whose values are the size, in bytes, of
               those categories;
        size:  the total size, in bytes, of all the entries in the sizes dict
   2. Non-leaf nodes, representing directories
      These nodes have the following dictionary entries:
        children: a dictionary whose keys are names (path entries; either
                  directory or file names) and whose values are other nodes;
        size:     the total size, in bytes, of all the leaf nodes that are
                  contained within the children dict (recursively expanded)

   The result object is itself a dictionary that represents the common ancestor
   of all child nodes, e.g. a path to which all other nodes beneath it are
   relative. The 'size' attribute of this dict yields the sum of the size of all
   leaf nodes within the data structure.
   """
   dirs = {'children': {}, 'size': 0}
-  for sym, type, size, path in symbols:
+  for sym, symbol_type, size, path in symbols:
     dirs['size'] += size
     if path:
       path = os.path.normpath(path)
       if path.startswith('/'):
         path = path[1:]

     parts = None
     if path:
       parts = path.split('/')

(...skipping 12 matching lines...)

         # Get (creating if necessary) the node for the file
         # This node doesn't have a 'children' attribute
         if file_key not in tree['children']:
           tree['children'][file_key] = {'sizes': collections.defaultdict(int),
                                         'size': 0}
         tree = tree['children'][file_key]
         tree['size'] += size

         # Accumulate size into a bucket within the file
-        type = type.lower()
+        symbol_type = symbol_type.lower()
         if 'vtable for ' in sym:
           tree['sizes']['[vtable]'] += size
-        elif 'r' == type:
+        elif 'r' == symbol_type:
           tree['sizes']['[rodata]'] += size
-        elif 'd' == type:
+        elif 'd' == symbol_type:
           tree['sizes']['[data]'] += size
-        elif 'b' == type:
+        elif 'b' == symbol_type:
           tree['sizes']['[bss]'] += size
-        elif 't' == type:
+        elif 't' == symbol_type:
           # 'text' in binary parlance means 'code'.
           tree['sizes']['[code]'] += size
-        elif 'w' == type:
+        elif 'w' == symbol_type:
           tree['sizes']['[weak]'] += size
         else:
           tree['sizes']['[other]'] += size
       except:
-        print >>sys.stderr, sym, parts, key
+        print >> sys.stderr, sym, parts, file_key
         raise
     else:
       key = 'symbols without paths'
       if key not in dirs['children']:
         dirs['children'][key] = {'sizes': collections.defaultdict(int),
                                  'size': 0}
       tree = dirs['children'][key]
       subkey = 'misc'
       if (sym.endswith('::__FUNCTION__') or
           sym.endswith('::__PRETTY_FUNCTION__')):
(...skipping 25 matching lines...)
   if 'children' in tree:
     # Non-leaf node. Recurse.
     for child_name, child in tree['children'].iteritems():
       children.append(JsonifyTree(child, child_name))
   else:
     # Leaf node; dump per-file stats as entries in the treemap
     for kind, size in tree['sizes'].iteritems():
       child_json = {'name': kind + ' (' + FormatBytes(size) + ')',
                     'data': { '$area': size }}
       css_class = css_class_map.get(kind)
-      if css_class is not None: child_json['data']['$symbol'] = css_class
+      if css_class is not None:
+        child_json['data']['$symbol'] = css_class
       children.append(child_json)
   # Sort children by size, largest to smallest.
   children.sort(key=lambda child: -child['data']['$area'])

   # For leaf nodes, the 'size' attribute is the size of the leaf;
   # Non-leaf nodes don't really have a size, but their 'size' attribute is
   # the sum of the sizes of all their children.
   return {'name': name + ' (' + FormatBytes(tree['size']) + ')',
           'data': { '$area': tree['size'] },
           'children': children }

 def DumpCompactTree(symbols, outfile):
-  out = open(outfile, 'w')
-  try:
-    out.write('var tree_data = ' + json.dumps(MakeCompactTree(symbols)))
-  finally:
-    out.flush()
-    out.close()
+  tree_root = MakeCompactTree(symbols)
+  with open(outfile, 'w') as out:
+    out.write('var tree_data = ')
+    json.dump(tree_root, out)
+  print('Writing %d bytes json' % os.path.getsize(outfile))


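The rewritten DumpCompactTree streams the tree with json.dump rather than materializing one large json.dumps string, which matters for multi-megabyte trees, and the with-block guarantees the file is closed even on error. The output is a JS snippet, not bare JSON (path below is a placeholder):

    DumpCompactTree(symbols, '/tmp/data.js')
    # /tmp/data.js now begins with: var tree_data = {"n": "/", ...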
 # TODO(andrewhayden): Only used for legacy reports. Delete.
 def DumpTreemap(symbols, outfile):
   dirs = TreeifySymbols(symbols)
   out = open(outfile, 'w')
   try:
     out.write('var kTree = ' + json.dumps(JsonifyTree(dirs, '/')))
   finally:
     out.flush()
     out.close()


 # TODO(andrewhayden): Only used for legacy reports. Delete.
 def DumpLargestSymbols(symbols, outfile, n):
-  # a list of (sym, type, size, path); sort by size.
+  # a list of (sym, symbol_type, size, path); sort by size.
   symbols = sorted(symbols, key=lambda x: -x[2])
   dumped = 0
   out = open(outfile, 'w')
   try:
     out.write('var largestSymbols = [\n')
-    for sym, type, size, path in symbols:
-      if type in ('b', 'w'):
+    for sym, symbol_type, size, path in symbols:
+      if symbol_type in ('b', 'w'):
         continue  # skip bss and weak symbols
       if path is None:
         path = ''
       entry = {'size': FormatBytes(size),
                'symbol': sym,
-               'type': SymbolTypeToHuman(type),
+               'type': SymbolTypeToHuman(symbol_type),
                'location': path }
       out.write(json.dumps(entry))
       out.write(',\n')
       dumped += 1
       if dumped >= n:
         return
   finally:
     out.write('];\n')
     out.flush()
     out.close()


 def MakeSourceMap(symbols):
   sources = {}
-  for sym, type, size, path in symbols:
+  for _sym, _symbol_type, size, path in symbols:
     key = None
     if path:
       key = os.path.normpath(path)
     else:
       key = '[no path]'
     if key not in sources:
       sources[key] = {'path': path, 'symbol_count': 0, 'size': 0}
     record = sources[key]
     record['size'] += size
     record['symbol_count'] += 1
   return sources


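MakeSourceMap aggregates per-file totals keyed by normalized path; symbols with no path share a single '[no path]' record. For example (made-up symbols):

    symbols = [('a', 't', 10, '/src/x.cc'),
               ('b', 't', 5, '/src/x.cc'),
               ('c', 'r', 7, None)]
    m = MakeSourceMap(symbols)
    # m['/src/x.cc'] == {'path': '/src/x.cc', 'symbol_count': 2, 'size': 15}
    # m['[no path]'] == {'path': None, 'symbol_count': 1, 'size': 7}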
 # TODO(andrewhayden): Only used for legacy reports. Delete.
 def DumpLargestSources(symbols, outfile, n):
-  map = MakeSourceMap(symbols)
-  sources = sorted(map.values(), key=lambda x: -x['size'])
+  source_map = MakeSourceMap(symbols)
+  sources = sorted(source_map.values(), key=lambda x: -x['size'])
   dumped = 0
   out = open(outfile, 'w')
   try:
     out.write('var largestSources = [\n')
     for record in sources:
       entry = {'size': FormatBytes(record['size']),
                'symbol_count': str(record['symbol_count']),
                'location': record['path']}
       out.write(json.dumps(entry))
       out.write(',\n')
       dumped += 1
       if dumped >= n:
         return
   finally:
     out.write('];\n')
     out.flush()
     out.close()


 # TODO(andrewhayden): Only used for legacy reports. Delete.
 def DumpLargestVTables(symbols, outfile, n):
   vtables = []
-  for symbol, type, size, path in symbols:
+  for symbol, _type, size, path in symbols:
     if 'vtable for ' in symbol:
       vtables.append({'symbol': symbol, 'path': path, 'size': size})
   vtables = sorted(vtables, key=lambda x: -x['size'])
   dumped = 0
   out = open(outfile, 'w')
   try:
     out.write('var largestVTables = [\n')
     for record in vtables:
       entry = {'size': FormatBytes(record['size']),
                'symbol': record['symbol'],
                'location': record['path']}
       out.write(json.dumps(entry))
       out.write(',\n')
       dumped += 1
       if dumped >= n:
         return
   finally:
     out.write('];\n')
     out.flush()
     out.close()


-# TODO(andrewhayden): Switch to Primiano's python-based version.
-def RunParallelAddress2Line(outfile, library, arch, jobs, verbose):
-  """Run a parallel addr2line processing engine to dump and resolve symbols."""
-  out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out')
-  build_type = os.getenv('BUILDTYPE', 'Release')
-  classpath = os.path.join(out_dir, build_type, 'lib.java',
-                           'binary_size_java.jar')
-  cmd = ['java',
-         '-classpath', classpath,
-         'org.chromium.tools.binary_size.ParallelAddress2Line',
-         '--disambiguate',
-         '--outfile', outfile,
-         '--library', library,
-         '--threads', jobs]
-  if verbose is True:
-    cmd.append('--verbose')
-  prefix = os.path.join('third_party', 'android_tools', 'ndk', 'toolchains')
-  if arch == 'android-arm':
-    prefix = os.path.join(prefix, 'arm-linux-androideabi-4.8', 'prebuilt',
-                          'linux-x86_64', 'bin', 'arm-linux-androideabi-')
-    cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line'])
-  elif arch == 'android-mips':
-    prefix = os.path.join(prefix, 'mipsel-linux-android-4.8', 'prebuilt',
-                          'linux-x86_64', 'bin', 'mipsel-linux-android-')
-    cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line'])
-  elif arch == 'android-x86':
-    prefix = os.path.join(prefix, 'x86-4.8', 'prebuilt',
-                          'linux-x86_64', 'bin', 'i686-linux-android-')
-    cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line'])
-  # else, use whatever is in PATH (don't pass --nm or --addr2line)
+# Regex for parsing "nm" output. A sample line looks like this:
+# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
+#
+# The fields are: address, size, type, name, source location
+# Regular expression explained ( see also: https://xkcd.com/208 ):
+# ([0-9a-f]{8,}+)   The address
+# [\s]+             Whitespace separator
+# ([0-9a-f]{8,}+)   The size. From here on out it's all optional.
+# [\s]+             Whitespace separator
+# (\S?)             The symbol type, which is any non-whitespace char
+# [\s*]             Whitespace separator
+# ([^\t]*)          Symbol name, any non-tab character (spaces ok!)
+# [\t]?             Tab separator
+# (.*)              The location (filename[:linenum|?][ (discriminator n)])
+sNmPattern = re.compile(
+    r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')
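Applying sNmPattern to the sample line from the comment (with a tab before the location, as real nm output has) shows how the groups fall out:

    line = '0167b39c 00000018 t ACCESS_DESCRIPTION_free\t/path/file.c:95'
    m = sNmPattern.match(line)
    m.group(1)           # '0167b39c' (address)
    int(m.group(2), 16)  # 24 (size in bytes)
    m.group(3)           # 't' (symbol type)
    m.group(4)           # 'ACCESS_DESCRIPTION_free'
    m.group(5)           # '/path/file.c:95' (source location)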

-  if verbose:
-    print cmd
-
-  return_code = subprocess.call(cmd)
-  if return_code:
-    raise RuntimeError('Failed to run ParallelAddress2Line: returned ' +
-                       str(return_code))
+class Progress():
+  def __init__(self):
+    self.count = 0
+    self.skip_count = 0
+    self.collisions = 0
+    self.time_last_output = time.time()
+    self.count_last_output = 0


+def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs):
+  nm_output = RunNm(library, nm_binary)
+  nm_output_lines = nm_output.splitlines()
+  nm_output_lines_len = len(nm_output_lines)
+  address_symbol = {}
+  progress = Progress()
+  def map_address_symbol(symbol, addr):
+    progress.count += 1
+    if addr in address_symbol:
+      # 'Collision between %s and %s.' % (str(symbol.name),
+      #                                   str(address_symbol[addr].name))
+      progress.collisions += 1
+    else:
+      address_symbol[addr] = symbol
+
+    progress_chunk = 100
+    if progress.count % progress_chunk == 0:
+      time_now = time.time()
+      time_spent = time_now - progress.time_last_output
+      if time_spent > 1.0:
+        # Only output at most once per second.
+        progress.time_last_output = time_now
+        chunk_size = progress.count - progress.count_last_output
+        progress.count_last_output = progress.count
+        if time_spent > 0:
+          speed = chunk_size / time_spent
+        else:
+          speed = 0
+        progress_percent = (100.0 * (progress.count + progress.skip_count) /
+                            nm_output_lines_len)
+        print('%.1f%%: Looked up %d symbols (%d collisions) - %.1f lookups/s.' %
+              (progress_percent, progress.count, progress.collisions, speed))
+
+  symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
+                                            map_address_symbol,
+                                            max_concurrent_jobs=jobs)
+  for line in nm_output_lines:
+    match = sNmPattern.match(line)
+    if match:
+      location = match.group(5)
+      if not location:
+        addr = int(match.group(1), 16)
+        size = int(match.group(2), 16)
+        if addr in address_symbol:  # Already looked up, shortcut ELFSymbolizer.
+          map_address_symbol(address_symbol[addr], addr)
+          continue
+        elif size == 0:
+          # Save time by not looking up empty symbols (do they even exist?)
+          print('Empty symbol: ' + line)
+        else:
+          symbolizer.SymbolizeAsync(addr, addr)
+          continue
+
+    progress.skip_count += 1
+
+  symbolizer.Join()
+
+  with open(outfile, 'w') as out:
+    for line in nm_output_lines:
+      match = sNmPattern.match(line)
+      if match:
+        location = match.group(5)
+        if not location:
+          addr = int(match.group(1), 16)
+          symbol = address_symbol[addr]
+          path = '??'
+          if symbol.source_path is not None:
+            path = symbol.source_path
+          line_number = 0
+          if symbol.source_line is not None:
+            line_number = symbol.source_line
+          out.write('%s\t%s:%d\n' % (line, path, line_number))
+          continue
+
+      out.write('%s\n' % line)
+
+  print('%d symbols in the results.' % len(address_symbol))


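RunElfSymbolizer only queues addresses whose nm lines lack a source location; resolved symbols come back through the map_address_symbol callback. A stripped-down sketch of the same asynchronous pattern, using only the ELFSymbolizer calls this CL itself uses (library and tool paths are placeholders):

    def on_symbolized(symbol, addr):
      print('0x%x -> %s:%s' % (addr, symbol.source_path, symbol.source_line))

    symbolizer = elf_symbolizer.ELFSymbolizer(
        '/path/to/libchrome.so', '/usr/bin/addr2line',
        on_symbolized, max_concurrent_jobs=2)
    symbolizer.SymbolizeAsync(0x167b39c, 0x167b39c)  # (address, callback arg)
    symbolizer.Join()  # block until all addr2line workers drain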
+def RunNm(binary, nm_binary):
+  print('Starting nm')
+  cmd = [nm_binary, '-C', '--print-size', binary]
+  nm_process = subprocess.Popen(cmd,
+                                stdout=subprocess.PIPE,
+                                stderr=subprocess.PIPE)
+  (process_output, err_output) = nm_process.communicate()
+
+  if nm_process.returncode != 0:
+    if err_output:
+      raise Exception, err_output
+    else:
+      raise Exception, process_output
+
+  print('Finished nm')
+  return process_output


-def GetNmSymbols(infile, outfile, library, arch, jobs, verbose):
-  if infile is None:
-    if outfile is None:
-      infile = tempfile.NamedTemporaryFile(delete=False).name
-    else:
-      infile = outfile
-
-    if verbose:
-      print 'Running parallel addr2line, dumping symbols to ' + infile;
-    RunParallelAddress2Line(outfile=infile, library=library, arch=arch,
-                            jobs=jobs, verbose=verbose)
-  elif verbose:
-    print 'Using nm input from ' + infile
-  with file(infile, 'r') as infile:
-    return list(ParseNm(infile))
+def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
+                 addr2line_binary, nm_binary):
+  if nm_infile is None:
+    if outfile is None:
+      outfile = tempfile.NamedTemporaryFile(delete=False).name
+
+    if verbose:
+      print 'Running parallel addr2line, dumping symbols to ' + outfile
+    RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs)
+
+    nm_infile = outfile
+
+  elif verbose:
+    print 'Using nm input from ' + nm_infile
+  with file(nm_infile, 'r') as infile:
+    return list(binary_size_utils.ParseNm(infile))


+def _find_in_system_path(binary):
+  """Locate the full path to binary in the system path or return None
+  if not found."""
+  system_path = os.environ["PATH"].split(os.pathsep)
+  for path in system_path:
+    binary_path = os.path.join(path, binary)
+    if os.path.isfile(binary_path):
+      return binary_path
+  return None
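_find_in_system_path is a plain PATH walk, roughly what `which` does; the result depends on the environment:

    _find_in_system_path('nm')            # e.g. '/usr/bin/nm', or None
    _find_in_system_path('no-such-tool')  # None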


 def main():
-  usage="""%prog [options]
+  usage = """%prog [options]

   Runs a spatial analysis on a given library, looking up the source locations
   of its symbols and calculating how much space each directory, source file,
   and so on is taking. The result is a report that can be used to pinpoint
   sources of large portions of the binary, etceteras.

   Under normal circumstances, you only need to pass two arguments, thusly:

       %prog --library /path/to/library --destdir /path/to/output
(...skipping 10 matching lines...)
                     'present in the file; i.e., no addr2line symbol lookups '
                     'will be performed when this option is specified. '
                     'Mutually exclusive with --library.')
   parser.add_option('--destdir', metavar='PATH',
                     help='write output to the specified directory. An HTML '
                          'report is generated here along with supporting files; '
                          'any existing report will be overwritten.')
   parser.add_option('--library', metavar='PATH',
                     help='if specified, process symbols in the library at '
                          'the specified path. Mutually exclusive with --nm-in.')
-  parser.add_option('--arch',
-                    help='the architecture that the library is targeted to. '
-                         'Determines which nm/addr2line binaries are used. When '
-                         '\'host-native\' is chosen, the program will use whichever '
-                         'nm/addr2line binaries are on the PATH. This is '
-                         'appropriate when you are analyzing a binary by and for '
-                         'your computer. '
-                         'This argument is only valid when using --library. '
-                         'Default is \'host-native\'.',
-                    choices=['host-native', 'android-arm',
-                             'android-mips', 'android-x86'],)
+  parser.add_option('--nm-binary',
+                    help='use the specified nm binary to analyze library. '
+                         'This is to be used when the nm in the path is not for '
+                         'the right architecture or of the right version.')
+  parser.add_option('--addr2line-binary',
+                    help='use the specified addr2line binary to analyze '
+                         'library. This is to be used when the addr2line in '
+                         'the path is not for the right architecture or '
+                         'of the right version.')
   parser.add_option('--jobs',
                     help='number of jobs to use for the parallel '
                          'addr2line processing pool; defaults to 1. More '
                          'jobs greatly improve throughput but eat RAM like '
                          'popcorn, and take several gigabytes each. Start low '
                          'and ramp this number up until your machine begins to '
                          'struggle with RAM. '
                          'This argument is only valid when using --library.')
   parser.add_option('-v', dest='verbose', action='store_true',
                     help='be verbose, printing lots of status information.')
   parser.add_option('--nm-out', metavar='PATH',
                     help='keep the nm output file, and store it at the '
                          'specified path. This is useful if you want to see the '
                          'fully processed nm output after the symbols have been '
                          'mapped to source locations. By default, a tempfile is '
                          'used and is deleted when the program terminates. '
                          'This argument is only valid when using --library.')
   parser.add_option('--legacy', action='store_true',
                     help='emit legacy binary size report instead of modern')
-  opts, args = parser.parse_args()
+  opts, _args = parser.parse_args()

   if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
     parser.error('exactly one of --library or --nm-in is required')
   if (opts.nm_in):
     if opts.jobs:
       print >> sys.stderr, ('WARNING: --jobs has no effect '
                             'when used with --nm-in')
-    if opts.arch:
-      print >> sys.stderr, ('WARNING: --arch has no effect '
-                            'when used with --nm-in')
   if not opts.destdir:
     parser.error('--destdir is required argument')
   if not opts.jobs:
-    opts.jobs = '1'
-  if not opts.arch:
-    opts.arch = 'host-native'
+    # Use the number of processors but cap between 2 and 4 since raw
+    # CPU power isn't the limiting factor. It's I/O limited, memory
+    # bus limited and available-memory-limited. Too many processes and
+    # the computer will run out of memory and it will be slow.
+    opts.jobs = max(2, min(4, multiprocessing.cpu_count()))

-  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, opts.arch,
-                         opts.jobs, opts.verbose is True)
+  if opts.addr2line_binary:
+    assert os.path.isfile(opts.addr2line_binary)
+    addr2line_binary = opts.addr2line_binary
+  else:
+    addr2line_binary = _find_in_system_path('addr2line')
+    assert addr2line_binary, 'Unable to find addr2line in the path. '\
+        'Use --addr2line-binary to specify location.'
+
+  if opts.nm_binary:
+    assert os.path.isfile(opts.nm_binary)
+    nm_binary = opts.nm_binary
+  else:
+    nm_binary = _find_in_system_path('nm')
+    assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
+        'to specify location.'
+
+  print('nm: %s' % nm_binary)
+  print('addr2line: %s' % addr2line_binary)
+
+  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library,
+                         opts.jobs, opts.verbose is True,
+                         addr2line_binary, nm_binary)
   if not os.path.exists(opts.destdir):
     os.makedirs(opts.destdir, 0755)


   if opts.legacy:  # legacy report
     DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
     DumpLargestSymbols(symbols,
                        os.path.join(opts.destdir, 'largest-symbols.js'), 100)
     DumpLargestSources(symbols,
                        os.path.join(opts.destdir, 'largest-sources.js'), 100)
     DumpLargestVTables(symbols,
                        os.path.join(opts.destdir, 'largest-vtables.js'), 100)
     treemap_out = os.path.join(opts.destdir, 'webtreemap')
     if not os.path.exists(treemap_out):
       os.makedirs(treemap_out, 0755)
     treemap_src = os.path.join('third_party', 'webtreemap', 'src')
     shutil.copy(os.path.join(treemap_src, 'COPYING'), treemap_out)
     shutil.copy(os.path.join(treemap_src, 'webtreemap.js'), treemap_out)
     shutil.copy(os.path.join(treemap_src, 'webtreemap.css'), treemap_out)
     shutil.copy(os.path.join('tools', 'binary_size', 'legacy_template',
                              'index.html'), opts.destdir)
   else:  # modern report
     DumpCompactTree(symbols, os.path.join(opts.destdir, 'data.js'))
     d3_out = os.path.join(opts.destdir, 'd3')
     if not os.path.exists(d3_out):
       os.makedirs(d3_out, 0755)
-    d3_src = os.path.join('third_party', 'd3', 'src')
-    template_src = os.path.join('tools', 'binary_size',
-                                'template')
+    d3_src = os.path.join(os.path.dirname(__file__),
+                          '..',
+                          '..',
+                          'third_party', 'd3', 'src')
+    template_src = os.path.join(os.path.dirname(__file__),
+                                'template')
     shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out)
     shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out)
+    print('Copying index.html')
     shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir)
     shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir)

   if opts.verbose:
     print 'Report saved to ' + opts.destdir + '/index.html'


 if __name__ == '__main__':
   sys.exit(main())
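After this change, a typical modern-report invocation needs only the library and an output directory; nm and addr2line are found on PATH unless overridden (paths below are placeholders):

    python run_binary_size_analysis.py \
        --library out/Release/lib/libchrome.so \
        --destdir /tmp/size-report --jobs 4 -v
    # then open /tmp/size-report/index.html in a browser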