OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # Copyright 2014 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """Generate a spatial analysis against an arbitrary library. | |
7 | |
8 Adapted for Skia's use case from | |
9 chromium/src/tools/binary_size/run_binary_size_analysis.py. Main changes: | |
10 | |
-- Cleans up some deprecated code.
12 -- Always use relative code path so the tree root is Skia repo's root. | |
13 -- Instead of outputting the standalone HTML/CSS/JS filesets, writes the | |
14 TreeMap JSON data into a Google Storage bucket. | |
15 -- Adds githash and total_size to the JSON data. | |
16 -- Outputs another summary data in JSON Bench format for skiaperf ingestion. | |
17 | |
18 The output JSON data for visualization is in the following format: | |
19 | |
20 { | |
21 "githash": 123abc, | |
22 "commit_ts": 1234567890, | |
23 "total_size": 1234567, | |
24 "key": {"source_type": "binary_size"}, | |
25 "tree_data": { | |
26 "maxDepth": 9, | |
27 "k": "p", "children":[ | |
28 {"k":"p","children":[ | |
29 {"k":"p","children":[ | |
30 {"k":"p","lastPathElement":true,"children":[ | |
31 {"k":"b","t":"t","children":[ | |
32 {"k":"s", "t":"t", "value":4029, | |
33 "n":"etc_encode_subblock_helper(unsigned char const*, ...)" | |
34 }, | |
35 ...... | |
36 } | |
37 } | |
38 | |
39 Another JSON file is generated for size summaries to be used in skiaperf. The | |
40 JSON format details can be found at: | |
41 https://github.com/google/skia/blob/master/bench/ResultsWriter.h#L54 | |
42 and: | |
43 https://skia.googlesource.com/buildbot/+/master/perf/go/ingester/nanobench.go | |
44 | |
45 In the binary size case, outputs look like: | |
46 | |
47 { | |
48 "gitHash": "123abc", | |
49 "key": { | |
50 "source_type": "binarysize" | |
51 } | |
52 "results: { | |
53 "src_lazy_global_weak_symbol": { | |
54 "memory": { | |
55 "bytes": 41, | |
56 "options": { | |
57 "path": "src_lazy", | |
58 "symbol": "global_weak_symbol" | |
59 } | |
60 } | |
61 }, | |
62 "src_lazy_global_read_only_data": { | |
63 "memory": { | |
64 "bytes": 13476, | |
65 "options": { | |
66 "path": "src_lazy", | |
67 "symbol": "global_read_only_data" | |
68 } | |
69 } | |
70 }, | |
71 ... | |
72 } | |
73 } | |
74 | |
75 """ | |
76 | |
77 import collections | |
78 import datetime | |
79 import json | |
80 import logging | |
81 import multiprocessing | |
82 import optparse | |
83 import os | |
84 import re | |
85 import shutil | |
86 import struct | |
87 import subprocess | |
88 import sys | |
89 import tempfile | |
90 import time | |
91 import urllib2 | |
92 | |
93 import binary_size_utils | |
94 import elf_symbolizer | |
95 | |
96 from recipe_engine.types import freeze | |
97 | |
98 # Node dictionary keys. These are output in json read by the webapp so | |
99 # keep them short to save file size. | |
100 # Note: If these change, the webapp must also change. | |
101 NODE_TYPE_KEY = 'k' | |
102 NODE_NAME_KEY = 'n' | |
103 NODE_CHILDREN_KEY = 'children' | |
104 NODE_SYMBOL_TYPE_KEY = 't' | |
105 NODE_SYMBOL_SIZE_KEY = 'value' | |
106 NODE_MAX_DEPTH_KEY = 'maxDepth' | |
107 NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement' | |
108 | |
109 # The display name of the bucket where we put symbols without path. | |
110 NAME_NO_PATH_BUCKET = '(No Path)' | |
111 | |
112 # Try to keep data buckets smaller than this to avoid killing the | |
113 # graphing lib. | |
114 BIG_BUCKET_LIMIT = 3000 | |
115 | |
116 # Skia addition: relative dir for libskia.so from code base. | |
117 LIBSKIA_RELATIVE_PATH = os.path.join('out', 'Release', 'lib') | |
118 | |
119 # Skia addition: dictionary mapping symbol type code to symbol name. | |
120 # See | |
121 # https://code.google.com/p/chromium/codesearch#chromium/src/tools/binary_size/t
emplate/D3SymbolTreeMap.js&l=74 | |
122 SYMBOL_MAP = freeze({ | |
123 'A': 'global_absolute', | |
124 'B': 'global_uninitialized_data', | |
125 'b': 'local_uninitialized_data', | |
126 'C': 'global_uninitialized_common', | |
127 'D': 'global_initialized_data', | |
128 'd': 'local_initialized_data', | |
129 'G': 'global_small initialized_data', | |
130 'g': 'local_small_initialized_data', | |
131 'i': 'indirect_function', | |
132 'N': 'debugging', | |
133 'p': 'stack_unwind', | |
134 'R': 'global_read_only_data', | |
135 'r': 'local_read_only_data', | |
136 'S': 'global_small_uninitialized_data', | |
137 's': 'local_small_uninitialized_data', | |
138 'T': 'global_code', | |
139 't': 'local_code', | |
140 'U': 'undefined', | |
141 'u': 'unique', | |
142 'V': 'global_weak_object', | |
143 'v': 'local_weak_object', | |
144 'W': 'global_weak_symbol', | |
145 'w': 'local_weak_symbol', | |
146 '@': 'vtable_entry', | |
147 '-': 'stabs_debugging', | |
148 '?': 'unrecognized', | |
149 }) | |
150 | |
151 | |
152 def _MkChild(node, name): | |
153 child = node[NODE_CHILDREN_KEY].get(name) | |
154 if child is None: | |
155 child = {NODE_NAME_KEY: name, | |
156 NODE_CHILDREN_KEY: {}} | |
157 node[NODE_CHILDREN_KEY][name] = child | |
158 return child | |
159 | |
160 | |
161 def SplitNoPathBucket(node): | |
162 """NAME_NO_PATH_BUCKET can be too large for the graphing lib to | |
163 handle. Split it into sub-buckets in that case.""" | |
164 root_children = node[NODE_CHILDREN_KEY] | |
165 if NAME_NO_PATH_BUCKET in root_children: | |
166 no_path_bucket = root_children[NAME_NO_PATH_BUCKET] | |
167 old_children = no_path_bucket[NODE_CHILDREN_KEY] | |
168 count = 0 | |
169 for symbol_type, symbol_bucket in old_children.iteritems(): | |
170 count += len(symbol_bucket[NODE_CHILDREN_KEY]) | |
171 if count > BIG_BUCKET_LIMIT: | |
172 new_children = {} | |
173 no_path_bucket[NODE_CHILDREN_KEY] = new_children | |
174 current_bucket = None | |
175 index = 0 | |
176 for symbol_type, symbol_bucket in old_children.iteritems(): | |
177 for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].iteritems(): | |
178 if index % BIG_BUCKET_LIMIT == 0: | |
179 group_no = (index / BIG_BUCKET_LIMIT) + 1 | |
180 current_bucket = _MkChild(no_path_bucket, | |
181 '%s subgroup %d' % (NAME_NO_PATH_BUCKET, | |
182 group_no)) | |
183 assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p' | |
184 node[NODE_TYPE_KEY] = 'p' # p for path | |
185 index += 1 | |
186 symbol_size = value[NODE_SYMBOL_SIZE_KEY] | |
187 AddSymbolIntoFileNode(current_bucket, symbol_type, | |
188 symbol_name, symbol_size) | |
189 | |
190 | |
191 def MakeChildrenDictsIntoLists(node): | |
192 largest_list_len = 0 | |
193 if NODE_CHILDREN_KEY in node: | |
194 largest_list_len = len(node[NODE_CHILDREN_KEY]) | |
195 child_list = [] | |
196 for child in node[NODE_CHILDREN_KEY].itervalues(): | |
197 child_largest_list_len = MakeChildrenDictsIntoLists(child) | |
198 if child_largest_list_len > largest_list_len: | |
199 largest_list_len = child_largest_list_len | |
200 child_list.append(child) | |
201 node[NODE_CHILDREN_KEY] = child_list | |
202 | |
203 return largest_list_len | |
204 | |
205 | |
206 def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size): | |
207 """Puts symbol into the file path node |node|. | |
208 Returns the number of added levels in tree. I.e. returns 2.""" | |
209 | |
210 # 'node' is the file node and first step is to find its symbol-type bucket. | |
211 node[NODE_LAST_PATH_ELEMENT_KEY] = True | |
212 node = _MkChild(node, symbol_type) | |
213 assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'b' | |
214 node[NODE_SYMBOL_TYPE_KEY] = symbol_type | |
215 node[NODE_TYPE_KEY] = 'b' # b for bucket | |
216 | |
217 # 'node' is now the symbol-type bucket. Make the child entry. | |
218 node = _MkChild(node, symbol_name) | |
219 if NODE_CHILDREN_KEY in node: | |
220 if node[NODE_CHILDREN_KEY]: | |
221 logging.warning('A container node used as symbol for %s.' % symbol_name) | |
222 # This is going to be used as a leaf so no use for child list. | |
223 del node[NODE_CHILDREN_KEY] | |
224 node[NODE_SYMBOL_SIZE_KEY] = symbol_size | |
225 node[NODE_SYMBOL_TYPE_KEY] = symbol_type | |
226 node[NODE_TYPE_KEY] = 's' # s for symbol | |
227 | |
228 return 2 # Depth of the added subtree. | |
229 | |
230 | |
231 def MakeCompactTree(symbols, symbol_path_origin_dir): | |
232 result = {NODE_NAME_KEY: '/', | |
233 NODE_CHILDREN_KEY: {}, | |
234 NODE_TYPE_KEY: 'p', | |
235 NODE_MAX_DEPTH_KEY: 0} | |
236 seen_symbol_with_path = False | |
237 for symbol_name, symbol_type, symbol_size, file_path in symbols: | |
238 | |
239 if 'vtable for ' in symbol_name: | |
240 symbol_type = '@' # hack to categorize these separately | |
241 if file_path and file_path != "??": | |
242 seen_symbol_with_path = True | |
243 else: | |
244 file_path = NAME_NO_PATH_BUCKET | |
245 | |
246 path_parts = file_path.split('/') | |
247 | |
248 # Find pre-existing node in tree, or update if it already exists | |
249 node = result | |
250 depth = 0 | |
251 while len(path_parts) > 0: | |
252 path_part = path_parts.pop(0) | |
253 if len(path_part) == 0: | |
254 continue | |
255 depth += 1 | |
256 node = _MkChild(node, path_part) | |
257 assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p' | |
258 node[NODE_TYPE_KEY] = 'p' # p for path | |
259 | |
260 depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size) | |
261 result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth) | |
262 | |
263 if not seen_symbol_with_path: | |
264 logging.warning('Symbols lack paths. Data will not be structured.') | |
265 | |
266 # The (no path) bucket can be extremely large if we failed to get | |
267 # path information. Split it into subgroups if needed. | |
268 SplitNoPathBucket(result) | |
269 | |
270 largest_list_len = MakeChildrenDictsIntoLists(result) | |
271 | |
272 if largest_list_len > BIG_BUCKET_LIMIT: | |
273 logging.warning('There are sections with %d nodes. ' | |
274 'Results might be unusable.' % largest_list_len) | |
275 return result | |
276 | |
277 | |
278 # Skia added: summarizes tree size by symbol type for the given root node. | |
279 # Returns a dict keyed by symbol type, and value the type's overall size. | |
280 # e.g., {"t": 12345, "W": 543}. | |
281 def GetTreeSizes(node): | |
282 if 'children' not in node or not node['children']: | |
283 return {node['t']: node['value']} | |
284 dic = {} | |
285 for i in node['children']: | |
286 for k, v in GetTreeSizes(i).items(): | |
287 dic.setdefault(k, 0) | |
288 dic[k] += v | |
289 | |
290 return dic | |
291 | |
292 | |
293 # Skia added: creates dict to be converted to JSON in bench format. | |
294 # See top of file for the structure description. | |
295 def GetBenchDict(githash, tree_root): | |
296 dic = {'gitHash': githash, | |
297 'key': {'source_type': 'binarysize'}, | |
298 'results': {},} | |
299 for i in tree_root['children']: | |
300 if '(No Path)' == i['n']: # Already at symbol summary level. | |
301 for k, v in GetTreeSizes(i).items(): | |
302 dic['results']['no_path_' + SYMBOL_MAP[k]] = { | |
303 'memory': { | |
304 'bytes': v, | |
305 'options': {'path': 'no_path', | |
306 'symbol': SYMBOL_MAP[k],},}} | |
307 else: # We need to go deeper. | |
308 for c in i['children']: | |
309 path = i['n'] + '_' + c['n'] | |
310 for k, v in GetTreeSizes(c).items(): | |
311 dic['results'][path + '_' + SYMBOL_MAP[k]] = { | |
312 'memory': { | |
313 'bytes': v, | |
314 'options': {'path': path, | |
315 'symbol': SYMBOL_MAP[k],}}} | |
316 | |
317 return dic | |
318 | |
319 | |
320 # Skia added: constructs 'gsutil cp' subprocess command list. | |
321 def GetGsCopyCommandList(gsutil, src, dst): | |
322 return [gsutil, '-h', 'Content-Type:application/json', 'cp', '-a', | |
323 'public-read', src, dst] | |
324 | |
325 | |
326 def DumpCompactTree(symbols, symbol_path_origin_dir, ha, ts, issue, gsutil): | |
327 tree_root = MakeCompactTree(symbols, symbol_path_origin_dir) | |
328 json_data = {'tree_data': tree_root, | |
329 'githash': ha, | |
330 'commit_ts': ts, | |
331 'key': {'source_type': 'binary_size'}, | |
332 'total_size': sum(GetTreeSizes(tree_root).values()),} | |
333 tmpfile = tempfile.NamedTemporaryFile(delete=False).name | |
334 with open(tmpfile, 'w') as out: | |
335 # Use separators without whitespace to get a smaller file. | |
336 json.dump(json_data, out, separators=(',', ':')) | |
337 | |
338 GS_PREFIX = 'gs://chromium-skia-gm/' | |
339 # Writes to Google Storage for visualization. | |
340 subprocess.check_call(GetGsCopyCommandList( | |
341 gsutil, tmpfile, GS_PREFIX + 'size/' + ha + '.json')) | |
342 # Updates the latest data. | |
343 if not issue: | |
344 subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile, | |
345 GS_PREFIX + 'size/latest.json')) | |
346 # Writes an extra copy using year/month/day/hour path for easy ingestion. | |
347 with open(tmpfile, 'w') as out: | |
348 json.dump(GetBenchDict(ha, tree_root), out, separators=(',', ':')) | |
349 now = datetime.datetime.utcnow() | |
350 ingest_path = '/'.join(('nano-json-v1', str(now.year).zfill(4), | |
351 str(now.month).zfill(2), str(now.day).zfill(2), | |
352 str(now.hour).zfill(2))) | |
353 if issue: | |
354 ingest_path = '/'.join('trybot', ingest_path, issue) | |
355 subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile, | |
356 GS_PREFIX + ingest_path + '/binarysize_' + ha + '.json')) | |
357 | |
358 | |
359 def MakeSourceMap(symbols): | |
360 sources = {} | |
361 for _sym, _symbol_type, size, path in symbols: | |
362 key = None | |
363 if path: | |
364 key = os.path.normpath(path) | |
365 else: | |
366 key = '[no path]' | |
367 if key not in sources: | |
368 sources[key] = {'path': path, 'symbol_count': 0, 'size': 0} | |
369 record = sources[key] | |
370 record['size'] += size | |
371 record['symbol_count'] += 1 | |
372 return sources | |
373 | |
374 | |
375 # Regex for parsing "nm" output. A sample line looks like this: | |
376 # 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95 | |
377 # | |
378 # The fields are: address, size, type, name, source location | |
379 # Regular expression explained ( see also: https://xkcd.com/208 ): | |
380 # ([0-9a-f]{8,}+) The address | |
381 # [\s]+ Whitespace separator | |
382 # ([0-9a-f]{8,}+) The size. From here on out it's all optional. | |
383 # [\s]+ Whitespace separator | |
384 # (\S?) The symbol type, which is any non-whitespace char | |
385 # [\s*] Whitespace separator | |
386 # ([^\t]*) Symbol name, any non-tab character (spaces ok!) | |
387 # [\t]? Tab separator | |
388 # (.*) The location (filename[:linennum|?][ (discriminator n)] | |
389 sNmPattern = re.compile( | |
390 r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)') | |
391 | |
392 class Progress(): | |
393 def __init__(self): | |
394 self.count = 0 | |
395 self.skip_count = 0 | |
396 self.collisions = 0 | |
397 self.time_last_output = time.time() | |
398 self.count_last_output = 0 | |
399 self.disambiguations = 0 | |
400 self.was_ambiguous = 0 | |
401 | |
402 | |
403 def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs, | |
404 disambiguate, src_path): | |
405 nm_output = RunNm(library, nm_binary) | |
406 nm_output_lines = nm_output.splitlines() | |
407 nm_output_lines_len = len(nm_output_lines) | |
408 address_symbol = {} | |
409 progress = Progress() | |
410 def map_address_symbol(symbol, addr): | |
411 progress.count += 1 | |
412 if addr in address_symbol: | |
413 # 'Collision between %s and %s.' % (str(symbol.name), | |
414 # str(address_symbol[addr].name)) | |
415 progress.collisions += 1 | |
416 else: | |
417 if symbol.disambiguated: | |
418 progress.disambiguations += 1 | |
419 if symbol.was_ambiguous: | |
420 progress.was_ambiguous += 1 | |
421 | |
422 address_symbol[addr] = symbol | |
423 | |
424 progress_output() | |
425 | |
426 def progress_output(): | |
427 progress_chunk = 100 | |
428 if progress.count % progress_chunk == 0: | |
429 time_now = time.time() | |
430 time_spent = time_now - progress.time_last_output | |
431 if time_spent > 1.0: | |
432 # Only output at most once per second. | |
433 progress.time_last_output = time_now | |
434 chunk_size = progress.count - progress.count_last_output | |
435 progress.count_last_output = progress.count | |
436 if time_spent > 0: | |
437 speed = chunk_size / time_spent | |
438 else: | |
439 speed = 0 | |
440 progress_percent = (100.0 * (progress.count + progress.skip_count) / | |
441 nm_output_lines_len) | |
442 disambiguation_percent = 0 | |
443 if progress.disambiguations != 0: | |
444 disambiguation_percent = (100.0 * progress.disambiguations / | |
445 progress.was_ambiguous) | |
446 | |
447 sys.stdout.write('\r%.1f%%: Looked up %d symbols (%d collisions, ' | |
448 '%d disambiguations where %.1f%% succeeded)' | |
449 ' - %.1f lookups/s.' % | |
450 (progress_percent, progress.count, progress.collisions, | |
451 progress.disambiguations, disambiguation_percent, speed)) | |
452 | |
453 # In case disambiguation was disabled, we remove the source path (which upon | |
454 # being set signals the symbolizer to enable disambiguation) | |
455 if not disambiguate: | |
456 src_path = None | |
457 symbol_path_origin_dir = os.path.dirname(library) | |
458 # Skia specific. | |
459 symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '') | |
460 symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary, | |
461 map_address_symbol, | |
462 max_concurrent_jobs=jobs, | |
463 source_root_path=src_path, | |
464 prefix_to_remove=symbol_path_prefix) | |
465 user_interrupted = False | |
466 try: | |
467 for line in nm_output_lines: | |
468 match = sNmPattern.match(line) | |
469 if match: | |
470 location = match.group(5) | |
471 if not location: | |
472 addr = int(match.group(1), 16) | |
473 size = int(match.group(2), 16) | |
474 if addr in address_symbol: # Already looked up, shortcut | |
475 # ELFSymbolizer. | |
476 map_address_symbol(address_symbol[addr], addr) | |
477 continue | |
478 elif size == 0: | |
479 # Save time by not looking up empty symbols (do they even exist?) | |
480 print('Empty symbol: ' + line) | |
481 else: | |
482 symbolizer.SymbolizeAsync(addr, addr) | |
483 continue | |
484 | |
485 progress.skip_count += 1 | |
486 except KeyboardInterrupt: | |
487 user_interrupted = True | |
488 print('Interrupting - killing subprocesses. Please wait.') | |
489 | |
490 try: | |
491 symbolizer.Join() | |
492 except KeyboardInterrupt: | |
493 # Don't want to abort here since we will be finished in a few seconds. | |
494 user_interrupted = True | |
495 print('Patience you must have my young padawan.') | |
496 | |
497 print '' | |
498 | |
499 if user_interrupted: | |
500 print('Skipping the rest of the file mapping. ' | |
501 'Output will not be fully classified.') | |
502 | |
503 symbol_path_origin_dir = os.path.dirname(library) | |
504 # Skia specific: path prefix to strip. | |
505 symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '') | |
506 | |
507 with open(outfile, 'w') as out: | |
508 for line in nm_output_lines: | |
509 match = sNmPattern.match(line) | |
510 if match: | |
511 location = match.group(5) | |
512 if not location: | |
513 addr = int(match.group(1), 16) | |
514 symbol = address_symbol.get(addr) | |
515 if symbol is not None: | |
516 path = '??' | |
517 if symbol.source_path is not None: | |
518 path = symbol.source_path.replace(symbol_path_prefix, '') | |
519 line_number = 0 | |
520 if symbol.source_line is not None: | |
521 line_number = symbol.source_line | |
522 out.write('%s\t%s:%d\n' % (line, path, line_number)) | |
523 continue | |
524 | |
525 out.write('%s\n' % line) | |
526 | |
527 print('%d symbols in the results.' % len(address_symbol)) | |
528 | |
529 | |
530 def RunNm(binary, nm_binary): | |
531 cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort', | |
532 binary] | |
533 nm_process = subprocess.Popen(cmd, | |
534 stdout=subprocess.PIPE, | |
535 stderr=subprocess.PIPE) | |
536 (process_output, err_output) = nm_process.communicate() | |
537 | |
538 if nm_process.returncode != 0: | |
539 if err_output: | |
540 raise Exception, err_output | |
541 else: | |
542 raise Exception, process_output | |
543 | |
544 return process_output | |
545 | |
546 | |
547 def GetNmSymbols(nm_infile, outfile, library, jobs, verbose, | |
548 addr2line_binary, nm_binary, disambiguate, src_path): | |
549 if nm_infile is None: | |
550 if outfile is None: | |
551 outfile = tempfile.NamedTemporaryFile(delete=False).name | |
552 | |
553 if verbose: | |
554 print 'Running parallel addr2line, dumping symbols to ' + outfile | |
555 RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs, | |
556 disambiguate, src_path) | |
557 | |
558 nm_infile = outfile | |
559 | |
560 elif verbose: | |
561 print 'Using nm input from ' + nm_infile | |
562 with file(nm_infile, 'r') as infile: | |
563 return list(binary_size_utils.ParseNm(infile)) | |
564 | |
565 | |
566 PAK_RESOURCE_ID_TO_STRING = { "inited": False } | |
567 | |
568 def LoadPakIdsFromResourceFile(filename): | |
569 """Given a file name, it loads everything that looks like a resource id | |
570 into PAK_RESOURCE_ID_TO_STRING.""" | |
571 with open(filename) as resource_header: | |
572 for line in resource_header: | |
573 if line.startswith("#define "): | |
574 line_data = line.split() | |
575 if len(line_data) == 3: | |
576 try: | |
577 resource_number = int(line_data[2]) | |
578 resource_name = line_data[1] | |
579 PAK_RESOURCE_ID_TO_STRING[resource_number] = resource_name | |
580 except ValueError: | |
581 pass | |
582 | |
583 def GetReadablePakResourceName(pak_file, resource_id): | |
584 """Pak resources have a numeric identifier. It is not helpful when | |
585 trying to locate where footprint is generated. This does its best to | |
586 map the number to a usable string.""" | |
587 if not PAK_RESOURCE_ID_TO_STRING['inited']: | |
588 # Try to find resource header files generated by grit when | |
589 # building the pak file. We'll look for files named *resources.h" | |
590 # and lines of the type: | |
591 # #define MY_RESOURCE_JS 1234 | |
592 PAK_RESOURCE_ID_TO_STRING['inited'] = True | |
593 gen_dir = os.path.join(os.path.dirname(pak_file), 'gen') | |
594 if os.path.isdir(gen_dir): | |
595 for dirname, _dirs, files in os.walk(gen_dir): | |
596 for filename in files: | |
597 if filename.endswith('resources.h'): | |
598 LoadPakIdsFromResourceFile(os.path.join(dirname, filename)) | |
599 return PAK_RESOURCE_ID_TO_STRING.get(resource_id, | |
600 'Pak Resource %d' % resource_id) | |
601 | |
602 def AddPakData(symbols, pak_file): | |
603 """Adds pseudo-symbols from a pak file.""" | |
604 pak_file = os.path.abspath(pak_file) | |
605 with open(pak_file, 'rb') as pak: | |
606 data = pak.read() | |
607 | |
608 PAK_FILE_VERSION = 4 | |
609 HEADER_LENGTH = 2 * 4 + 1 # Two uint32s. (file version, number of entries) | |
610 # and one uint8 (encoding of text resources) | |
611 INDEX_ENTRY_SIZE = 2 + 4 # Each entry is a uint16 and a uint32. | |
612 version, num_entries, _encoding = struct.unpack('<IIB', data[:HEADER_LENGTH]) | |
613 assert version == PAK_FILE_VERSION, ('Unsupported pak file ' | |
614 'version (%d) in %s. Only ' | |
615 'support version %d' % | |
616 (version, pak_file, PAK_FILE_VERSION)) | |
617 if num_entries > 0: | |
618 # Read the index and data. | |
619 data = data[HEADER_LENGTH:] | |
620 for _ in range(num_entries): | |
621 resource_id, offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE]) | |
622 data = data[INDEX_ENTRY_SIZE:] | |
623 _next_id, next_offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE]) | |
624 resource_size = next_offset - offset | |
625 | |
626 symbol_name = GetReadablePakResourceName(pak_file, resource_id) | |
627 symbol_path = pak_file | |
628 symbol_type = 'd' # Data. Approximation. | |
629 symbol_size = resource_size | |
630 symbols.append((symbol_name, symbol_type, symbol_size, symbol_path)) | |
631 | |
632 def _find_in_system_path(binary): | |
633 """Locate the full path to binary in the system path or return None | |
634 if not found.""" | |
635 system_path = os.environ["PATH"].split(os.pathsep) | |
636 for path in system_path: | |
637 binary_path = os.path.join(path, binary) | |
638 if os.path.isfile(binary_path): | |
639 return binary_path | |
640 return None | |
641 | |
642 def CheckDebugFormatSupport(library, addr2line_binary): | |
643 """Kills the program if debug data is in an unsupported format. | |
644 | |
645 There are two common versions of the DWARF debug formats and | |
646 since we are right now transitioning from DWARF2 to newer formats, | |
647 it's possible to have a mix of tools that are not compatible. Detect | |
648 that and abort rather than produce meaningless output.""" | |
649 tool_output = subprocess.check_output([addr2line_binary, '--version']) | |
650 version_re = re.compile(r'^GNU [^ ]+ .* (\d+).(\d+).*?$', re.M) | |
651 parsed_output = version_re.match(tool_output) | |
652 major = int(parsed_output.group(1)) | |
653 minor = int(parsed_output.group(2)) | |
654 supports_dwarf4 = major > 2 or major == 2 and minor > 22 | |
655 | |
656 if supports_dwarf4: | |
657 return | |
658 | |
659 print('Checking version of debug information in %s.' % library) | |
660 debug_info = subprocess.check_output(['readelf', '--debug-dump=info', | |
661 '--dwarf-depth=1', library]) | |
662 dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M) | |
663 parsed_dwarf_format_output = dwarf_version_re.search(debug_info) | |
664 version = int(parsed_dwarf_format_output.group(1)) | |
665 if version > 2: | |
666 print('The supplied tools only support DWARF2 debug data but the binary\n' + | |
667 'uses DWARF%d. Update the tools or compile the binary\n' % version + | |
668 'with -gdwarf-2.') | |
669 sys.exit(1) | |
670 | |
671 | |
672 def main(): | |
673 usage = """%prog [options] | |
674 | |
675 Runs a spatial analysis on a given library, looking up the source locations | |
676 of its symbols and calculating how much space each directory, source file, | |
677 and so on is taking. The result is a report that can be used to pinpoint | |
678 sources of large portions of the binary, etceteras. | |
679 | |
680 Under normal circumstances, you only need to pass two arguments, thusly: | |
681 | |
682 %prog --library /path/to/library --destdir /path/to/output | |
683 | |
684 In this mode, the program will dump the symbols from the specified library | |
685 and map those symbols back to source locations, producing a web-based | |
686 report in the specified output directory. | |
687 | |
688 Other options are available via '--help'. | |
689 """ | |
690 parser = optparse.OptionParser(usage=usage) | |
691 parser.add_option('--nm-in', metavar='PATH', | |
692 help='if specified, use nm input from <path> instead of ' | |
693 'generating it. Note that source locations should be ' | |
694 'present in the file; i.e., no addr2line symbol lookups ' | |
695 'will be performed when this option is specified. ' | |
696 'Mutually exclusive with --library.') | |
697 parser.add_option('--destdir', metavar='PATH', | |
698 help='write output to the specified directory. An HTML ' | |
699 'report is generated here along with supporting files; ' | |
700 'any existing report will be overwritten. Not used in ' | |
701 'Skia.') | |
702 parser.add_option('--library', metavar='PATH', | |
703 help='if specified, process symbols in the library at ' | |
704 'the specified path. Mutually exclusive with --nm-in.') | |
705 parser.add_option('--pak', metavar='PATH', | |
706 help='if specified, includes the contents of the ' | |
707 'specified *.pak file in the output.') | |
708 parser.add_option('--nm-binary', | |
709 help='use the specified nm binary to analyze library. ' | |
710 'This is to be used when the nm in the path is not for ' | |
711 'the right architecture or of the right version.') | |
712 parser.add_option('--addr2line-binary', | |
713 help='use the specified addr2line binary to analyze ' | |
714 'library. This is to be used when the addr2line in ' | |
715 'the path is not for the right architecture or ' | |
716 'of the right version.') | |
717 parser.add_option('--jobs', type='int', | |
718 help='number of jobs to use for the parallel ' | |
719 'addr2line processing pool; defaults to 1. More ' | |
720 'jobs greatly improve throughput but eat RAM like ' | |
721 'popcorn, and take several gigabytes each. Start low ' | |
722 'and ramp this number up until your machine begins to ' | |
723 'struggle with RAM. ' | |
724 'This argument is only valid when using --library.') | |
725 parser.add_option('-v', dest='verbose', action='store_true', | |
726 help='be verbose, printing lots of status information.') | |
727 parser.add_option('--nm-out', metavar='PATH', | |
728 help='keep the nm output file, and store it at the ' | |
729 'specified path. This is useful if you want to see the ' | |
730 'fully processed nm output after the symbols have been ' | |
731 'mapped to source locations. By default, a tempfile is ' | |
732 'used and is deleted when the program terminates.' | |
733 'This argument is only valid when using --library.') | |
734 parser.add_option('--legacy', action='store_true', | |
735 help='emit legacy binary size report instead of modern') | |
736 parser.add_option('--disable-disambiguation', action='store_true', | |
737 help='disables the disambiguation process altogether,' | |
738 ' NOTE: this may, depending on your toolchain, produce' | |
739 ' output with some symbols at the top layer if addr2line' | |
740 ' could not get the entire source path.') | |
741 parser.add_option('--source-path', default='./', | |
742 help='the path to the source code of the output binary, ' | |
743 'default set to current directory. Used in the' | |
744 ' disambiguation process.') | |
745 parser.add_option('--githash', default='latest', | |
746 help='Git hash for the binary version. Added by Skia.') | |
747 parser.add_option('--commit_ts', type='int', default=-1, | |
748 help='Timestamp for the commit. Added by Skia.') | |
749 parser.add_option('--issue_number', default='', | |
750 help='The trybot issue number in string. Added by Skia.') | |
751 parser.add_option('--gsutil_path', default='gsutil', | |
752 help='Path to gsutil binary. Added by Skia.') | |
753 opts, _args = parser.parse_args() | |
754 | |
755 if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in): | |
756 parser.error('exactly one of --library or --nm-in is required') | |
757 if (opts.nm_in): | |
758 if opts.jobs: | |
759 print >> sys.stderr, ('WARNING: --jobs has no effect ' | |
760 'when used with --nm-in') | |
761 if not opts.jobs: | |
762 # Use the number of processors but cap between 2 and 4 since raw | |
763 # CPU power isn't the limiting factor. It's I/O limited, memory | |
764 # bus limited and available-memory-limited. Too many processes and | |
765 # the computer will run out of memory and it will be slow. | |
766 opts.jobs = max(2, min(4, str(multiprocessing.cpu_count()))) | |
767 | |
768 if opts.addr2line_binary: | |
769 assert os.path.isfile(opts.addr2line_binary) | |
770 addr2line_binary = opts.addr2line_binary | |
771 else: | |
772 addr2line_binary = _find_in_system_path('addr2line') | |
773 assert addr2line_binary, 'Unable to find addr2line in the path. '\ | |
774 'Use --addr2line-binary to specify location.' | |
775 | |
776 if opts.nm_binary: | |
777 assert os.path.isfile(opts.nm_binary) | |
778 nm_binary = opts.nm_binary | |
779 else: | |
780 nm_binary = _find_in_system_path('nm') | |
781 assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\ | |
782 'to specify location.' | |
783 | |
784 if opts.pak: | |
785 assert os.path.isfile(opts.pak), 'Could not find ' % opts.pak | |
786 | |
787 print('addr2line: %s' % addr2line_binary) | |
788 print('nm: %s' % nm_binary) | |
789 | |
790 if opts.library: | |
791 CheckDebugFormatSupport(opts.library, addr2line_binary) | |
792 | |
793 symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, | |
794 opts.jobs, opts.verbose is True, | |
795 addr2line_binary, nm_binary, | |
796 opts.disable_disambiguation is None, | |
797 opts.source_path) | |
798 | |
799 if opts.pak: | |
800 AddPakData(symbols, opts.pak) | |
801 | |
802 if opts.legacy: # legacy report | |
803 print 'Do Not set legacy flag.' | |
804 | |
805 else: # modern report | |
806 if opts.library: | |
807 symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library)) | |
808 else: | |
809 # Just a guess. Hopefully all paths in the input file are absolute. | |
810 symbol_path_origin_dir = os.path.abspath(os.getcwd()) | |
811 DumpCompactTree(symbols, symbol_path_origin_dir, opts.githash, | |
812 opts.commit_ts, opts.issue_number, opts.gsutil_path) | |
813 print 'Report data uploaded to GS.' | |
814 | |
815 | |
816 if __name__ == '__main__': | |
817 sys.exit(main()) | |
OLD | NEW |