Chromium Code Reviews

Side by Side Diff: infra/bots/recipe_modules/skia/resources/run_binary_size_analysis.py

Issue 2198173002: Re-organize Skia recipes (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Fix missing dependency Created 4 years, 4 months ago
1 #!/usr/bin/env python
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Generate a spatial analysis against an arbitrary library.
7
8 Adapted for Skia's use case from
9 chromium/src/tools/binary_size/run_binary_size_analysis.py. Main changes:
10
11 -- Cleans up some deprecated code.
12 -- Always uses relative code paths so the tree root is the Skia repo's root.
13 -- Instead of outputting the standalone HTML/CSS/JS filesets, writes the
14 TreeMap JSON data into a Google Storage bucket.
15 -- Adds githash and total_size to the JSON data.
16 -- Outputs additional summary data in JSON Bench format for skiaperf ingestion.
17
18 The output JSON data for visualization is in the following format:
19
20 {
21 "githash": 123abc,
22 "commit_ts": 1234567890,
23 "total_size": 1234567,
24 "key": {"source_type": "binary_size"},
25 "tree_data": {
26 "maxDepth": 9,
27 "k": "p", "children":[
28 {"k":"p","children":[
29 {"k":"p","children":[
30 {"k":"p","lastPathElement":true,"children":[
31 {"k":"b","t":"t","children":[
32 {"k":"s", "t":"t", "value":4029,
33 "n":"etc_encode_subblock_helper(unsigned char const*, ...)"
34 },
35 ......
36 }
37 }
38
39 Another JSON file is generated for size summaries to be used in skiaperf. The
40 JSON format details can be found at:
41 https://github.com/google/skia/blob/master/bench/ResultsWriter.h#L54
42 and:
43 https://skia.googlesource.com/buildbot/+/master/perf/go/ingester/nanobench.go
44
45 In the binary size case, the output looks like:
46
47 {
48 "gitHash": "123abc",
49 "key": {
50 "source_type": "binarysize"
51 },
52 "results": {
53 "src_lazy_global_weak_symbol": {
54 "memory": {
55 "bytes": 41,
56 "options": {
57 "path": "src_lazy",
58 "symbol": "global_weak_symbol"
59 }
60 }
61 },
62 "src_lazy_global_read_only_data": {
63 "memory": {
64 "bytes": 13476,
65 "options": {
66 "path": "src_lazy",
67 "symbol": "global_read_only_data"
68 }
69 }
70 },
71 ...
72 }
73 }
74
75 """
76
77 import collections
78 import datetime
79 import json
80 import logging
81 import multiprocessing
82 import optparse
83 import os
84 import re
85 import shutil
86 import struct
87 import subprocess
88 import sys
89 import tempfile
90 import time
91 import urllib2
92
93 import binary_size_utils
94 import elf_symbolizer
95
96 from recipe_engine.types import freeze
97
98 # Node dictionary keys. These are output in the JSON read by the webapp,
99 # so keep them short to save file size.
100 # Note: If these change, the webapp must also change.
101 NODE_TYPE_KEY = 'k'
102 NODE_NAME_KEY = 'n'
103 NODE_CHILDREN_KEY = 'children'
104 NODE_SYMBOL_TYPE_KEY = 't'
105 NODE_SYMBOL_SIZE_KEY = 'value'
106 NODE_MAX_DEPTH_KEY = 'maxDepth'
107 NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement'
108
109 # The display name of the bucket where we put symbols without path.
110 NAME_NO_PATH_BUCKET = '(No Path)'
111
112 # Try to keep data buckets smaller than this to avoid killing the
113 # graphing lib.
114 BIG_BUCKET_LIMIT = 3000
115
116 # Skia addition: relative dir for libskia.so from code base.
117 LIBSKIA_RELATIVE_PATH = os.path.join('out', 'Release', 'lib')
118
119 # Skia addition: dictionary mapping symbol type code to symbol name.
120 # See
121 # https://code.google.com/p/chromium/codesearch#chromium/src/tools/binary_size/template/D3SymbolTreeMap.js&l=74
122 SYMBOL_MAP = freeze({
123 'A': 'global_absolute',
124 'B': 'global_uninitialized_data',
125 'b': 'local_uninitialized_data',
126 'C': 'global_uninitialized_common',
127 'D': 'global_initialized_data',
128 'd': 'local_initialized_data',
129 'G': 'global_small_initialized_data',
130 'g': 'local_small_initialized_data',
131 'i': 'indirect_function',
132 'N': 'debugging',
133 'p': 'stack_unwind',
134 'R': 'global_read_only_data',
135 'r': 'local_read_only_data',
136 'S': 'global_small_uninitialized_data',
137 's': 'local_small_uninitialized_data',
138 'T': 'global_code',
139 't': 'local_code',
140 'U': 'undefined',
141 'u': 'unique',
142 'V': 'global_weak_object',
143 'v': 'local_weak_object',
144 'W': 'global_weak_symbol',
145 'w': 'local_weak_symbol',
146 '@': 'vtable_entry',
147 '-': 'stabs_debugging',
148 '?': 'unrecognized',
149 })
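As an aside, a hedged sketch of how these codes end up in skiaperf result
keys; perf_key is a hypothetical helper, the real key construction lives in
GetBenchDict below:

    def perf_key(path, type_code):
        # perf_key('src_lazy', 'W') -> 'src_lazy_global_weak_symbol'
        return path + '_' + SYMBOL_MAP.get(type_code, 'unrecognized')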
150
151
152 def _MkChild(node, name):
153 child = node[NODE_CHILDREN_KEY].get(name)
154 if child is None:
155 child = {NODE_NAME_KEY: name,
156 NODE_CHILDREN_KEY: {}}
157 node[NODE_CHILDREN_KEY][name] = child
158 return child
159
160
161 def SplitNoPathBucket(node):
162 """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
163 handle. Split it into sub-buckets in that case."""
164 root_children = node[NODE_CHILDREN_KEY]
165 if NAME_NO_PATH_BUCKET in root_children:
166 no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
167 old_children = no_path_bucket[NODE_CHILDREN_KEY]
168 count = 0
169 for symbol_type, symbol_bucket in old_children.iteritems():
170 count += len(symbol_bucket[NODE_CHILDREN_KEY])
171 if count > BIG_BUCKET_LIMIT:
172 new_children = {}
173 no_path_bucket[NODE_CHILDREN_KEY] = new_children
174 current_bucket = None
175 index = 0
176 for symbol_type, symbol_bucket in old_children.iteritems():
177 for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].iteritems():
178 if index % BIG_BUCKET_LIMIT == 0:
179 group_no = (index / BIG_BUCKET_LIMIT) + 1
180 current_bucket = _MkChild(no_path_bucket,
181 '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
182 group_no))
183 assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
184 node[NODE_TYPE_KEY] = 'p' # p for path
185 index += 1
186 symbol_size = value[NODE_SYMBOL_SIZE_KEY]
187 AddSymbolIntoFileNode(current_bucket, symbol_type,
188 symbol_name, symbol_size)
189
190
191 def MakeChildrenDictsIntoLists(node):
192 largest_list_len = 0
193 if NODE_CHILDREN_KEY in node:
194 largest_list_len = len(node[NODE_CHILDREN_KEY])
195 child_list = []
196 for child in node[NODE_CHILDREN_KEY].itervalues():
197 child_largest_list_len = MakeChildrenDictsIntoLists(child)
198 if child_largest_list_len > largest_list_len:
199 largest_list_len = child_largest_list_len
200 child_list.append(child)
201 node[NODE_CHILDREN_KEY] = child_list
202
203 return largest_list_len
204
205
206 def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
207 """Puts symbol into the file path node |node|.
208 Returns the number of added levels in tree. I.e. returns 2."""
209
210 # 'node' is the file node and first step is to find its symbol-type bucket.
211 node[NODE_LAST_PATH_ELEMENT_KEY] = True
212 node = _MkChild(node, symbol_type)
213 assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'b'
214 node[NODE_SYMBOL_TYPE_KEY] = symbol_type
215 node[NODE_TYPE_KEY] = 'b' # b for bucket
216
217 # 'node' is now the symbol-type bucket. Make the child entry.
218 node = _MkChild(node, symbol_name)
219 if NODE_CHILDREN_KEY in node:
220 if node[NODE_CHILDREN_KEY]:
221 logging.warning('A container node used as symbol for %s.' % symbol_name)
222 # This is going to be used as a leaf so no use for child list.
223 del node[NODE_CHILDREN_KEY]
224 node[NODE_SYMBOL_SIZE_KEY] = symbol_size
225 node[NODE_SYMBOL_TYPE_KEY] = symbol_type
226 node[NODE_TYPE_KEY] = 's' # s for symbol
227
228 return 2 # Depth of the added subtree.
229
230
231 def MakeCompactTree(symbols, symbol_path_origin_dir):
232 result = {NODE_NAME_KEY: '/',
233 NODE_CHILDREN_KEY: {},
234 NODE_TYPE_KEY: 'p',
235 NODE_MAX_DEPTH_KEY: 0}
236 seen_symbol_with_path = False
237 for symbol_name, symbol_type, symbol_size, file_path in symbols:
238
239 if 'vtable for ' in symbol_name:
240 symbol_type = '@' # hack to categorize these separately
241 if file_path and file_path != "??":
242 seen_symbol_with_path = True
243 else:
244 file_path = NAME_NO_PATH_BUCKET
245
246 path_parts = file_path.split('/')
247
248 # Find or create the node for this path in the tree.
249 node = result
250 depth = 0
251 while len(path_parts) > 0:
252 path_part = path_parts.pop(0)
253 if len(path_part) == 0:
254 continue
255 depth += 1
256 node = _MkChild(node, path_part)
257 assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
258 node[NODE_TYPE_KEY] = 'p' # p for path
259
260 depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
261 result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)
262
263 if not seen_symbol_with_path:
264 logging.warning('Symbols lack paths. Data will not be structured.')
265
266 # The (no path) bucket can be extremely large if we failed to get
267 # path information. Split it into subgroups if needed.
268 SplitNoPathBucket(result)
269
270 largest_list_len = MakeChildrenDictsIntoLists(result)
271
272 if largest_list_len > BIG_BUCKET_LIMIT:
273 logging.warning('There are sections with %d nodes. '
274 'Results might be unusable.' % largest_list_len)
275 return result
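A hypothetical call, assuming (name, type, size, path) tuples as produced by
binary_size_utils.ParseNm; the symbol names and sizes are made up:

    symbols = [('etc_encode_subblock_helper(unsigned char const*, ...)', 't',
                4029, 'src/utils/SkTextureCompressor.cpp'),
               ('some_weak_symbol', 'W', 41, None)]
    tree = MakeCompactTree(symbols, '.')
    # tree['maxDepth'] == 5: three path components plus the symbol-type
    # bucket and the symbol leaf added by AddSymbolIntoFileNode.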
276
277
278 # Skia added: summarizes tree sizes by symbol type for the given root node.
279 # Returns a dict keyed by symbol type, whose values are the type's total
280 # size, e.g., {"t": 12345, "W": 543}.
281 def GetTreeSizes(node):
282 if 'children' not in node or not node['children']:
283 return {node['t']: node['value']}
284 dic = {}
285 for i in node['children']:
286 for k, v in GetTreeSizes(i).items():
287 dic.setdefault(k, 0)
288 dic[k] += v
289
290 return dic
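A worked example of the recursion, using the short node keys ('t', 'value',
'children') from the tree format described at the top of the file:

    leaf_a = {'t': 't', 'value': 100}
    leaf_b = {'t': 'W', 'value': 43}
    inner = {'children': [{'t': 't', 'value': 7}]}
    GetTreeSizes({'children': [leaf_a, leaf_b, inner]})
    # -> {'t': 107, 'W': 43}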
291
292
293 # Skia added: creates dict to be converted to JSON in bench format.
294 # See top of file for the structure description.
295 def GetBenchDict(githash, tree_root):
296 dic = {'gitHash': githash,
297 'key': {'source_type': 'binarysize'},
298 'results': {},}
299 for i in tree_root['children']:
300 if NAME_NO_PATH_BUCKET == i['n']: # Already at symbol summary level.
301 for k, v in GetTreeSizes(i).items():
302 dic['results']['no_path_' + SYMBOL_MAP[k]] = {
303 'memory': {
304 'bytes': v,
305 'options': {'path': 'no_path',
306 'symbol': SYMBOL_MAP[k],},}}
307 else: # We need to go deeper.
308 for c in i['children']:
309 path = i['n'] + '_' + c['n']
310 for k, v in GetTreeSizes(c).items():
311 dic['results'][path + '_' + SYMBOL_MAP[k]] = {
312 'memory': {
313 'bytes': v,
314 'options': {'path': path,
315 'symbol': SYMBOL_MAP[k],}}}
316
317 return dic
318
319
320 # Skia added: constructs 'gsutil cp' subprocess command list.
321 def GetGsCopyCommandList(gsutil, src, dst):
322 return [gsutil, '-h', 'Content-Type:application/json', 'cp', '-a',
323 'public-read', src, dst]
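For a hypothetical source file and destination object, the resulting argv is:

    GetGsCopyCommandList('gsutil', '/tmp/data.json',
                         'gs://chromium-skia-gm/size/abc123.json')
    # -> ['gsutil', '-h', 'Content-Type:application/json', 'cp', '-a',
    #     'public-read', '/tmp/data.json',
    #     'gs://chromium-skia-gm/size/abc123.json']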
324
325
326 def DumpCompactTree(symbols, symbol_path_origin_dir, ha, ts, issue, gsutil):
327 tree_root = MakeCompactTree(symbols, symbol_path_origin_dir)
328 json_data = {'tree_data': tree_root,
329 'githash': ha,
330 'commit_ts': ts,
331 'key': {'source_type': 'binary_size'},
332 'total_size': sum(GetTreeSizes(tree_root).values()),}
333 tmpfile = tempfile.NamedTemporaryFile(delete=False).name
334 with open(tmpfile, 'w') as out:
335 # Use separators without whitespace to get a smaller file.
336 json.dump(json_data, out, separators=(',', ':'))
337
338 GS_PREFIX = 'gs://chromium-skia-gm/'
339 # Writes to Google Storage for visualization.
340 subprocess.check_call(GetGsCopyCommandList(
341 gsutil, tmpfile, GS_PREFIX + 'size/' + ha + '.json'))
342 # Updates the latest data.
343 if not issue:
344 subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile,
345 GS_PREFIX + 'size/latest.json'))
346 # Writes an extra copy using year/month/day/hour path for easy ingestion.
347 with open(tmpfile, 'w') as out:
348 json.dump(GetBenchDict(ha, tree_root), out, separators=(',', ':'))
349 now = datetime.datetime.utcnow()
350 ingest_path = '/'.join(('nano-json-v1', str(now.year).zfill(4),
351 str(now.month).zfill(2), str(now.day).zfill(2),
352 str(now.hour).zfill(2)))
353 if issue:
354 ingest_path = '/'.join(('trybot', ingest_path, issue))
355 subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile,
356 GS_PREFIX + ingest_path + '/binarysize_' + ha + '.json'))
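To illustrate the ingestion layout, the zero-padded year/month/day/hour path
built above resolves as follows (the timestamp is hypothetical):

    import datetime
    now = datetime.datetime(2016, 7, 29, 15, 42)
    '/'.join(('nano-json-v1', str(now.year).zfill(4), str(now.month).zfill(2),
              str(now.day).zfill(2), str(now.hour).zfill(2)))
    # -> 'nano-json-v1/2016/07/29/15'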
357
358
359 def MakeSourceMap(symbols):
360 sources = {}
361 for _sym, _symbol_type, size, path in symbols:
362 key = None
363 if path:
364 key = os.path.normpath(path)
365 else:
366 key = '[no path]'
367 if key not in sources:
368 sources[key] = {'path': path, 'symbol_count': 0, 'size': 0}
369 record = sources[key]
370 record['size'] += size
371 record['symbol_count'] += 1
372 return sources
373
374
375 # Regex for parsing "nm" output. A sample line looks like this:
376 # 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
377 #
378 # The fields are: address, size, type, name, source location
379 # Regular expression explained ( see also: https://xkcd.com/208 ):
380 # ([0-9a-f]{8,}) The address
381 # [\s]+ Whitespace separator
382 # ([0-9a-f]{8,}) The size. From here on out it's all optional.
383 # [\s]+ Whitespace separator
384 # (\S?) The symbol type, which is any non-whitespace char
385 # [\s*] Whitespace separator
386 # ([^\t]*) Symbol name, any non-tab character (spaces ok!)
387 # [\t]? Tab separator
388 # (.*) The location (filename[:linenum|?][ (discriminator n)])
389 sNmPattern = re.compile(
390 r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')
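A quick sanity check of the pattern against the sample line from the comment
above (with a tab before the location, as the symbolized output uses):

    m = sNmPattern.match(
        '0167b39c 00000018 t ACCESS_DESCRIPTION_free\t/path/file.c:95')
    m.groups()
    # -> ('0167b39c', '00000018', 't', 'ACCESS_DESCRIPTION_free',
    #     '/path/file.c:95')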
391
392 class Progress(object):
393 def __init__(self):
394 self.count = 0
395 self.skip_count = 0
396 self.collisions = 0
397 self.time_last_output = time.time()
398 self.count_last_output = 0
399 self.disambiguations = 0
400 self.was_ambiguous = 0
401
402
403 def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
404 disambiguate, src_path):
405 nm_output = RunNm(library, nm_binary)
406 nm_output_lines = nm_output.splitlines()
407 nm_output_lines_len = len(nm_output_lines)
408 address_symbol = {}
409 progress = Progress()
410 def map_address_symbol(symbol, addr):
411 progress.count += 1
412 if addr in address_symbol:
413 # 'Collision between %s and %s.' % (str(symbol.name),
414 # str(address_symbol[addr].name))
415 progress.collisions += 1
416 else:
417 if symbol.disambiguated:
418 progress.disambiguations += 1
419 if symbol.was_ambiguous:
420 progress.was_ambiguous += 1
421
422 address_symbol[addr] = symbol
423
424 progress_output()
425
426 def progress_output():
427 progress_chunk = 100
428 if progress.count % progress_chunk == 0:
429 time_now = time.time()
430 time_spent = time_now - progress.time_last_output
431 if time_spent > 1.0:
432 # Only output at most once per second.
433 progress.time_last_output = time_now
434 chunk_size = progress.count - progress.count_last_output
435 progress.count_last_output = progress.count
436 if time_spent > 0:
437 speed = chunk_size / time_spent
438 else:
439 speed = 0
440 progress_percent = (100.0 * (progress.count + progress.skip_count) /
441 nm_output_lines_len)
442 disambiguation_percent = 0
443 if progress.disambiguations != 0:
444 disambiguation_percent = (100.0 * progress.disambiguations /
445 progress.was_ambiguous)
446
447 sys.stdout.write('\r%.1f%%: Looked up %d symbols (%d collisions, '
448 '%d disambiguations where %.1f%% succeeded)'
449 ' - %.1f lookups/s.' %
450 (progress_percent, progress.count, progress.collisions,
451 progress.disambiguations, disambiguation_percent, speed))
452
453 # In case disambiguation was disabled, we remove the source path (which upon
454 # being set signals the symbolizer to enable disambiguation)
455 if not disambiguate:
456 src_path = None
457 symbol_path_origin_dir = os.path.dirname(library)
458 # Skia specific.
459 symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '')
460 symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
461 map_address_symbol,
462 max_concurrent_jobs=jobs,
463 source_root_path=src_path,
464 prefix_to_remove=symbol_path_prefix)
465 user_interrupted = False
466 try:
467 for line in nm_output_lines:
468 match = sNmPattern.match(line)
469 if match:
470 location = match.group(5)
471 if not location:
472 addr = int(match.group(1), 16)
473 size = int(match.group(2), 16)
474 if addr in address_symbol: # Already looked up, shortcut
475 # ELFSymbolizer.
476 map_address_symbol(address_symbol[addr], addr)
477 continue
478 elif size == 0:
479 # Save time by not looking up empty symbols (do they even exist?)
480 print('Empty symbol: ' + line)
481 else:
482 symbolizer.SymbolizeAsync(addr, addr)
483 continue
484
485 progress.skip_count += 1
486 except KeyboardInterrupt:
487 user_interrupted = True
488 print('Interrupting - killing subprocesses. Please wait.')
489
490 try:
491 symbolizer.Join()
492 except KeyboardInterrupt:
493 # Don't want to abort here since we will be finished in a few seconds.
494 user_interrupted = True
495 print('Patience you must have my young padawan.')
496
497 print ''
498
499 if user_interrupted:
500 print('Skipping the rest of the file mapping. '
501 'Output will not be fully classified.')
502
503 symbol_path_origin_dir = os.path.dirname(library)
504 # Skia specific: path prefix to strip.
505 symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '')
506
507 with open(outfile, 'w') as out:
508 for line in nm_output_lines:
509 match = sNmPattern.match(line)
510 if match:
511 location = match.group(5)
512 if not location:
513 addr = int(match.group(1), 16)
514 symbol = address_symbol.get(addr)
515 if symbol is not None:
516 path = '??'
517 if symbol.source_path is not None:
518 path = symbol.source_path.replace(symbol_path_prefix, '')
519 line_number = 0
520 if symbol.source_line is not None:
521 line_number = symbol.source_line
522 out.write('%s\t%s:%d\n' % (line, path, line_number))
523 continue
524
525 out.write('%s\n' % line)
526
527 print('%d symbols in the results.' % len(address_symbol))
528
529
530 def RunNm(binary, nm_binary):
531 cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
532 binary]
533 nm_process = subprocess.Popen(cmd,
534 stdout=subprocess.PIPE,
535 stderr=subprocess.PIPE)
536 (process_output, err_output) = nm_process.communicate()
537
538 if nm_process.returncode != 0:
539 if err_output:
540 raise Exception(err_output)
541 else:
542 raise Exception(process_output)
543
544 return process_output
545
546
547 def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
548 addr2line_binary, nm_binary, disambiguate, src_path):
549 if nm_infile is None:
550 if outfile is None:
551 outfile = tempfile.NamedTemporaryFile(delete=False).name
552
553 if verbose:
554 print 'Running parallel addr2line, dumping symbols to ' + outfile
555 RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
556 disambiguate, src_path)
557
558 nm_infile = outfile
559
560 elif verbose:
561 print 'Using nm input from ' + nm_infile
562 with open(nm_infile, 'r') as infile:
563 return list(binary_size_utils.ParseNm(infile))
564
565
566 PAK_RESOURCE_ID_TO_STRING = { "inited": False }
567
568 def LoadPakIdsFromResourceFile(filename):
569 """Given a file name, it loads everything that looks like a resource id
570 into PAK_RESOURCE_ID_TO_STRING."""
571 with open(filename) as resource_header:
572 for line in resource_header:
573 if line.startswith("#define "):
574 line_data = line.split()
575 if len(line_data) == 3:
576 try:
577 resource_number = int(line_data[2])
578 resource_name = line_data[1]
579 PAK_RESOURCE_ID_TO_STRING[resource_number] = resource_name
580 except ValueError:
581 pass
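For example, a grit-generated header line parses as follows (the resource
name and id are hypothetical):

    line = '#define IDR_MY_RESOURCE_JS 1234'
    line.split()  # -> ['#define', 'IDR_MY_RESOURCE_JS', '1234']
    # so PAK_RESOURCE_ID_TO_STRING[1234] = 'IDR_MY_RESOURCE_JS'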
582
583 def GetReadablePakResourceName(pak_file, resource_id):
584 """Pak resources have a numeric identifier. It is not helpful when
585 trying to locate where footprint is generated. This does its best to
586 map the number to a usable string."""
587 if not PAK_RESOURCE_ID_TO_STRING['inited']:
588 # Try to find resource header files generated by grit when
589 # building the pak file. We'll look for files named *resources.h"
590 # and lines of the type:
591 # #define MY_RESOURCE_JS 1234
592 PAK_RESOURCE_ID_TO_STRING['inited'] = True
593 gen_dir = os.path.join(os.path.dirname(pak_file), 'gen')
594 if os.path.isdir(gen_dir):
595 for dirname, _dirs, files in os.walk(gen_dir):
596 for filename in files:
597 if filename.endswith('resources.h'):
598 LoadPakIdsFromResourceFile(os.path.join(dirname, filename))
599 return PAK_RESOURCE_ID_TO_STRING.get(resource_id,
600 'Pak Resource %d' % resource_id)
601
602 def AddPakData(symbols, pak_file):
603 """Adds pseudo-symbols from a pak file."""
604 pak_file = os.path.abspath(pak_file)
605 with open(pak_file, 'rb') as pak:
606 data = pak.read()
607
608 PAK_FILE_VERSION = 4
609 HEADER_LENGTH = 2 * 4 + 1 # Two uint32s. (file version, number of entries)
610 # and one uint8 (encoding of text resources)
611 INDEX_ENTRY_SIZE = 2 + 4 # Each entry is a uint16 and a uint32.
612 version, num_entries, _encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
613 assert version == PAK_FILE_VERSION, ('Unsupported pak file '
614 'version (%d) in %s. Only '
615 'support version %d' %
616 (version, pak_file, PAK_FILE_VERSION))
617 if num_entries > 0:
618 # Read the index and data.
619 data = data[HEADER_LENGTH:]
620 for _ in range(num_entries):
621 resource_id, offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
622 data = data[INDEX_ENTRY_SIZE:]
623 _next_id, next_offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
624 resource_size = next_offset - offset
625
626 symbol_name = GetReadablePakResourceName(pak_file, resource_id)
627 symbol_path = pak_file
628 symbol_type = 'd' # Data. Approximation.
629 symbol_size = resource_size
630 symbols.append((symbol_name, symbol_type, symbol_size, symbol_path))
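A minimal sketch of the little-endian header layout this unpacks (the values
are hypothetical):

    import struct
    header = struct.pack('<IIB', 4, 2, 1)  # version, num_entries, encoding
    struct.unpack('<IIB', header)          # -> (4, 2, 1); len(header) == 9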
631
632 def _find_in_system_path(binary):
633 """Locate the full path to binary in the system path or return None
634 if not found."""
635 system_path = os.environ["PATH"].split(os.pathsep)
636 for path in system_path:
637 binary_path = os.path.join(path, binary)
638 if os.path.isfile(binary_path):
639 return binary_path
640 return None
641
642 def CheckDebugFormatSupport(library, addr2line_binary):
643 """Kills the program if debug data is in an unsupported format.
644
645 There are two common versions of the DWARF debug formats and
646 since we are right now transitioning from DWARF2 to newer formats,
647 it's possible to have a mix of tools that are not compatible. Detect
648 that and abort rather than produce meaningless output."""
649 tool_output = subprocess.check_output([addr2line_binary, '--version'])
650 version_re = re.compile(r'^GNU [^ ]+ .* (\d+)\.(\d+).*?$', re.M)
651 parsed_output = version_re.match(tool_output)
652 major = int(parsed_output.group(1))
653 minor = int(parsed_output.group(2))
654 supports_dwarf4 = major > 2 or major == 2 and minor > 22
655
656 if supports_dwarf4:
657 return
658
659 print('Checking version of debug information in %s.' % library)
660 debug_info = subprocess.check_output(['readelf', '--debug-dump=info',
661 '--dwarf-depth=1', library])
662 dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
663 parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
664 version = int(parsed_dwarf_format_output.group(1))
665 if version > 2:
666 print('The supplied tools only support DWARF2 debug data but the binary\n' +
667 'uses DWARF%d. Update the tools or compile the binary\n' % version +
668 'with -gdwarf-2.')
669 sys.exit(1)
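For reference, the version check applied to a hypothetical binutils 2.24
addr2line, using version_re from above:

    m = version_re.match('GNU addr2line (GNU Binutils) 2.24')
    int(m.group(1)), int(m.group(2))  # -> (2, 24)
    # 2.24 is newer than 2.22, so DWARF4 is considered supported.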
670
671
672 def main():
673 usage = """%prog [options]
674
675 Runs a spatial analysis on a given library, looking up the source locations
676 of its symbols and calculating how much space each directory, source file,
677 and so on is taking. The result is a report that can be used to pinpoint
678 sources of large portions of the binary.
679
680 Under normal circumstances, you only need to pass two arguments, thusly:
681
682 %prog --library /path/to/library --destdir /path/to/output
683
684 In this mode, the program will dump the symbols from the specified library
685 and map those symbols back to source locations, producing a web-based
686 report in the specified output directory.
687
688 Other options are available via '--help'.
689 """
690 parser = optparse.OptionParser(usage=usage)
691 parser.add_option('--nm-in', metavar='PATH',
692 help='if specified, use nm input from <path> instead of '
693 'generating it. Note that source locations should be '
694 'present in the file; i.e., no addr2line symbol lookups '
695 'will be performed when this option is specified. '
696 'Mutually exclusive with --library.')
697 parser.add_option('--destdir', metavar='PATH',
698 help='write output to the specified directory. An HTML '
699 'report is generated here along with supporting files; '
700 'any existing report will be overwritten. Not used in '
701 'Skia.')
702 parser.add_option('--library', metavar='PATH',
703 help='if specified, process symbols in the library at '
704 'the specified path. Mutually exclusive with --nm-in.')
705 parser.add_option('--pak', metavar='PATH',
706 help='if specified, includes the contents of the '
707 'specified *.pak file in the output.')
708 parser.add_option('--nm-binary',
709 help='use the specified nm binary to analyze library. '
710 'This is to be used when the nm in the path is not for '
711 'the right architecture or of the right version.')
712 parser.add_option('--addr2line-binary',
713 help='use the specified addr2line binary to analyze '
714 'library. This is to be used when the addr2line in '
715 'the path is not for the right architecture or '
716 'of the right version.')
717 parser.add_option('--jobs', type='int',
718 help='number of jobs to use for the parallel '
719 'addr2line processing pool; defaults to 1. More '
720 'jobs greatly improve throughput but eat RAM like '
721 'popcorn, and take several gigabytes each. Start low '
722 'and ramp this number up until your machine begins to '
723 'struggle with RAM. '
724 'This argument is only valid when using --library.')
725 parser.add_option('-v', dest='verbose', action='store_true',
726 help='be verbose, printing lots of status information.')
727 parser.add_option('--nm-out', metavar='PATH',
728 help='keep the nm output file, and store it at the '
729 'specified path. This is useful if you want to see the '
730 'fully processed nm output after the symbols have been '
731 'mapped to source locations. By default, a tempfile is '
732 'used and is deleted when the program terminates. '
733 'This argument is only valid when using --library.')
734 parser.add_option('--legacy', action='store_true',
735 help='emit legacy binary size report instead of modern')
736 parser.add_option('--disable-disambiguation', action='store_true',
737 help='disables the disambiguation process altogether,'
738 ' NOTE: this may, depending on your toolchain, produce'
739 ' output with some symbols at the top layer if addr2line'
740 ' could not get the entire source path.')
741 parser.add_option('--source-path', default='./',
742 help='the path to the source code of the output binary, '
743 'default set to current directory. Used in the'
744 ' disambiguation process.')
745 parser.add_option('--githash', default='latest',
746 help='Git hash for the binary version. Added by Skia.')
747 parser.add_option('--commit_ts', type='int', default=-1,
748 help='Timestamp for the commit. Added by Skia.')
749 parser.add_option('--issue_number', default='',
750 help='The trybot issue number as a string. Added by Skia.')
751 parser.add_option('--gsutil_path', default='gsutil',
752 help='Path to gsutil binary. Added by Skia.')
753 opts, _args = parser.parse_args()
754
755 if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
756 parser.error('exactly one of --library or --nm-in is required')
757 if (opts.nm_in):
758 if opts.jobs:
759 print >> sys.stderr, ('WARNING: --jobs has no effect '
760 'when used with --nm-in')
761 if not opts.jobs:
762 # Use the number of processors but cap between 2 and 4 since raw
763 # CPU power isn't the limiting factor. It's I/O limited, memory
764 # bus limited and available-memory-limited. Too many processes and
765 # the computer will run out of memory and it will be slow.
766 opts.jobs = max(2, min(4, multiprocessing.cpu_count()))
767
768 if opts.addr2line_binary:
769 assert os.path.isfile(opts.addr2line_binary)
770 addr2line_binary = opts.addr2line_binary
771 else:
772 addr2line_binary = _find_in_system_path('addr2line')
773 assert addr2line_binary, 'Unable to find addr2line in the path. '\
774 'Use --addr2line-binary to specify location.'
775
776 if opts.nm_binary:
777 assert os.path.isfile(opts.nm_binary)
778 nm_binary = opts.nm_binary
779 else:
780 nm_binary = _find_in_system_path('nm')
781 assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
782 'to specify location.'
783
784 if opts.pak:
785 assert os.path.isfile(opts.pak), 'Could not find %s' % opts.pak
786
787 print('addr2line: %s' % addr2line_binary)
788 print('nm: %s' % nm_binary)
789
790 if opts.library:
791 CheckDebugFormatSupport(opts.library, addr2line_binary)
792
793 symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library,
794 opts.jobs, opts.verbose is True,
795 addr2line_binary, nm_binary,
796 opts.disable_disambiguation is None,
797 opts.source_path)
798
799 if opts.pak:
800 AddPakData(symbols, opts.pak)
801
802 if opts.legacy: # legacy report
803 print 'Do not set the legacy flag.'
804
805 else: # modern report
806 if opts.library:
807 symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library))
808 else:
809 # Just a guess. Hopefully all paths in the input file are absolute.
810 symbol_path_origin_dir = os.path.abspath(os.getcwd())
811 DumpCompactTree(symbols, symbol_path_origin_dir, opts.githash,
812 opts.commit_ts, opts.issue_number, opts.gsutil_path)
813 print 'Report data uploaded to GS.'
814
815
816 if __name__ == '__main__':
817 sys.exit(main())
