| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2014 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Generate a spatial analysis against an arbitrary library. | |
| 7 | |
| 8 Adapted for Skia's use case from | |
| 9 chromium/src/tools/binary_size/run_binary_size_analysis.py. Main changes: | |
| 10 | |
| 11 -- Cleans up some deprecated code. | |
| 12 -- Always use relative code path so the tree root is Skia repo's root. | |
| 13 -- Instead of outputting the standalone HTML/CSS/JS filesets, writes the | |
| 14 TreeMap JSON data into a Google Storage bucket. | |
| 15 -- Adds githash and total_size to the JSON data. | |
| 16 -- Outputs another summary data in JSON Bench format for skiaperf ingestion. | |
| 17 | |
| 18 The output JSON data for visualization is in the following format: | |
| 19 | |
| 20 { | |
| 21 "githash": 123abc, | |
| 22 "commit_ts": 1234567890, | |
| 23 "total_size": 1234567, | |
| 24 "key": {"source_type": "binary_size"}, | |
| 25 "tree_data": { | |
| 26 "maxDepth": 9, | |
| 27 "k": "p", "children":[ | |
| 28 {"k":"p","children":[ | |
| 29 {"k":"p","children":[ | |
| 30 {"k":"p","lastPathElement":true,"children":[ | |
| 31 {"k":"b","t":"t","children":[ | |
| 32 {"k":"s", "t":"t", "value":4029, | |
| 33 "n":"etc_encode_subblock_helper(unsigned char const*, ...)" | |
| 34 }, | |
| 35 ...... | |
| 36 } | |
| 37 } | |
| 38 | |
| 39 Another JSON file is generated for size summaries to be used in skiaperf. The | |
| 40 JSON format details can be found at: | |
| 41 https://github.com/google/skia/blob/master/bench/ResultsWriter.h#L54 | |
| 42 and: | |
| 43 https://skia.googlesource.com/buildbot/+/master/perf/go/ingester/nanobench.go | |
| 44 | |
| 45 In the binary size case, outputs look like: | |
| 46 | |
| 47 { | |
| 48 "gitHash": "123abc", | |
| 49 "key": { | |
| 50 "source_type": "binarysize" | |
| 51 } | |
| 52 "results: { | |
| 53 "src_lazy_global_weak_symbol": { | |
| 54 "memory": { | |
| 55 "bytes": 41, | |
| 56 "options": { | |
| 57 "path": "src_lazy", | |
| 58 "symbol": "global_weak_symbol" | |
| 59 } | |
| 60 } | |
| 61 }, | |
| 62 "src_lazy_global_read_only_data": { | |
| 63 "memory": { | |
| 64 "bytes": 13476, | |
| 65 "options": { | |
| 66 "path": "src_lazy", | |
| 67 "symbol": "global_read_only_data" | |
| 68 } | |
| 69 } | |
| 70 }, | |
| 71 ... | |
| 72 } | |
| 73 } | |
| 74 | |
| 75 """ | |
| 76 | |
| 77 import collections | |
| 78 import datetime | |
| 79 import json | |
| 80 import logging | |
| 81 import multiprocessing | |
| 82 import optparse | |
| 83 import os | |
| 84 import re | |
| 85 import shutil | |
| 86 import struct | |
| 87 import subprocess | |
| 88 import sys | |
| 89 import tempfile | |
| 90 import time | |
| 91 import urllib2 | |
| 92 | |
| 93 import binary_size_utils | |
| 94 import elf_symbolizer | |
| 95 | |
| 96 from recipe_engine.types import freeze | |
| 97 | |
# Node dictionary keys. These are output in json read by the webapp so
# keep them short to save file size.
# Note: If these change, the webapp must also change.
NODE_TYPE_KEY = 'k'
NODE_NAME_KEY = 'n'
NODE_CHILDREN_KEY = 'children'
NODE_SYMBOL_TYPE_KEY = 't'
NODE_SYMBOL_SIZE_KEY = 'value'
NODE_MAX_DEPTH_KEY = 'maxDepth'
NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement'

# The display name of the bucket where we put symbols without path.
NAME_NO_PATH_BUCKET = '(No Path)'

# Try to keep data buckets smaller than this to avoid killing the
# graphing lib.
BIG_BUCKET_LIMIT = 3000

# Skia addition: relative dir for libskia.so from code base.
LIBSKIA_RELATIVE_PATH = os.path.join('out', 'Release', 'lib')

# Skia addition: dictionary mapping nm symbol type code to a readable
# symbol-kind name. See
# https://code.google.com/p/chromium/codesearch#chromium/src/tools/binary_size/template/D3SymbolTreeMap.js&l=74
SYMBOL_MAP = freeze({
    'A': 'global_absolute',
    'B': 'global_uninitialized_data',
    'b': 'local_uninitialized_data',
    'C': 'global_uninitialized_common',
    'D': 'global_initialized_data',
    'd': 'local_initialized_data',
    # Bug fix: was 'global_small initialized_data' (space instead of
    # underscore), inconsistent with every other entry in this map.
    'G': 'global_small_initialized_data',
    'g': 'local_small_initialized_data',
    'i': 'indirect_function',
    'N': 'debugging',
    'p': 'stack_unwind',
    'R': 'global_read_only_data',
    'r': 'local_read_only_data',
    'S': 'global_small_uninitialized_data',
    's': 'local_small_uninitialized_data',
    'T': 'global_code',
    't': 'local_code',
    'U': 'undefined',
    'u': 'unique',
    'V': 'global_weak_object',
    'v': 'local_weak_object',
    'W': 'global_weak_symbol',
    'w': 'local_weak_symbol',
    '@': 'vtable_entry',
    '-': 'stabs_debugging',
    '?': 'unrecognized',
})
| 150 | |
| 151 | |
| 152 def _MkChild(node, name): | |
| 153 child = node[NODE_CHILDREN_KEY].get(name) | |
| 154 if child is None: | |
| 155 child = {NODE_NAME_KEY: name, | |
| 156 NODE_CHILDREN_KEY: {}} | |
| 157 node[NODE_CHILDREN_KEY][name] = child | |
| 158 return child | |
| 159 | |
| 160 | |
def SplitNoPathBucket(node):
  """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
  handle. Split it into sub-buckets in that case.

  Mutates |node| (the tree root) in place. Uses dict.items() instead of
  the Python-2-only iteritems(), and floor division for the subgroup
  number, so the function works on both Python 2 and 3.
  """
  root_children = node[NODE_CHILDREN_KEY]
  if NAME_NO_PATH_BUCKET in root_children:
    no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
    old_children = no_path_bucket[NODE_CHILDREN_KEY]
    count = 0
    for symbol_type, symbol_bucket in old_children.items():
      count += len(symbol_bucket[NODE_CHILDREN_KEY])
    if count > BIG_BUCKET_LIMIT:
      # Rebuild the bucket's children as 'subgroup' path nodes holding at
      # most BIG_BUCKET_LIMIT symbols each.
      new_children = {}
      no_path_bucket[NODE_CHILDREN_KEY] = new_children
      current_bucket = None
      index = 0
      for symbol_type, symbol_bucket in old_children.items():
        for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].items():
          if index % BIG_BUCKET_LIMIT == 0:
            group_no = (index // BIG_BUCKET_LIMIT) + 1  # '//' on py2 and py3.
            current_bucket = _MkChild(no_path_bucket,
                                      '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
                                                          group_no))
            # NOTE(review): this asserts/marks the tree root, not the new
            # subgroup bucket — preserved as-is from the original.
            assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
            node[NODE_TYPE_KEY] = 'p'  # p for path
          index += 1
          symbol_size = value[NODE_SYMBOL_SIZE_KEY]
          AddSymbolIntoFileNode(current_bucket, symbol_type,
                                symbol_name, symbol_size)
| 189 | |
| 190 | |
def MakeChildrenDictsIntoLists(node):
  """Recursively converts every children dict in the tree into a list.

  The webapp JSON format wants child lists, but the tree is built with
  dicts for O(1) lookup. Uses dict.values() instead of the Python-2-only
  itervalues() so the code runs on both Python 2 and 3.

  Returns:
    The largest number of children found on any single node in the
    subtree rooted at |node| (used to warn about oversized sections).
  """
  largest_list_len = 0
  if NODE_CHILDREN_KEY in node:
    largest_list_len = len(node[NODE_CHILDREN_KEY])
    child_list = []
    for child in node[NODE_CHILDREN_KEY].values():
      child_largest_list_len = MakeChildrenDictsIntoLists(child)
      if child_largest_list_len > largest_list_len:
        largest_list_len = child_largest_list_len
      child_list.append(child)
    node[NODE_CHILDREN_KEY] = child_list

  return largest_list_len
| 204 | |
| 205 | |
def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
  """Puts symbol into the file path node |node|.
  Returns the number of added levels in tree. I.e. returns 2."""
  # |node| is a file-path node: mark it as a path leaf, then descend into
  # (creating if needed) the bucket for this symbol type.
  node[NODE_LAST_PATH_ELEMENT_KEY] = True
  bucket = _MkChild(node, symbol_type)
  assert NODE_TYPE_KEY not in bucket or bucket[NODE_TYPE_KEY] == 'b'
  bucket[NODE_SYMBOL_TYPE_KEY] = symbol_type
  bucket[NODE_TYPE_KEY] = 'b'  # b for bucket

  # Now make the symbol entry itself under the bucket.
  leaf = _MkChild(bucket, symbol_name)
  if NODE_CHILDREN_KEY in leaf:
    if leaf[NODE_CHILDREN_KEY]:
      logging.warning('A container node used as symbol for %s.' % symbol_name)
    # This is going to be used as a leaf so no use for child list.
    del leaf[NODE_CHILDREN_KEY]
  leaf[NODE_SYMBOL_SIZE_KEY] = symbol_size
  leaf[NODE_SYMBOL_TYPE_KEY] = symbol_type
  leaf[NODE_TYPE_KEY] = 's'  # s for symbol

  return 2  # Depth of the added subtree.
| 229 | |
| 230 | |
def MakeCompactTree(symbols, symbol_path_origin_dir):
  """Builds the webapp tree-map structure from parsed nm symbols.

  Args:
    symbols: iterable of (name, type, size, path) tuples.
    symbol_path_origin_dir: unused here; kept for interface parity.

  Returns:
    The root node dict, with NODE_MAX_DEPTH_KEY set to the deepest level.
  """
  result = {NODE_NAME_KEY: '/',
            NODE_CHILDREN_KEY: {},
            NODE_TYPE_KEY: 'p',
            NODE_MAX_DEPTH_KEY: 0}
  seen_symbol_with_path = False
  for symbol_name, symbol_type, symbol_size, file_path in symbols:
    if 'vtable for ' in symbol_name:
      symbol_type = '@'  # hack to categorize these separately
    if file_path and file_path != "??":
      seen_symbol_with_path = True
    else:
      file_path = NAME_NO_PATH_BUCKET

    # Walk down the tree, creating path nodes as needed.
    node = result
    depth = 0
    for path_part in file_path.split('/'):
      if not path_part:
        continue
      depth += 1
      node = _MkChild(node, path_part)
      assert NODE_TYPE_KEY not in node or node[NODE_TYPE_KEY] == 'p'
      node[NODE_TYPE_KEY] = 'p'  # p for path

    depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
    result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)

  if not seen_symbol_with_path:
    logging.warning('Symbols lack paths. Data will not be structured.')

  # The (no path) bucket can be extremely large if we failed to get
  # path information. Split it into subgroups if needed.
  SplitNoPathBucket(result)

  largest_list_len = MakeChildrenDictsIntoLists(result)

  if largest_list_len > BIG_BUCKET_LIMIT:
    logging.warning('There are sections with %d nodes. '
                    'Results might be unusable.' % largest_list_len)
  return result
| 276 | |
| 277 | |
# Skia added: summarizes tree size by symbol type for the given root node.
# Returns a dict keyed by symbol type, and value the type's overall size.
# e.g., {"t": 12345, "W": 543}.
def GetTreeSizes(node):
  """Recursively sums leaf 'value' fields grouped by symbol type 't'."""
  children = node.get('children')
  if not children:
    # Leaf node: a single symbol of one type.
    return {node['t']: node['value']}
  totals = {}
  for child in children:
    for symbol_type, size in GetTreeSizes(child).items():
      totals[symbol_type] = totals.get(symbol_type, 0) + size
  return totals
| 291 | |
| 292 | |
# Skia added: creates dict to be converted to JSON in bench format.
# See top of file for the structure description.
def GetBenchDict(githash, tree_root):
  """Returns the skiaperf bench-format dict for |tree_root|.

  Args:
    githash: git hash string, recorded under 'gitHash'.
    tree_root: root node produced by MakeCompactTree().
  """
  dic = {'gitHash': githash,
         'key': {'source_type': 'binarysize'},
         'results': {},}
  for child in tree_root['children']:
    # Consistency fix: compare against the shared NAME_NO_PATH_BUCKET
    # constant instead of a duplicated '(No Path)' literal so this check
    # cannot drift from the bucket name used when building the tree.
    if NAME_NO_PATH_BUCKET == child['n']:  # Already at symbol summary level.
      for symbol_type, size in GetTreeSizes(child).items():
        dic['results']['no_path_' + SYMBOL_MAP[symbol_type]] = {
            'memory': {
              'bytes': size,
              'options': {'path': 'no_path',
                          'symbol': SYMBOL_MAP[symbol_type],},}}
    else:  # We need to go deeper.
      for grandchild in child['children']:
        path = child['n'] + '_' + grandchild['n']
        for symbol_type, size in GetTreeSizes(grandchild).items():
          dic['results'][path + '_' + SYMBOL_MAP[symbol_type]] = {
              'memory': {
                'bytes': size,
                'options': {'path': path,
                            'symbol': SYMBOL_MAP[symbol_type],}}}

  return dic
| 318 | |
| 319 | |
# Skia added: constructs 'gsutil cp' subprocess command list.
def GetGsCopyCommandList(gsutil, src, dst):
  """Returns argv for copying |src| to |dst| as publicly readable JSON."""
  header_args = ['-h', 'Content-Type:application/json']
  copy_args = ['cp', '-a', 'public-read', src, dst]
  return [gsutil] + header_args + copy_args
| 324 | |
| 325 | |
def DumpCompactTree(symbols, symbol_path_origin_dir, ha, ts, issue, gsutil):
  """Builds the tree JSON and uploads visualization + bench data to GS.

  Args:
    symbols: iterable of (name, type, size, path) tuples.
    symbol_path_origin_dir: passed through to MakeCompactTree().
    ha: git hash of the build.
    ts: commit timestamp.
    issue: trybot issue number string; empty/falsy for waterfall runs.
    gsutil: path to the gsutil binary.
  """
  tree_root = MakeCompactTree(symbols, symbol_path_origin_dir)
  json_data = {'tree_data': tree_root,
               'githash': ha,
               'commit_ts': ts,
               'key': {'source_type': 'binary_size'},
               'total_size': sum(GetTreeSizes(tree_root).values()),}
  tmpfile = tempfile.NamedTemporaryFile(delete=False).name
  with open(tmpfile, 'w') as out:
    # Use separators without whitespace to get a smaller file.
    json.dump(json_data, out, separators=(',', ':'))

  GS_PREFIX = 'gs://chromium-skia-gm/'
  # Writes to Google Storage for visualization.
  subprocess.check_call(GetGsCopyCommandList(
      gsutil, tmpfile, GS_PREFIX + 'size/' + ha + '.json'))
  # Updates the latest data (waterfall runs only; trybots must not clobber).
  if not issue:
    subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile,
                                               GS_PREFIX + 'size/latest.json'))
  # Writes an extra copy using year/month/day/hour path for easy ingestion.
  with open(tmpfile, 'w') as out:
    json.dump(GetBenchDict(ha, tree_root), out, separators=(',', ':'))
  now = datetime.datetime.utcnow()
  ingest_path = '/'.join(('nano-json-v1', str(now.year).zfill(4),
                          str(now.month).zfill(2), str(now.day).zfill(2),
                          str(now.hour).zfill(2)))
  if issue:
    # Bug fix: str.join takes a single iterable argument; the previous
    # '/'.join('trybot', ingest_path, issue) raised TypeError on trybot runs.
    ingest_path = '/'.join(('trybot', ingest_path, issue))
  subprocess.check_call(GetGsCopyCommandList(
      gsutil, tmpfile, GS_PREFIX + ingest_path + '/binarysize_' + ha + '.json'))
| 357 | |
| 358 | |
def MakeSourceMap(symbols):
  """Aggregates symbol count and total size per normalized source path.

  Symbols without a path are pooled under the '[no path]' key. Returns a
  dict: normalized-path -> {'path', 'symbol_count', 'size'}.
  """
  sources = {}
  for _sym, _symbol_type, size, path in symbols:
    key = os.path.normpath(path) if path else '[no path]'
    record = sources.setdefault(
        key, {'path': path, 'symbol_count': 0, 'size': 0})
    record['size'] += size
    record['symbol_count'] += 1
  return sources
| 373 | |
| 374 | |
# Regex for parsing "nm" output. A sample line looks like this:
# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
#
# The fields are: address, size, type, name, source location
# Regular expression explained ( see also: https://xkcd.com/208 ):
# ([0-9a-f]{8,})   The address
# [\s]+            Whitespace separator
# ([0-9a-f]{8,})   The size. From here on out it's all optional.
# [\s]*            Whitespace separator
# (\S?)            The symbol type, which is any non-whitespace char
# [\s*]            Separator. NOTE: this is a character class matching one
#                  whitespace char OR a literal '*', not a repetition.
# ([^\t]*)         Symbol name, any non-tab character (spaces ok!)
# [\t]?            Tab separator
# (.*)             The location (filename[:linennum|?][ (discriminator n)]
sNmPattern = re.compile(
    r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')
| 391 | |
class Progress():
  """Mutable counters tracking symbol-lookup progress for status output."""

  def __init__(self):
    # Symbols processed and nm lines skipped so far.
    self.count = 0
    self.skip_count = 0
    # Number of address collisions observed.
    self.collisions = 0
    # Throttling state for the at-most-once-per-second console update.
    self.time_last_output = time.time()
    self.count_last_output = 0
    # Disambiguation statistics.
    self.disambiguations = 0
    self.was_ambiguous = 0
| 401 | |
| 402 | |
def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path):
  """Dumps |library|'s symbols to |outfile| with resolved source locations.

  Runs nm over |library|, then asynchronously symbolizes (via
  elf_symbolizer.ELFSymbolizer / addr2line) every nm line that lacks a
  source location. Each output line is the original nm line, tab-extended
  with 'path:line' when a lookup succeeded.

  Args:
    outfile: path of the text file to write.
    library: path to the binary being analyzed.
    addr2line_binary: addr2line executable to use.
    nm_binary: nm executable to use.
    jobs: max concurrent addr2line subprocesses.
    disambiguate: if falsy, src_path is cleared, which disables the
        symbolizer's path disambiguation.
    src_path: source tree root used for disambiguation.
  """
  nm_output = RunNm(library, nm_binary)
  nm_output_lines = nm_output.splitlines()
  nm_output_lines_len = len(nm_output_lines)
  # Map of address -> symbol, filled in by the symbolizer callback below.
  address_symbol = {}
  progress = Progress()

  def map_address_symbol(symbol, addr):
    # Callback invoked (possibly from the symbolizer) for each resolved
    # symbol; records it and updates progress counters.
    progress.count += 1
    if addr in address_symbol:
      # 'Collision between %s and %s.' % (str(symbol.name),
      #                                   str(address_symbol[addr].name))
      progress.collisions += 1
    else:
      if symbol.disambiguated:
        progress.disambiguations += 1
      if symbol.was_ambiguous:
        progress.was_ambiguous += 1

      address_symbol[addr] = symbol

    progress_output()

  def progress_output():
    # Prints a one-line status update, throttled to once per second and to
    # every 100th processed symbol.
    progress_chunk = 100
    if progress.count % progress_chunk == 0:
      time_now = time.time()
      time_spent = time_now - progress.time_last_output
      if time_spent > 1.0:
        # Only output at most once per second.
        progress.time_last_output = time_now
        chunk_size = progress.count - progress.count_last_output
        progress.count_last_output = progress.count
        if time_spent > 0:
          speed = chunk_size / time_spent
        else:
          speed = 0
        progress_percent = (100.0 * (progress.count + progress.skip_count) /
                            nm_output_lines_len)
        disambiguation_percent = 0
        if progress.disambiguations != 0:
          disambiguation_percent = (100.0 * progress.disambiguations /
                                    progress.was_ambiguous)

        sys.stdout.write('\r%.1f%%: Looked up %d symbols (%d collisions, '
                         '%d disambiguations where %.1f%% succeeded)'
                         ' - %.1f lookups/s.' %
                         (progress_percent, progress.count, progress.collisions,
                          progress.disambiguations, disambiguation_percent,
                          speed))

  # In case disambiguation was disabled, we remove the source path (which upon
  # being set signals the symbolizer to enable disambiguation)
  if not disambiguate:
    src_path = None
  symbol_path_origin_dir = os.path.dirname(library)
  # Skia specific: strip the out/Release/lib prefix so paths are relative to
  # the repo root.
  symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '')
  symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
                                            map_address_symbol,
                                            max_concurrent_jobs=jobs,
                                            source_root_path=src_path,
                                            prefix_to_remove=symbol_path_prefix)
  user_interrupted = False
  try:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          size = int(match.group(2), 16)
          if addr in address_symbol:  # Already looked up, shortcut
                                      # ELFSymbolizer.
            map_address_symbol(address_symbol[addr], addr)
            continue
          elif size == 0:
            # Save time by not looking up empty symbols (do they even exist?)
            print('Empty symbol: ' + line)
          else:
            symbolizer.SymbolizeAsync(addr, addr)
            continue

      progress.skip_count += 1
  except KeyboardInterrupt:
    user_interrupted = True
    print('Interrupting - killing subprocesses. Please wait.')

  try:
    # Wait for all in-flight addr2line lookups to finish.
    symbolizer.Join()
  except KeyboardInterrupt:
    # Don't want to abort here since we will be finished in a few seconds.
    user_interrupted = True
    print('Patience you must have my young padawan.')

  # Terminate the '\r' progress line (Python 2 print statement).
  print ''

  if user_interrupted:
    print('Skipping the rest of the file mapping. '
          'Output will not be fully classified.')

  symbol_path_origin_dir = os.path.dirname(library)
  # Skia specific: path prefix to strip.
  symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '')

  # Second pass: re-emit every nm line, appending 'path:line' for the
  # addresses we managed to symbolize.
  with open(outfile, 'w') as out:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          symbol = address_symbol.get(addr)
          if symbol is not None:
            path = '??'
            if symbol.source_path is not None:
              path = symbol.source_path.replace(symbol_path_prefix, '')
            line_number = 0
            if symbol.source_line is not None:
              line_number = symbol.source_line
            out.write('%s\t%s:%d\n' % (line, path, line_number))
            continue

      out.write('%s\n' % line)

  print('%d symbols in the results.' % len(address_symbol))
| 528 | |
| 529 | |
def RunNm(binary, nm_binary):
  """Runs nm over |binary| (demangled, size-sorted) and returns its stdout.

  Raises:
    Exception: if nm exits non-zero; the message is nm's stderr, or its
        stdout when stderr is empty.
  """
  cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
         binary]
  nm_process = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
  (process_output, err_output) = nm_process.communicate()

  if nm_process.returncode != 0:
    # Bug fix: 'raise Exception, msg' is Python-2-only syntax (a syntax
    # error on Python 3); raise Exception(msg) works on both.
    if err_output:
      raise Exception(err_output)
    else:
      raise Exception(process_output)

  return process_output
| 545 | |
| 546 | |
def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
                 addr2line_binary, nm_binary, disambiguate, src_path):
  """Returns parsed symbol tuples, symbolizing |library| first if needed.

  If |nm_infile| is None, runs the symbolizer over |library| and writes its
  output to |outfile| (a temp file when |outfile| is None), then parses
  that. Otherwise parses the pre-existing |nm_infile|. The tuples come from
  binary_size_utils.ParseNm.
  """
  if nm_infile is None:
    if outfile is None:
      outfile = tempfile.NamedTemporaryFile(delete=False).name

    if verbose:
      # print(...) works as a statement on Python 2 and a call on Python 3.
      print('Running parallel addr2line, dumping symbols to ' + outfile)
    RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path)

    nm_infile = outfile

  elif verbose:
    print('Using nm input from ' + nm_infile)
  # Bug fix: file() is Python-2-only; open() works on both versions.
  with open(nm_infile, 'r') as infile:
    return list(binary_size_utils.ParseNm(infile))
| 564 | |
| 565 | |
# Lazily-populated cache mapping pak resource id -> readable name; the
# 'inited' flag records whether GetReadablePakResourceName() has already
# scanned the grit-generated resource headers.
PAK_RESOURCE_ID_TO_STRING = { "inited": False }
| 567 | |
def LoadPakIdsFromResourceFile(filename):
  """Given a file name, it loads everything that looks like a resource id
  into PAK_RESOURCE_ID_TO_STRING."""
  with open(filename) as resource_header:
    for line in resource_header:
      # Only grit-style '#define NAME 1234' lines are of interest.
      if not line.startswith("#define "):
        continue
      fields = line.split()
      if len(fields) != 3:
        continue
      try:
        PAK_RESOURCE_ID_TO_STRING[int(fields[2])] = fields[1]
      except ValueError:
        # Value wasn't a plain integer; skip it.
        pass
| 582 | |
def GetReadablePakResourceName(pak_file, resource_id):
  """Pak resources have a numeric identifier. It is not helpful when
  trying to locate where footprint is generated. This does its best to
  map the number to a usable string."""
  if not PAK_RESOURCE_ID_TO_STRING['inited']:
    # One-time scan for grit-generated resource headers next to the pak
    # file: files named *resources.h containing '#define MY_RESOURCE 1234'.
    PAK_RESOURCE_ID_TO_STRING['inited'] = True
    gen_dir = os.path.join(os.path.dirname(pak_file), 'gen')
    if os.path.isdir(gen_dir):
      for dirname, _dirs, files in os.walk(gen_dir):
        for header in files:
          if header.endswith('resources.h'):
            LoadPakIdsFromResourceFile(os.path.join(dirname, header))
  fallback = 'Pak Resource %d' % resource_id
  return PAK_RESOURCE_ID_TO_STRING.get(resource_id, fallback)
| 601 | |
def AddPakData(symbols, pak_file):
  """Adds pseudo-symbols from a pak file."""
  pak_file = os.path.abspath(pak_file)
  with open(pak_file, 'rb') as pak:
    data = pak.read()

  PAK_FILE_VERSION = 4
  # Header: two uint32s (file version, number of entries) and one uint8
  # (encoding of text resources).
  HEADER_LENGTH = 2 * 4 + 1
  INDEX_ENTRY_SIZE = 2 + 4  # Each entry is a uint16 and a uint32.
  version, num_entries, _encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
  assert version == PAK_FILE_VERSION, ('Unsupported pak file '
                                       'version (%d) in %s. Only '
                                       'support version %d' %
                                       (version, pak_file, PAK_FILE_VERSION))
  if num_entries > 0:
    index = data[HEADER_LENGTH:]
    for i in range(num_entries):
      # A resource's size is the gap to the following entry's offset; the
      # index carries a trailing sentinel entry, so peeking i+1 is safe.
      resource_id, offset = struct.unpack_from('<HI', index,
                                               i * INDEX_ENTRY_SIZE)
      _next_id, next_offset = struct.unpack_from('<HI', index,
                                                 (i + 1) * INDEX_ENTRY_SIZE)
      symbols.append((GetReadablePakResourceName(pak_file, resource_id),
                      'd',  # Data. Approximation.
                      next_offset - offset,
                      pak_file))
| 631 | |
| 632 def _find_in_system_path(binary): | |
| 633 """Locate the full path to binary in the system path or return None | |
| 634 if not found.""" | |
| 635 system_path = os.environ["PATH"].split(os.pathsep) | |
| 636 for path in system_path: | |
| 637 binary_path = os.path.join(path, binary) | |
| 638 if os.path.isfile(binary_path): | |
| 639 return binary_path | |
| 640 return None | |
| 641 | |
def CheckDebugFormatSupport(library, addr2line_binary):
  """Kills the program if debug data is in an unsupported format.

  There are two common versions of the DWARF debug formats and
  since we are right now transitioning from DWARF2 to newer formats,
  it's possible to have a mix of tools that are not compatible. Detect
  that and abort rather than produce meaningless output."""
  # universal_newlines=True makes check_output return str on Python 3 too
  # (it is a harmless newline-translation no-op on Python 2).
  tool_output = subprocess.check_output([addr2line_binary, '--version'],
                                        universal_newlines=True)
  # Bug fix: escape the dot between major and minor version; a bare '.'
  # matched any character.
  version_re = re.compile(r'^GNU [^ ]+ .* (\d+)\.(\d+).*?$', re.M)
  parsed_output = version_re.match(tool_output)
  major = int(parsed_output.group(1))
  minor = int(parsed_output.group(2))
  # binutils >= 2.23 understands DWARF4.
  supports_dwarf4 = major > 2 or major == 2 and minor > 22

  if supports_dwarf4:
    return

  print('Checking version of debug information in %s.' % library)
  debug_info = subprocess.check_output(['readelf', '--debug-dump=info',
                                        '--dwarf-depth=1', library],
                                       universal_newlines=True)
  dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
  parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
  version = int(parsed_dwarf_format_output.group(1))
  if version > 2:
    print('The supplied tools only support DWARF2 debug data but the binary\n' +
          'uses DWARF%d. Update the tools or compile the binary\n' % version +
          'with -gdwarf-2.')
    sys.exit(1)
| 670 | |
| 671 | |
def main():
  """Parses options, symbolizes the library and uploads size reports.

  Returns None (so sys.exit(main()) exits with status 0) unless
  parser.error() or an assertion aborts earlier.
  """
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

      %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten. Not used in '
                    'Skia.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--pak', metavar='PATH',
                    help='if specified, includes the contents of the '
                    'specified *.pak file in the output.')
  parser.add_option('--nm-binary',
                    help='use the specified nm binary to analyze library. '
                    'This is to be used when the nm in the path is not for '
                    'the right architecture or of the right version.')
  parser.add_option('--addr2line-binary',
                    help='use the specified addr2line binary to analyze '
                    'library. This is to be used when the addr2line in '
                    'the path is not for the right architecture or '
                    'of the right version.')
  parser.add_option('--jobs', type='int',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates.'
                    'This argument is only valid when using --library.')
  parser.add_option('--legacy', action='store_true',
                    help='emit legacy binary size report instead of modern')
  parser.add_option('--disable-disambiguation', action='store_true',
                    help='disables the disambiguation process altogether,'
                    ' NOTE: this may, depending on your toolchain, produce'
                    ' output with some symbols at the top layer if addr2line'
                    ' could not get the entire source path.')
  parser.add_option('--source-path', default='./',
                    help='the path to the source code of the output binary, '
                    'default set to current directory. Used in the'
                    ' disambiguation process.')
  parser.add_option('--githash', default='latest',
                    help='Git hash for the binary version. Added by Skia.')
  parser.add_option('--commit_ts', type='int', default=-1,
                    help='Timestamp for the commit. Added by Skia.')
  parser.add_option('--issue_number', default='',
                    help='The trybot issue number in string. Added by Skia.')
  parser.add_option('--gsutil_path', default='gsutil',
                    help='Path to gsutil binary. Added by Skia.')
  opts, _args = parser.parse_args()

  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    if opts.jobs:
      # 'print >> sys.stderr' is Python-2-only syntax; write to the stream
      # directly so this runs on Python 2 and 3.
      sys.stderr.write('WARNING: --jobs has no effect '
                       'when used with --nm-in\n')
  if not opts.jobs:
    # Use the number of processors but cap between 2 and 4 since raw
    # CPU power isn't the limiting factor. It's I/O limited, memory
    # bus limited and available-memory-limited. Too many processes and
    # the computer will run out of memory and it will be slow.
    # Bug fix: cpu_count() used to be wrapped in str(), which defeated the
    # int clamp entirely (always 4 on Python 2, TypeError on Python 3).
    opts.jobs = max(2, min(4, multiprocessing.cpu_count()))

  if opts.addr2line_binary:
    assert os.path.isfile(opts.addr2line_binary)
    addr2line_binary = opts.addr2line_binary
  else:
    addr2line_binary = _find_in_system_path('addr2line')
    assert addr2line_binary, 'Unable to find addr2line in the path. '\
        'Use --addr2line-binary to specify location.'

  if opts.nm_binary:
    assert os.path.isfile(opts.nm_binary)
    nm_binary = opts.nm_binary
  else:
    nm_binary = _find_in_system_path('nm')
    assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
        'to specify location.'

  if opts.pak:
    # Bug fix: the assertion message lacked its %s placeholder, so the
    # message formatting itself raised TypeError when the assert fired.
    assert os.path.isfile(opts.pak), 'Could not find %s' % opts.pak

  print('addr2line: %s' % addr2line_binary)
  print('nm: %s' % nm_binary)

  if opts.library:
    CheckDebugFormatSupport(opts.library, addr2line_binary)

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library,
                         opts.jobs, opts.verbose is True,
                         addr2line_binary, nm_binary,
                         opts.disable_disambiguation is None,
                         opts.source_path)

  if opts.pak:
    AddPakData(symbols, opts.pak)

  if opts.legacy:  # legacy report
    print('Do Not set legacy flag.')

  else:  # modern report
    if opts.library:
      symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library))
    else:
      # Just a guess. Hopefully all paths in the input file are absolute.
      symbol_path_origin_dir = os.path.abspath(os.getcwd())
    DumpCompactTree(symbols, symbol_path_origin_dir, opts.githash,
                    opts.commit_ts, opts.issue_number, opts.gsutil_path)
    print('Report data uploaded to GS.')
| 814 | |
| 815 | |
if __name__ == '__main__':
  # main() returns None on success, which sys.exit() maps to exit status 0.
  sys.exit(main())
| OLD | NEW |