| OLD | NEW |
| 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """The deep heap profiler script for Chrome.""" | 5 """The Deep Memory Profiler analyzer script. |
| 6 | 6 |
| 7 import copy | 7 See http://dev.chromium.org/developers/deep-memory-profiler for details. |
| 8 import cStringIO | 8 """ |
| 9 import datetime | 9 |
| 10 import json | |
| 11 import logging | 10 import logging |
| 12 import optparse | |
| 13 import os | |
| 14 import re | |
| 15 import struct | |
| 16 import subprocess | |
| 17 import sys | 11 import sys |
| 18 import tempfile | |
| 19 import time | |
| 20 import zipfile | |
| 21 | 12 |
| 22 try: | 13 from lib.exception import ParsingException |
| 23 from collections import OrderedDict # pylint: disable=E0611 | 14 import subcommands |
| 24 except ImportError: | |
| 25 # TODO(dmikurube): Remove this once Python 2.7 is required. | |
| 26 BASE_PATH = os.path.dirname(os.path.abspath(__file__)) | |
| 27 SIMPLEJSON_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir, 'third_party') | |
| 28 sys.path.insert(0, SIMPLEJSON_PATH) | |
| 29 from simplejson import OrderedDict | |
| 30 | 15 |
| 31 from range_dict import ExclusiveRangeDict | |
| 32 | |
| 33 BASE_PATH = os.path.dirname(os.path.abspath(__file__)) | |
| 34 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( | |
| 35 BASE_PATH, os.pardir, 'find_runtime_symbols') | |
| 36 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) | |
| 37 | |
| 38 import find_runtime_symbols | |
| 39 import prepare_symbol_info | |
| 40 import proc_maps | |
| 41 | |
| 42 from find_runtime_symbols import FUNCTION_SYMBOLS | |
| 43 from find_runtime_symbols import SOURCEFILE_SYMBOLS | |
| 44 from find_runtime_symbols import TYPEINFO_SYMBOLS | |
| 45 | |
| 46 BUCKET_ID = 5 | |
| 47 VIRTUAL = 0 | |
| 48 COMMITTED = 1 | |
| 49 ALLOC_COUNT = 2 | |
| 50 FREE_COUNT = 3 | |
| 51 NULL_REGEX = re.compile('') | |
| 52 | 16 |
| 53 LOGGER = logging.getLogger('dmprof') | 17 LOGGER = logging.getLogger('dmprof') |
| 54 POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json') | |
| 55 CHROME_SRC_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir) | |
| 56 | |
| 57 DEFAULT_SORTERS = [ | |
| 58 os.path.join(BASE_PATH, 'sorter.malloc-component.json'), | |
| 59 os.path.join(BASE_PATH, 'sorter.malloc-type.json'), | |
| 60 os.path.join(BASE_PATH, 'sorter.vm-map.json'), | |
| 61 os.path.join(BASE_PATH, 'sorter.vm-sharing.json'), | |
| 62 ] | |
| 63 | |
| 64 | |
| 65 # Heap Profile Dump versions | |
| 66 | |
| 67 # DUMP_DEEP_[1-4] are obsolete. | |
| 68 # DUMP_DEEP_2+ distinguish mmap regions and malloc chunks. |
| 69 # DUMP_DEEP_3+ don't include allocation functions in their stack dumps. | |
| 70 # DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*". | |
| 71 # DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1. | |
| 72 # DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3. | |
| 73 DUMP_DEEP_1 = 'DUMP_DEEP_1' | |
| 74 DUMP_DEEP_2 = 'DUMP_DEEP_2' | |
| 75 DUMP_DEEP_3 = 'DUMP_DEEP_3' | |
| 76 DUMP_DEEP_4 = 'DUMP_DEEP_4' | |
| 77 | |
| 78 DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4) | |
| 79 | |
| 80 # DUMP_DEEP_5 doesn't separate sections for malloc and mmap. | |
| 81 # malloc and mmap are identified in bucket files. | |
| 82 # DUMP_DEEP_5 should be processed by POLICY_DEEP_4. | |
| 83 DUMP_DEEP_5 = 'DUMP_DEEP_5' | |
| 84 | |
| 85 # DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5. | |
| 86 DUMP_DEEP_6 = 'DUMP_DEEP_6' | |
| 87 | |
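As a concrete illustration, a DUMP_DEEP_5 or DUMP_DEEP_6 dump announces its version in a header line; the header below is hypothetical, and the slicing mirrors what `Dump._parse_version()` does later in this file:

```python
# Hypothetical first meaningful line of a modern heap profile dump.
line = 'heap profile: DUMP_DEEP_6\n'
version = line[13:].strip()  # -> 'DUMP_DEEP_6', as in Dump._parse_version()
assert version == DUMP_DEEP_6
```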
| 88 # Heap Profile Policy versions | |
| 89 | |
| 90 # POLICY_DEEP_1 DOES NOT include allocation_type columns. | |
| 91 # mmap regions are distinguished with mmap frames in the pattern column. |
| 92 POLICY_DEEP_1 = 'POLICY_DEEP_1' | |
| 93 | |
| 94 # POLICY_DEEP_2 DOES include allocation_type columns. | |
| 95 # mmap regions are distinguished with the allocation_type column. |
| 96 POLICY_DEEP_2 = 'POLICY_DEEP_2' | |
| 97 | |
| 98 # POLICY_DEEP_3 is in JSON format. | |
| 99 POLICY_DEEP_3 = 'POLICY_DEEP_3' | |
| 100 | |
| 101 # POLICY_DEEP_4 contains typeinfo. |
| 102 POLICY_DEEP_4 = 'POLICY_DEEP_4' | |
| 103 | |
| 104 | |
| 105 class EmptyDumpException(Exception): | |
| 106 def __init__(self, value=''): | |
| 107 super(EmptyDumpException, self).__init__() | |
| 108 self.value = value | |
| 109 def __str__(self): | |
| 110 return repr(self.value) | |
| 111 | |
| 112 | |
| 113 class ParsingException(Exception): | |
| 114 def __init__(self, value=''): | |
| 115 super(ParsingException, self).__init__() | |
| 116 self.value = value | |
| 117 def __str__(self): | |
| 118 return repr(self.value) | |
| 119 | |
| 120 | |
| 121 class InvalidDumpException(ParsingException): | |
| 122 def __init__(self, value): | |
| 123 super(InvalidDumpException, self).__init__() | |
| 124 self.value = value | |
| 125 def __str__(self): | |
| 126 return "invalid heap profile dump: %s" % repr(self.value) | |
| 127 | |
| 128 | |
| 129 class ObsoleteDumpVersionException(ParsingException): | |
| 130 def __init__(self, value): | |
| 131 super(ObsoleteDumpVersionException, self).__init__() | |
| 132 self.value = value | |
| 133 def __str__(self): | |
| 134 return "obsolete heap profile dump version: %s" % repr(self.value) | |
| 135 | |
| 136 | |
| 137 class ListAttribute(ExclusiveRangeDict.RangeAttribute): | |
| 138 """Represents a list for an attribute in range_dict.ExclusiveRangeDict.""" | |
| 139 def __init__(self): | |
| 140 super(ListAttribute, self).__init__() | |
| 141 self._list = [] | |
| 142 | |
| 143 def __str__(self): | |
| 144 return str(self._list) | |
| 145 | |
| 146 def __repr__(self): | |
| 147 return 'ListAttribute' + str(self._list) | |
| 148 | |
| 149 def __len__(self): | |
| 150 return len(self._list) | |
| 151 | |
| 152 def __iter__(self): | |
| 153 for x in self._list: | |
| 154 yield x | |
| 155 | |
| 156 def __getitem__(self, index): | |
| 157 return self._list[index] | |
| 158 | |
| 159 def __setitem__(self, index, value): | |
| 160 if index >= len(self._list): | |
| 161 self._list.extend([None] * (index + 1 - len(self._list))) | |
| 162 self._list[index] = value | |
| 163 | |
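A brief usage sketch of the auto-extending `__setitem__` above; assigning past the end pads the list with None:

```python
attr = ListAttribute()
attr[3] = 'x'      # indices 0-2 are padded with None
print list(attr)   # [None, None, None, 'x']
print len(attr)    # 4
```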
| 164 def copy(self): | |
| 165 new_list = ListAttribute() | |
| 166 for index, item in enumerate(self._list): | |
| 167 new_list[index] = copy.deepcopy(item) | |
| 168 return new_list | |
| 169 | |
| 170 | |
| 171 class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute): | |
| 172 """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict.""" | |
| 173 _DUMMY_ENTRY = proc_maps.ProcMapsEntry( | |
| 174 0, # begin | |
| 175 0, # end | |
| 176 '-', # readable | |
| 177 '-', # writable | |
| 178 '-', # executable | |
| 179 '-', # private | |
| 180 0, # offset | |
| 181 '00', # major | |
| 182 '00', # minor | |
| 183 0, # inode | |
| 184 '' # name | |
| 185 ) | |
| 186 | |
| 187 def __init__(self): | |
| 188 super(ProcMapsEntryAttribute, self).__init__() | |
| 189 self._entry = self._DUMMY_ENTRY.as_dict() | |
| 190 | |
| 191 def __str__(self): | |
| 192 return str(self._entry) | |
| 193 | |
| 194 def __repr__(self): | |
| 195 return 'ProcMapsEntryAttribute' + str(self._entry) | |
| 196 | |
| 197 def __getitem__(self, key): | |
| 198 return self._entry[key] | |
| 199 | |
| 200 def __setitem__(self, key, value): | |
| 201 if key not in self._entry: | |
| 202 raise KeyError(key) | |
| 203 self._entry[key] = value | |
| 204 | |
| 205 def copy(self): | |
| 206 new_entry = ProcMapsEntryAttribute() | |
| 207 for key, value in self._entry.iteritems(): | |
| 208 new_entry[key] = copy.deepcopy(value) | |
| 209 return new_entry | |
| 210 | |
| 211 | |
| 212 def skip_while(index, max_index, skipping_condition): | |
| 213 """Increments |index| until |skipping_condition|(|index|) is False. | |
| 214 | |
| 215 Returns: | |
| 216 A pair of (the smallest index at which |skipping_condition| is False, |
| 217 and a boolean which is True if such an index was found before |
| 218 reaching |max_index|). |
| 219 """ | |
| 220 while skipping_condition(index): | |
| 221 index += 1 | |
| 222 if index >= max_index: | |
| 223 return index, False | |
| 224 return index, True | |
| 225 | |
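For example, with hypothetical input, `skip_while` can skip leading comment lines:

```python
lines = ['# comment\n', '# comment\n', 'DATA\n']
index, found = skip_while(0, len(lines),
                          lambda n: lines[n].startswith('#'))
print index, found  # 2 True: lines[2] is the first non-comment line
```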
| 226 | |
| 227 class SymbolDataSources(object): | |
| 228 """Manages symbol data sources in a process. | |
| 229 | |
| 230 The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and | |
| 231 so on. They are collected into a directory '|prefix|.symmap' from the binary | |
| 232 files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py. | |
| 233 | |
| 234 The binaries themselves are not required for profiling. The prepared |
| 235 data sources work in place of a binary even after the binary has been |
| 236 overwritten by another build. |
| 237 | |
| 238 Note that the symbol data sources are often very big, and loading them |
| 239 takes a long time. The 'dmprof' profiler therefore uses |
| 240 'SymbolMappingCache', which caches only the symbols actually used. |
| 241 """ | |
| 242 def __init__(self, prefix, alternative_dirs=None): | |
| 243 self._prefix = prefix | |
| 244 self._prepared_symbol_data_sources_path = None | |
| 245 self._loaded_symbol_data_sources = None | |
| 246 self._alternative_dirs = alternative_dirs or {} | |
| 247 | |
| 248 def prepare(self): | |
| 249 """Prepares symbol data sources by extracting mapping from a binary. | |
| 250 | |
| 251 The prepared symbol data sources are stored in a directory. The directory | |
| 252 name is stored in |self._prepared_symbol_data_sources_path|. | |
| 253 | |
| 254 Returns: | |
| 255 True if succeeded. | |
| 256 """ | |
| 257 LOGGER.info('Preparing symbol mapping...') | |
| 258 self._prepared_symbol_data_sources_path, used_tempdir = ( | |
| 259 prepare_symbol_info.prepare_symbol_info( | |
| 260 self._prefix + '.maps', | |
| 261 output_dir_path=self._prefix + '.symmap', | |
| 262 alternative_dirs=self._alternative_dirs, | |
| 263 use_tempdir=True, | |
| 264 use_source_file_name=True)) | |
| 265 if self._prepared_symbol_data_sources_path: | |
| 266 LOGGER.info(' Prepared symbol mapping.') | |
| 267 if used_tempdir: | |
| 268 LOGGER.warn(' Using a temporary directory for symbol mapping.') | |
| 269 LOGGER.warn(' Delete it yourself when it is no longer needed.') |
| 270 LOGGER.warn(' Or, move the directory elsewhere to reuse it later.') |
| 271 return True | |
| 272 else: | |
| 273 LOGGER.warn(' Failed to prepare symbol mapping.') | |
| 274 return False | |
| 275 | |
| 276 def get(self): | |
| 277 """Returns the prepared symbol data sources. | |
| 278 | |
| 279 Returns: | |
| 280 The prepared symbol data sources. None if failed. | |
| 281 """ | |
| 282 if not self._prepared_symbol_data_sources_path and not self.prepare(): | |
| 283 return None | |
| 284 if not self._loaded_symbol_data_sources: | |
| 285 LOGGER.info('Loading symbol mapping...') | |
| 286 self._loaded_symbol_data_sources = ( | |
| 287 find_runtime_symbols.RuntimeSymbolsInProcess.load( | |
| 288 self._prepared_symbol_data_sources_path)) | |
| 289 return self._loaded_symbol_data_sources | |
| 290 | |
| 291 def path(self): | |
| 292 """Returns the path of the prepared symbol data sources if possible.""" | |
| 293 if not self._prepared_symbol_data_sources_path and not self.prepare(): | |
| 294 return None | |
| 295 return self._prepared_symbol_data_sources_path | |
| 296 | |
| 297 | |
| 298 class SymbolFinder(object): | |
| 299 """Finds corresponding symbols from addresses. | |
| 300 | |
| 301 This class only 'find()'s symbols for a specified |address_list|. |
| 302 It is introduced to make a finder mockable. | |
| 303 """ | |
| 304 def __init__(self, symbol_type, symbol_data_sources): | |
| 305 self._symbol_type = symbol_type | |
| 306 self._symbol_data_sources = symbol_data_sources | |
| 307 | |
| 308 def find(self, address_list): | |
| 309 return find_runtime_symbols.find_runtime_symbols( | |
| 310 self._symbol_type, self._symbol_data_sources.get(), address_list) | |
| 311 | |
| 312 | |
| 313 class SymbolMappingCache(object): | |
| 314 """Caches mapping from actually used addresses to symbols. | |
| 315 | |
| 316 'update()' updates the cache from the original symbol data sources via | |
| 317 'SymbolFinder'. Symbols can be looked up by the method 'lookup()'. | |
| 318 """ | |
| 319 def __init__(self): | |
| 320 self._symbol_mapping_caches = { | |
| 321 FUNCTION_SYMBOLS: {}, | |
| 322 SOURCEFILE_SYMBOLS: {}, | |
| 323 TYPEINFO_SYMBOLS: {}, | |
| 324 } | |
| 325 | |
| 326 def update(self, symbol_type, bucket_set, symbol_finder, cache_f): | |
| 327 """Updates symbol mapping cache on memory and in a symbol cache file. | |
| 328 | |
| 329 It reads cached symbol mapping from a symbol cache file |cache_f| if it | |
| 330 exists. Unresolved addresses are then resolved and added to the cache | |
| 331 both in memory and in the symbol cache file using 'SymbolFinder'. |
| 332 | |
| 333 A cache file is formatted as follows: | |
| 334 <Address> <Symbol> | |
| 335 <Address> <Symbol> | |
| 336 <Address> <Symbol> | |
| 337 ... | |
| 338 | |
| 339 Args: | |
| 340 symbol_type: A type of symbols to update. It should be one of | |
| 341 FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS. | |
| 342 bucket_set: A BucketSet object. | |
| 343 symbol_finder: A SymbolFinder object to find symbols. | |
| 344 cache_f: A readable and writable IO object of the symbol cache file. | |
| 345 """ | |
| 346 cache_f.seek(0, os.SEEK_SET) | |
| 347 self._load(cache_f, symbol_type) | |
| 348 | |
| 349 unresolved_addresses = sorted( | |
| 350 address for address in bucket_set.iter_addresses(symbol_type) | |
| 351 if address not in self._symbol_mapping_caches[symbol_type]) | |
| 352 | |
| 353 if not unresolved_addresses: | |
| 354 LOGGER.info('No need to resolve any more addresses.') | |
| 355 return | |
| 356 | |
| 357 cache_f.seek(0, os.SEEK_END) | |
| 358 LOGGER.info('Resolving %d unresolved addresses.' % |
| 359 len(unresolved_addresses)) | |
| 360 symbol_dict = symbol_finder.find(unresolved_addresses) | |
| 361 | |
| 362 for address, symbol in symbol_dict.iteritems(): | |
| 363 stripped_symbol = symbol.strip() or '?' | |
| 364 self._symbol_mapping_caches[symbol_type][address] = stripped_symbol | |
| 365 cache_f.write('%x %s\n' % (address, stripped_symbol)) | |
| 366 | |
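A minimal sketch of the cache file format described in the docstring above; the address and symbol values are hypothetical:

```python
import cStringIO
cache_f = cStringIO.StringIO('deadbeef MyNamespace::MyFunction\n')
items = cache_f.readline().rstrip().split(None, 1)
print int(items[0], 16)  # 3735928559: the address key, as parsed in _load()
print items[1]           # 'MyNamespace::MyFunction'
```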
| 367 def lookup(self, symbol_type, address): | |
| 368 """Looks up a symbol for a given |address|. | |
| 369 | |
| 370 Args: | |
| 371 symbol_type: A type of symbols to look up. It should be one of |
| 372 FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS. | |
| 373 address: An integer that represents an address. | |
| 374 | |
| 375 Returns: | |
| 376 A string that represents a symbol. | |
| 377 """ | |
| 378 return self._symbol_mapping_caches[symbol_type].get(address) | |
| 379 | |
| 380 def _load(self, cache_f, symbol_type): | |
| 381 try: | |
| 382 for line in cache_f: | |
| 383 items = line.rstrip().split(None, 1) | |
| 384 if len(items) == 1: | |
| 385 items.append('??') | |
| 386 self._symbol_mapping_caches[symbol_type][int(items[0], 16)] = items[1] | |
| 387 LOGGER.info('Loaded %d entries from symbol cache.' % | |
| 388 len(self._symbol_mapping_caches[symbol_type])) | |
| 389 except IOError as e: | |
| 390 LOGGER.info('The symbol cache file is invalid: %s' % e) | |
| 391 | |
| 392 | |
| 393 class Rule(object): | |
| 394 """Represents one matching rule in a policy file.""" | |
| 395 | |
| 396 def __init__(self, | |
| 397 name, | |
| 398 allocator_type, | |
| 399 stackfunction_pattern=None, | |
| 400 stacksourcefile_pattern=None, | |
| 401 typeinfo_pattern=None, | |
| 402 mappedpathname_pattern=None, | |
| 403 mappedpermission_pattern=None, | |
| 404 sharedwith=None): | |
| 405 self._name = name | |
| 406 self._allocator_type = allocator_type | |
| 407 | |
| 408 self._stackfunction_pattern = None | |
| 409 if stackfunction_pattern: | |
| 410 self._stackfunction_pattern = re.compile( | |
| 411 stackfunction_pattern + r'\Z') | |
| 412 | |
| 413 self._stacksourcefile_pattern = None | |
| 414 if stacksourcefile_pattern: | |
| 415 self._stacksourcefile_pattern = re.compile( | |
| 416 stacksourcefile_pattern + r'\Z') | |
| 417 | |
| 418 self._typeinfo_pattern = None | |
| 419 if typeinfo_pattern: | |
| 420 self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z') | |
| 421 | |
| 422 self._mappedpathname_pattern = None | |
| 423 if mappedpathname_pattern: | |
| 424 self._mappedpathname_pattern = re.compile(mappedpathname_pattern + r'\Z') | |
| 425 | |
| 426 self._mappedpermission_pattern = None | |
| 427 if mappedpermission_pattern: | |
| 428 self._mappedpermission_pattern = re.compile( | |
| 429 mappedpermission_pattern + r'\Z') | |
| 430 | |
| 431 self._sharedwith = [] | |
| 432 if sharedwith: | |
| 433 self._sharedwith = sharedwith | |
| 434 | |
| 435 @property | |
| 436 def name(self): | |
| 437 return self._name | |
| 438 | |
| 439 @property | |
| 440 def allocator_type(self): | |
| 441 return self._allocator_type | |
| 442 | |
| 443 @property | |
| 444 def stackfunction_pattern(self): | |
| 445 return self._stackfunction_pattern | |
| 446 | |
| 447 @property | |
| 448 def stacksourcefile_pattern(self): | |
| 449 return self._stacksourcefile_pattern | |
| 450 | |
| 451 @property | |
| 452 def typeinfo_pattern(self): | |
| 453 return self._typeinfo_pattern | |
| 454 | |
| 455 @property | |
| 456 def mappedpathname_pattern(self): | |
| 457 return self._mappedpathname_pattern | |
| 458 | |
| 459 @property | |
| 460 def mappedpermission_pattern(self): | |
| 461 return self._mappedpermission_pattern | |
| 462 | |
| 463 @property | |
| 464 def sharedwith(self): | |
| 465 return self._sharedwith | |
| 466 | |
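Note that every pattern above is compiled with r'\Z' appended, so a rule matches only if the whole symbolized string matches, not just a prefix. A quick sketch with a hypothetical pattern:

```python
import re
p_prefix = re.compile('v8::')          # plain re.match: a prefix is enough
p_full = re.compile('v8::' + r'\Z')    # how Rule compiles its patterns
print bool(p_prefix.match('v8::internal::Heap'))  # True
print bool(p_full.match('v8::internal::Heap'))    # False: not a full match
```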
| 467 | |
| 468 class Policy(object): | |
| 469 """Represents a policy, a content of a policy file.""" | |
| 470 | |
| 471 def __init__(self, rules, version, components): | |
| 472 self._rules = rules | |
| 473 self._version = version | |
| 474 self._components = components | |
| 475 | |
| 476 @property | |
| 477 def rules(self): | |
| 478 return self._rules | |
| 479 | |
| 480 @property | |
| 481 def version(self): | |
| 482 return self._version | |
| 483 | |
| 484 @property | |
| 485 def components(self): | |
| 486 return self._components | |
| 487 | |
| 488 def find_rule(self, component_name): | |
| 489 """Finds a rule whose name is |component_name|. """ | |
| 490 for rule in self._rules: | |
| 491 if rule.name == component_name: | |
| 492 return rule | |
| 493 return None | |
| 494 | |
| 495 def find_malloc(self, bucket): | |
| 496 """Finds a matching component name which a given |bucket| belongs to. | |
| 497 | |
| 498 Args: | |
| 499 bucket: A Bucket object to be searched for. | |
| 500 | |
| 501 Returns: | |
| 502 A string representing a component name. | |
| 503 """ | |
| 504 assert not bucket or bucket.allocator_type == 'malloc' | |
| 505 | |
| 506 if not bucket: | |
| 507 return 'no-bucket' | |
| 508 if bucket.component_cache: | |
| 509 return bucket.component_cache | |
| 510 | |
| 511 stackfunction = bucket.symbolized_joined_stackfunction | |
| 512 stacksourcefile = bucket.symbolized_joined_stacksourcefile | |
| 513 typeinfo = bucket.symbolized_typeinfo | |
| 514 if typeinfo.startswith('0x'): | |
| 515 typeinfo = bucket.typeinfo_name | |
| 516 | |
| 517 for rule in self._rules: | |
| 518 if (rule.allocator_type == 'malloc' and | |
| 519 (not rule.stackfunction_pattern or | |
| 520 rule.stackfunction_pattern.match(stackfunction)) and | |
| 521 (not rule.stacksourcefile_pattern or | |
| 522 rule.stacksourcefile_pattern.match(stacksourcefile)) and | |
| 523 (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))): | |
| 524 bucket.component_cache = rule.name | |
| 525 return rule.name | |
| 526 | |
| 527 assert False | |
| 528 | |
| 529 def find_mmap(self, region, bucket_set, | |
| 530 pageframe=None, group_pfn_counts=None): | |
| 531 """Finds a matching component which a given mmap |region| belongs to. | |
| 532 | |
| 533 It uses |bucket_set| to match with backtraces. If |pageframe| is given, | |
| 534 it considers memory sharing among processes. | |
| 535 | |
| 536 NOTE: Don't use Bucket's |component_cache| for mmap regions because they're | |
| 537 classified not only with bucket information (mappedpathname for example). | |
| 538 | |
| 539 Args: | |
| 540 region: A tuple representing a memory region. | |
| 541 bucket_set: A BucketSet object to look up backtraces. | |
| 542 pageframe: A PageFrame object representing a pageframe, possibly |
| 543 including a pagecount. |
| 544 group_pfn_counts: A dict mapping a PFN to the number of times the | |
| 545 pageframe is mapped by the known "group (Chrome)" processes. |
| 546 | |
| 547 Returns: | |
| 548 A pair of a string representing a component name, and a Bucket object. |
| 549 """ | |
| 550 assert region[0] == 'hooked' | |
| 551 bucket = bucket_set.get(region[1]['bucket_id']) | |
| 552 assert not bucket or bucket.allocator_type == 'mmap' | |
| 553 | |
| 554 if not bucket: | |
| 555 return 'no-bucket', None | |
| 556 | |
| 557 stackfunction = bucket.symbolized_joined_stackfunction | |
| 558 stacksourcefile = bucket.symbolized_joined_stacksourcefile | |
| 559 sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts) | |
| 560 | |
| 561 for rule in self._rules: | |
| 562 if (rule.allocator_type == 'mmap' and | |
| 563 (not rule.stackfunction_pattern or | |
| 564 rule.stackfunction_pattern.match(stackfunction)) and | |
| 565 (not rule.stacksourcefile_pattern or | |
| 566 rule.stacksourcefile_pattern.match(stacksourcefile)) and | |
| 567 (not rule.mappedpathname_pattern or | |
| 568 rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and | |
| 569 (not rule.mappedpermission_pattern or | |
| 570 rule.mappedpermission_pattern.match( | |
| 571 region[1]['vma']['readable'] + | |
| 572 region[1]['vma']['writable'] + | |
| 573 region[1]['vma']['executable'] + | |
| 574 region[1]['vma']['private'])) and | |
| 575 (not rule.sharedwith or | |
| 576 not pageframe or sharedwith in rule.sharedwith)): | |
| 577 return rule.name, bucket | |
| 578 | |
| 579 assert False | |
| 580 | |
| 581 def find_unhooked(self, region, pageframe=None, group_pfn_counts=None): | |
| 582 """Finds a matching component which a given unhooked |region| belongs to. | |
| 583 | |
| 584 If |pageframe| is given, it considers memory sharing among processes. | |
| 585 | |
| 586 Args: | |
| 587 region: A tuple representing a memory region. | |
| 588 pageframe: A PageFrame object representing a pageframe, possibly |
| 589 including a pagecount. |
| 590 group_pfn_counts: A dict mapping a PFN to the number of times the | |
| 591 pageframe is mapped by the known "group (Chrome)" processes. |
| 592 | |
| 593 Returns: | |
| 594 A string representing a component name. | |
| 595 """ | |
| 596 assert region[0] == 'unhooked' | |
| 597 sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts) | |
| 598 | |
| 599 for rule in self._rules: | |
| 600 if (rule.allocator_type == 'unhooked' and | |
| 601 (not rule.mappedpathname_pattern or | |
| 602 rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and | |
| 603 (not rule.mappedpermission_pattern or | |
| 604 rule.mappedpermission_pattern.match( | |
| 605 region[1]['vma']['readable'] + | |
| 606 region[1]['vma']['writable'] + | |
| 607 region[1]['vma']['executable'] + | |
| 608 region[1]['vma']['private'])) and | |
| 609 (not rule.sharedwith or | |
| 610 not pageframe or sharedwith in rule.sharedwith)): | |
| 611 return rule.name | |
| 612 | |
| 613 assert False | |
| 614 | |
| 615 @staticmethod | |
| 616 def load(filename, filetype): | |
| 617 """Loads a policy file of |filename| in a |format|. | |
| 618 | |
| 619 Args: | |
| 620 filename: A filename to be loaded. | |
| 621 filetype: A string to specify a type of the file. Only 'json' is | |
| 622 supported for now. | |
| 623 | |
| 624 Returns: | |
| 625 A loaded Policy object. | |
| 626 """ | |
| 627 with open(os.path.join(BASE_PATH, filename)) as policy_f: | |
| 628 return Policy.parse(policy_f, filetype) | |
| 629 | |
| 630 @staticmethod | |
| 631 def parse(policy_f, filetype): | |
| 632 """Parses a policy file content in a |format|. | |
| 633 | |
| 634 Args: | |
| 635 policy_f: An IO object to be loaded. | |
| 636 filetype: A string to specify a type of the file. Only 'json' is | |
| 637 supported for now. | |
| 638 | |
| 639 Returns: | |
| 640 A loaded Policy object. | |
| 641 """ | |
| 642 if filetype == 'json': | |
| 643 return Policy._parse_json(policy_f) | |
| 644 else: | |
| 645 return None | |
| 646 | |
| 647 @staticmethod | |
| 648 def _parse_json(policy_f): | |
| 649 """Parses policy file in json format. | |
| 650 | |
| 651 A policy file contains component names and their stacktrace patterns |
| 652 written as regular expressions. Those patterns are matched against the |
| 653 symbols of each stacktrace in the order written in the policy file. |
| 654 | |
| 655 Args: | |
| 656 policy_f: A File/IO object to read. | |
| 657 | |
| 658 Returns: | |
| 659 A loaded policy object. | |
| 660 """ | |
| 661 policy = json.load(policy_f) | |
| 662 | |
| 663 rules = [] | |
| 664 for rule in policy['rules']: | |
| 665 stackfunction = rule.get('stackfunction') or rule.get('stacktrace') | |
| 666 stacksourcefile = rule.get('stacksourcefile') | |
| 667 rules.append(Rule( | |
| 668 rule['name'], | |
| 669 rule['allocator'], # allocator_type | |
| 670 stackfunction, | |
| 671 stacksourcefile, | |
| 672 rule['typeinfo'] if 'typeinfo' in rule else None, | |
| 673 rule.get('mappedpathname'), | |
| 674 rule.get('mappedpermission'), | |
| 675 rule.get('sharedwith'))) | |
| 676 | |
| 677 return Policy(rules, policy['version'], policy['components']) | |
| 678 | |
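A minimal, hypothetical policy file that `_parse_json()` accepts; it exercises only the keys read above ('version', 'components', 'rules', and each rule's 'name', 'allocator' and 'stackfunction'):

```python
import cStringIO
POLICY_JSON = '''{
  "version": "POLICY_DEEP_4",
  "components": ["no-bucket", "v8-heap", "unknown"],
  "rules": [
    {"name": "v8-heap", "allocator": "mmap", "stackfunction": "v8::.*"},
    {"name": "unknown", "allocator": "malloc", "stackfunction": ".*"}
  ]
}'''
policy = Policy.parse(cStringIO.StringIO(POLICY_JSON), 'json')
print policy.version  # 'POLICY_DEEP_4'
print policy.find_rule('v8-heap').stackfunction_pattern.pattern  # 'v8::.*\Z'
```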
| 679 @staticmethod | |
| 680 def _categorize_pageframe(pageframe, group_pfn_counts): | |
| 681 """Categorizes a pageframe based on its sharing status. | |
| 682 | |
| 683 Returns: | |
| 684 'private' if |pageframe| is not shared with other processes. 'group' | |
| 685 if |pageframe| is shared only with group (Chrome-related) processes. | |
| 686 'others' if |pageframe| is shared with non-group processes. | |
| 687 """ | |
| 688 if not pageframe: | |
| 689 return 'private' | |
| 690 | |
| 691 if pageframe.pagecount: | |
| 692 if pageframe.pagecount == 1: | |
| 693 return 'private' | |
| 694 elif pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1: | |
| 695 return 'group' | |
| 696 else: | |
| 697 return 'others' | |
| 698 else: | |
| 699 if pageframe.pfn in group_pfn_counts: | |
| 700 return 'group' | |
| 701 else: | |
| 702 return 'private' | |
| 703 | |
| 704 | |
| 705 class PolicySet(object): | |
| 706 """Represents a set of policies.""" | |
| 707 | |
| 708 def __init__(self, policy_directory): | |
| 709 self._policy_directory = policy_directory | |
| 710 | |
| 711 @staticmethod | |
| 712 def load(labels=None): | |
| 713 """Loads a set of policies via the "default policy directory". | |
| 714 | |
| 715 The "default policy directory" contains pairs of policies and their labels. | |
| 716 For example, a policy "policy.l0.json" is labeled "l0" in the default | |
| 717 policy directory "policies.json". | |
| 718 | |
| 719 All policies in the directory are loaded by default. Policies can be | |
| 720 limited by |labels|. | |
| 721 | |
| 722 Args: | |
| 723 labels: An array that contains policy labels to be loaded. | |
| 724 | |
| 725 Returns: | |
| 726 A PolicySet object. | |
| 727 """ | |
| 728 default_policy_directory = PolicySet._load_default_policy_directory() | |
| 729 if labels: | |
| 730 specified_policy_directory = {} | |
| 731 for label in labels: | |
| 732 if label in default_policy_directory: | |
| 733 specified_policy_directory[label] = default_policy_directory[label] | |
| 734 # TODO(dmikurube): Load an un-labeled policy file. | |
| 735 return PolicySet._load_policies(specified_policy_directory) | |
| 736 else: | |
| 737 return PolicySet._load_policies(default_policy_directory) | |
| 738 | |
| 739 def __len__(self): | |
| 740 return len(self._policy_directory) | |
| 741 | |
| 742 def __iter__(self): | |
| 743 for label in self._policy_directory: | |
| 744 yield label | |
| 745 | |
| 746 def __getitem__(self, label): | |
| 747 return self._policy_directory[label] | |
| 748 | |
| 749 @staticmethod | |
| 750 def _load_default_policy_directory(): | |
| 751 with open(POLICIES_JSON_PATH, mode='r') as policies_f: | |
| 752 default_policy_directory = json.load(policies_f) | |
| 753 return default_policy_directory | |
| 754 | |
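For reference, the dict returned above has the following shape (file names hypothetical); `_load_policies()` below reads the 'file' and 'format' keys of each labeled entry:

```python
default_policy_directory = {
    'l0': {'file': 'policy.l0.json', 'format': 'json'},
    'l1': {'file': 'policy.l1.json', 'format': 'json'},
}
```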
| 755 @staticmethod | |
| 756 def _load_policies(directory): | |
| 757 LOGGER.info('Loading policy files.') | |
| 758 policies = {} | |
| 759 for label in directory: | |
| 760 LOGGER.info(' %s: %s' % (label, directory[label]['file'])) | |
| 761 loaded = Policy.load(directory[label]['file'], directory[label]['format']) | |
| 762 if loaded: | |
| 763 policies[label] = loaded | |
| 764 return PolicySet(policies) | |
| 765 | |
| 766 | |
| 767 class Bucket(object): | |
| 768 """Represents a bucket, which is a unit of memory block classification.""" | |
| 769 | |
| 770 def __init__(self, stacktrace, allocator_type, typeinfo, typeinfo_name): | |
| 771 self._stacktrace = stacktrace | |
| 772 self._allocator_type = allocator_type | |
| 773 self._typeinfo = typeinfo | |
| 774 self._typeinfo_name = typeinfo_name | |
| 775 | |
| 776 self._symbolized_stackfunction = stacktrace | |
| 777 self._symbolized_joined_stackfunction = '' | |
| 778 self._symbolized_stacksourcefile = stacktrace | |
| 779 self._symbolized_joined_stacksourcefile = '' | |
| 780 self._symbolized_typeinfo = typeinfo_name | |
| 781 | |
| 782 self.component_cache = '' | |
| 783 | |
| 784 def __str__(self): | |
| 785 result = [] | |
| 786 result.append(self._allocator_type) | |
| 787 if self._symbolized_typeinfo == 'no typeinfo': | |
| 788 result.append('tno_typeinfo') | |
| 789 else: | |
| 790 result.append('t' + self._symbolized_typeinfo) | |
| 791 result.append('n' + self._typeinfo_name) | |
| 792 result.extend(['%s(@%s)' % (function, sourcefile) | |
| 793 for function, sourcefile | |
| 794 in zip(self._symbolized_stackfunction, | |
| 795 self._symbolized_stacksourcefile)]) | |
| 796 return ' '.join(result) | |
| 797 | |
| 798 def symbolize(self, symbol_mapping_cache): | |
| 799 """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|. | |
| 800 | |
| 801 Args: | |
| 802 symbol_mapping_cache: A SymbolMappingCache object. | |
| 803 """ | |
| 804 # TODO(dmikurube): Fill explicitly with numbers if symbol not found. | |
| 805 self._symbolized_stackfunction = [ | |
| 806 symbol_mapping_cache.lookup(FUNCTION_SYMBOLS, address) | |
| 807 for address in self._stacktrace] | |
| 808 self._symbolized_joined_stackfunction = ' '.join( | |
| 809 self._symbolized_stackfunction) | |
| 810 self._symbolized_stacksourcefile = [ | |
| 811 symbol_mapping_cache.lookup(SOURCEFILE_SYMBOLS, address) | |
| 812 for address in self._stacktrace] | |
| 813 self._symbolized_joined_stacksourcefile = ' '.join( | |
| 814 self._symbolized_stacksourcefile) | |
| 815 if not self._typeinfo: | |
| 816 self._symbolized_typeinfo = 'no typeinfo' | |
| 817 else: | |
| 818 self._symbolized_typeinfo = symbol_mapping_cache.lookup( | |
| 819 TYPEINFO_SYMBOLS, self._typeinfo) | |
| 820 if not self._symbolized_typeinfo: | |
| 821 self._symbolized_typeinfo = 'no typeinfo' | |
| 822 | |
| 823 def clear_component_cache(self): | |
| 824 self.component_cache = '' | |
| 825 | |
| 826 @property | |
| 827 def stacktrace(self): | |
| 828 return self._stacktrace | |
| 829 | |
| 830 @property | |
| 831 def allocator_type(self): | |
| 832 return self._allocator_type | |
| 833 | |
| 834 @property | |
| 835 def typeinfo(self): | |
| 836 return self._typeinfo | |
| 837 | |
| 838 @property | |
| 839 def typeinfo_name(self): | |
| 840 return self._typeinfo_name | |
| 841 | |
| 842 @property | |
| 843 def symbolized_stackfunction(self): | |
| 844 return self._symbolized_stackfunction | |
| 845 | |
| 846 @property | |
| 847 def symbolized_joined_stackfunction(self): | |
| 848 return self._symbolized_joined_stackfunction | |
| 849 | |
| 850 @property | |
| 851 def symbolized_stacksourcefile(self): | |
| 852 return self._symbolized_stacksourcefile | |
| 853 | |
| 854 @property | |
| 855 def symbolized_joined_stacksourcefile(self): | |
| 856 return self._symbolized_joined_stacksourcefile | |
| 857 | |
| 858 @property | |
| 859 def symbolized_typeinfo(self): | |
| 860 return self._symbolized_typeinfo | |
| 861 | |
| 862 | |
| 863 class BucketSet(object): | |
| 864 """Represents a set of bucket.""" | |
| 865 def __init__(self): | |
| 866 self._buckets = {} | |
| 867 self._code_addresses = set() | |
| 868 self._typeinfo_addresses = set() | |
| 869 | |
| 870 def load(self, prefix): | |
| 871 """Loads all related bucket files. | |
| 872 | |
| 873 Args: | |
| 874 prefix: A prefix string for bucket file names. | |
| 875 """ | |
| 876 LOGGER.info('Loading bucket files.') | |
| 877 | |
| 878 n = 0 | |
| 879 skipped = 0 | |
| 880 while True: | |
| 881 path = '%s.%04d.buckets' % (prefix, n) | |
| 882 if not os.path.exists(path) or not os.stat(path).st_size: | |
| 883 if skipped > 10: | |
| 884 break | |
| 885 n += 1 | |
| 886 skipped += 1 | |
| 887 continue | |
| 888 LOGGER.info(' %s' % path) | |
| 889 with open(path, 'r') as f: | |
| 890 self._load_file(f) | |
| 891 n += 1 | |
| 892 skipped = 0 | |
| 893 | |
| 894 def _load_file(self, bucket_f): | |
| 895 for line in bucket_f: | |
| 896 words = line.split() | |
| 897 typeinfo = None | |
| 898 typeinfo_name = '' | |
| 899 stacktrace_begin = 2 | |
| 900 for index, word in enumerate(words): | |
| 901 if index < 2: | |
| 902 continue | |
| 903 if word[0] == 't': | |
| 904 typeinfo = int(word[1:], 16) | |
| 905 self._typeinfo_addresses.add(typeinfo) | |
| 906 elif word[0] == 'n': | |
| 907 typeinfo_name = word[1:] | |
| 908 else: | |
| 909 stacktrace_begin = index | |
| 910 break | |
| 911 stacktrace = [int(address, 16) for address in words[stacktrace_begin:]] | |
| 912 for frame in stacktrace: | |
| 913 self._code_addresses.add(frame) | |
| 914 self._buckets[int(words[0])] = Bucket( | |
| 915 stacktrace, words[1], typeinfo, typeinfo_name) | |
| 916 | |
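A hypothetical .buckets line in the format `_load_file()` parses: a bucket id, an allocator type, optional 't<typeinfo address>' and 'n<type name>' words, then the stacktrace addresses in hex:

```python
words = '42 malloc t7f3a00 nstd::basic_string 7f0001 7f0002'.split()
print int(words[0])          # 42: the bucket id
print words[1]               # 'malloc': the allocator type
print int(words[2][1:], 16)  # 0x7f3a00: the typeinfo address ('t' prefix)
print words[3][1:]           # 'std::basic_string': the type name ('n' prefix)
print [int(a, 16) for a in words[4:]]  # the stacktrace addresses
```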
| 917 def __iter__(self): | |
| 918 for bucket_id, bucket_content in self._buckets.iteritems(): | |
| 919 yield bucket_id, bucket_content | |
| 920 | |
| 921 def __getitem__(self, bucket_id): | |
| 922 return self._buckets[bucket_id] | |
| 923 | |
| 924 def get(self, bucket_id): | |
| 925 return self._buckets.get(bucket_id) | |
| 926 | |
| 927 def symbolize(self, symbol_mapping_cache): | |
| 928 for bucket_content in self._buckets.itervalues(): | |
| 929 bucket_content.symbolize(symbol_mapping_cache) | |
| 930 | |
| 931 def clear_component_cache(self): | |
| 932 for bucket_content in self._buckets.itervalues(): | |
| 933 bucket_content.clear_component_cache() | |
| 934 | |
| 935 def iter_addresses(self, symbol_type): | |
| 936 if symbol_type in [FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS]: | |
| 937 for function in self._code_addresses: | |
| 938 yield function | |
| 939 else: | |
| 940 for function in self._typeinfo_addresses: | |
| 941 yield function | |
| 942 | |
| 943 | |
| 944 class PageFrame(object): | |
| 945 """Represents a pageframe and maybe its shared count.""" | |
| 946 def __init__(self, pfn, size, pagecount, start_truncated, end_truncated): | |
| 947 self._pfn = pfn | |
| 948 self._size = size | |
| 949 self._pagecount = pagecount | |
| 950 self._start_truncated = start_truncated | |
| 951 self._end_truncated = end_truncated | |
| 952 | |
| 953 def __str__(self): | |
| 954 result = str() | |
| 955 if self._start_truncated: | |
| 956 result += '<' | |
| 957 result += '%06x#%d' % (self._pfn, self._pagecount) | |
| 958 if self._end_truncated: | |
| 959 result += '>' | |
| 960 return result | |
| 961 | |
| 962 def __repr__(self): | |
| 963 return str(self) | |
| 964 | |
| 965 @staticmethod | |
| 966 def parse(encoded_pfn, size): | |
| 967 start = 0 | |
| 968 end = len(encoded_pfn) | |
| 969 end_truncated = False | |
| 970 if encoded_pfn.endswith('>'): | |
| 971 end = len(encoded_pfn) - 1 | |
| 972 end_truncated = True | |
| 973 pagecount_found = encoded_pfn.find('#') | |
| 974 pagecount = None | |
| 975 if pagecount_found >= 0: | |
| 976 encoded_pagecount = 'AAA' + encoded_pfn[pagecount_found+1 : end] | |
| 977 pagecount = struct.unpack( | |
| 978 '>I', '\x00' + encoded_pagecount.decode('base64'))[0] | |
| 979 end = pagecount_found | |
| 980 start_truncated = False | |
| 981 if encoded_pfn.startswith('<'): | |
| 982 start = 1 | |
| 983 start_truncated = True | |
| 984 | |
| 985 pfn = struct.unpack( | |
| 986 '>I', '\x00' + (encoded_pfn[start:end]).decode('base64'))[0] | |
| 987 | |
| 988 return PageFrame(pfn, size, pagecount, start_truncated, end_truncated) | |
| 989 | |
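The PFN decoded above is a 24-bit value in standard base64 (four characters), optionally followed by '#<pagecount>' and wrapped in '<'/'>' truncation markers. A minimal decoding sketch with a hypothetical value:

```python
import struct
encoded = 'AAAB'  # hypothetical 24-bit PFN encoded in base64
pfn = struct.unpack('>I', '\x00' + encoded.decode('base64'))[0]
print pfn  # 1
```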
| 990 @property | |
| 991 def pfn(self): | |
| 992 return self._pfn | |
| 993 | |
| 994 @property | |
| 995 def size(self): | |
| 996 return self._size | |
| 997 | |
| 998 def set_size(self, size): | |
| 999 self._size = size | |
| 1000 | |
| 1001 @property | |
| 1002 def pagecount(self): | |
| 1003 return self._pagecount | |
| 1004 | |
| 1005 @property | |
| 1006 def start_truncated(self): | |
| 1007 return self._start_truncated | |
| 1008 | |
| 1009 @property | |
| 1010 def end_truncated(self): | |
| 1011 return self._end_truncated | |
| 1012 | |
| 1013 | |
| 1014 class PFNCounts(object): | |
| 1015 """Represents counts of PFNs in a process.""" | |
| 1016 | |
| 1017 _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') | |
| 1018 | |
| 1019 def __init__(self, path, modified_time): | |
| 1020 matched = self._PATH_PATTERN.match(path) | |
| 1021 if matched: | |
| 1022 self._pid = int(matched.group(2)) | |
| 1023 else: | |
| 1024 self._pid = 0 | |
| 1025 self._command_line = '' | |
| 1026 self._pagesize = 4096 | |
| 1027 self._path = path | |
| 1028 self._pfn_meta = '' | |
| 1029 self._pfnset = {} | |
| 1030 self._reason = '' | |
| 1031 self._time = modified_time | |
| 1032 | |
| 1033 @staticmethod | |
| 1034 def load(path, log_header='Loading PFNs from a heap profile dump: '): | |
| 1035 pfnset = PFNCounts(path, float(os.stat(path).st_mtime)) | |
| 1036 LOGGER.info('%s%s' % (log_header, path)) | |
| 1037 | |
| 1038 with open(path, 'r') as pfnset_f: | |
| 1039 pfnset.load_file(pfnset_f) | |
| 1040 | |
| 1041 return pfnset | |
| 1042 | |
| 1043 @property | |
| 1044 def path(self): | |
| 1045 return self._path | |
| 1046 | |
| 1047 @property | |
| 1048 def pid(self): | |
| 1049 return self._pid | |
| 1050 | |
| 1051 @property | |
| 1052 def time(self): | |
| 1053 return self._time | |
| 1054 | |
| 1055 @property | |
| 1056 def reason(self): | |
| 1057 return self._reason | |
| 1058 | |
| 1059 @property | |
| 1060 def iter_pfn(self): | |
| 1061 for pfn, count in self._pfnset.iteritems(): | |
| 1062 yield pfn, count | |
| 1063 | |
| 1064 def load_file(self, pfnset_f): | |
| 1065 prev_pfn_end_truncated = None | |
| 1066 for line in pfnset_f: | |
| 1067 line = line.strip() | |
| 1068 if line.startswith('GLOBAL_STATS:') or line.startswith('STACKTRACES:'): | |
| 1069 break | |
| 1070 elif line.startswith('PF: '): | |
| 1071 for encoded_pfn in line[3:].split(): | |
| 1072 page_frame = PageFrame.parse(encoded_pfn, self._pagesize) | |
| 1073 if page_frame.start_truncated and ( | |
| 1074 not prev_pfn_end_truncated or | |
| 1075 prev_pfn_end_truncated != page_frame.pfn): | |
| 1076 LOGGER.error('Broken page frame number: %s.' % encoded_pfn) | |
| 1077 self._pfnset[page_frame.pfn] = self._pfnset.get(page_frame.pfn, 0) + 1 | |
| 1078 if page_frame.end_truncated: | |
| 1079 prev_pfn_end_truncated = page_frame.pfn | |
| 1080 else: | |
| 1081 prev_pfn_end_truncated = None | |
| 1082 elif line.startswith('PageSize: '): | |
| 1083 self._pagesize = int(line[10:]) | |
| 1084 elif line.startswith('PFN: '): | |
| 1085 self._pfn_meta = line[5:] | |
| 1086 elif line.startswith('PageFrame: '): | |
| 1087 self._pfn_meta = line[11:] | |
| 1088 elif line.startswith('Time: '): | |
| 1089 self._time = float(line[6:]) | |
| 1090 elif line.startswith('CommandLine: '): | |
| 1091 self._command_line = line[13:] | |
| 1092 elif line.startswith('Reason: '): | |
| 1093 self._reason = line[8:] | |
| 1094 | |
| 1095 | |
| 1096 class Dump(object): | |
| 1097 """Represents a heap profile dump.""" | |
| 1098 | |
| 1099 _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') | |
| 1100 | |
| 1101 _HOOK_PATTERN = re.compile( | |
| 1102 r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+' | |
| 1103 r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE) | |
| 1104 | |
| 1105 _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' | |
| 1106 '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)') | |
| 1107 _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' | |
| 1108 '(?P<RESERVED>[0-9]+)') | |
| 1109 | |
| 1110 _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)') | |
| 1111 _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)') | |
| 1112 | |
| 1113 _TIME_PATTERN_FORMAT = re.compile( | |
| 1114 r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?') | |
| 1115 _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$') | |
| 1116 | |
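A hypothetical MMAP_LIST region line matched by the patterns above; a '(' or ')' in place of a space around an address marks the address as truncated to the enclosing VMA. (`_HOOK_PATTERN` is private; this is for illustration only.)

```python
line = '  7f0000000000 - 7f0000001000   hooked mmap 4096 / 4096 @ 42'
matched = Dump._HOOK_PATTERN.match(line)
print matched.group(7)  # 'hooked'
submatched = Dump._HOOKED_PATTERN.match(matched.group(8))
print submatched.group('COMMITTED'), submatched.group('BUCKETID')  # 4096 42
```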
| 1117 def __init__(self, path, modified_time): | |
| 1118 self._path = path | |
| 1119 matched = self._PATH_PATTERN.match(path) | |
| 1120 self._pid = int(matched.group(2)) | |
| 1121 self._count = int(matched.group(3)) | |
| 1122 self._time = modified_time | |
| 1123 self._map = {} | |
| 1124 self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute) | |
| 1125 self._stacktrace_lines = [] | |
| 1126 self._global_stats = {} # used only in apply_policy | |
| 1127 | |
| 1128 self._run_id = '' | |
| 1129 self._pagesize = 4096 | |
| 1130 self._pageframe_length = 0 | |
| 1131 self._pageframe_encoding = '' | |
| 1132 self._has_pagecount = False | |
| 1133 | |
| 1134 self._version = '' | |
| 1135 self._lines = [] | |
| 1136 | |
| 1137 @property | |
| 1138 def path(self): | |
| 1139 return self._path | |
| 1140 | |
| 1141 @property | |
| 1142 def count(self): | |
| 1143 return self._count | |
| 1144 | |
| 1145 @property | |
| 1146 def time(self): | |
| 1147 return self._time | |
| 1148 | |
| 1149 @property | |
| 1150 def iter_map(self): | |
| 1151 for region in sorted(self._map.iteritems()): | |
| 1152 yield region[0], region[1] | |
| 1153 | |
| 1154 def iter_procmaps(self): | |
| 1155 for begin, end, attr in self._procmaps.iter_range(): |
| 1156 yield begin, end, attr | |
| 1157 | |
| 1158 @property | |
| 1159 def iter_stacktrace(self): | |
| 1160 for line in self._stacktrace_lines: | |
| 1161 yield line | |
| 1162 | |
| 1163 def global_stat(self, name): | |
| 1164 return self._global_stats[name] | |
| 1165 | |
| 1166 @property | |
| 1167 def run_id(self): | |
| 1168 return self._run_id | |
| 1169 | |
| 1170 @property | |
| 1171 def pagesize(self): | |
| 1172 return self._pagesize | |
| 1173 | |
| 1174 @property | |
| 1175 def pageframe_length(self): | |
| 1176 return self._pageframe_length | |
| 1177 | |
| 1178 @property | |
| 1179 def pageframe_encoding(self): | |
| 1180 return self._pageframe_encoding | |
| 1181 | |
| 1182 @property | |
| 1183 def has_pagecount(self): | |
| 1184 return self._has_pagecount | |
| 1185 | |
| 1186 @staticmethod | |
| 1187 def load(path, log_header='Loading a heap profile dump: '): | |
| 1188 """Loads a heap profile dump. | |
| 1189 | |
| 1190 Args: | |
| 1191 path: A file path string to load. | |
| 1192 log_header: A preceding string for log messages. | |
| 1193 | |
| 1194 Returns: | |
| 1195 A loaded Dump object. | |
| 1196 | |
| 1197 Raises: | |
| 1198 ParsingException for invalid heap profile dumps. | |
| 1199 """ | |
| 1200 dump = Dump(path, os.stat(path).st_mtime) | |
| 1201 with open(path, 'r') as f: | |
| 1202 dump.load_file(f, log_header) | |
| 1203 return dump | |
| 1204 | |
| 1205 def load_file(self, f, log_header): | |
| 1206 self._lines = [line for line in f | |
| 1207 if line and not line.startswith('#')] | |
| 1208 | |
| 1209 try: | |
| 1210 self._version, ln = self._parse_version() | |
| 1211 self._parse_meta_information() | |
| 1212 if self._version == DUMP_DEEP_6: | |
| 1213 self._parse_mmap_list() | |
| 1214 self._parse_global_stats() | |
| 1215 self._extract_stacktrace_lines(ln) | |
| 1216 except EmptyDumpException: | |
| 1217 LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path)) | |
| 1218 except ParsingException as e: |
| 1219 LOGGER.error('%s%s ...error %s' % (log_header, self._path, e)) | |
| 1220 raise | |
| 1221 else: | |
| 1222 LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version)) | |
| 1223 | |
| 1224 def _parse_version(self): | |
| 1225 """Parses a version string in self._lines. | |
| 1226 | |
| 1227 Returns: | |
| 1228 A pair of (a string representing a version of the stacktrace dump, | |
| 1229 and an integer indicating a line number next to the version string). | |
| 1230 | |
| 1231 Raises: | |
| 1232 ParsingException for invalid dump versions. | |
| 1233 """ | |
| 1234 version = '' | |
| 1235 | |
| 1236 # Skip until an identifiable line. | |
| 1237 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') | |
| 1238 if not self._lines: | |
| 1239 raise EmptyDumpException('Empty heap dump file.') | |
| 1240 (ln, found) = skip_while( | |
| 1241 0, len(self._lines), | |
| 1242 lambda n: not self._lines[n].startswith(headers)) | |
| 1243 if not found: | |
| 1244 raise InvalidDumpException('No version header.') | |
| 1245 | |
| 1246 # Identify a version. | |
| 1247 if self._lines[ln].startswith('heap profile: '): | |
| 1248 version = self._lines[ln][13:].strip() | |
| 1249 if version in (DUMP_DEEP_5, DUMP_DEEP_6): | |
| 1250 (ln, _) = skip_while( | |
| 1251 ln, len(self._lines), | |
| 1252 lambda n: self._lines[n] != 'STACKTRACES:\n') | |
| 1253 elif version in DUMP_DEEP_OBSOLETE: | |
| 1254 raise ObsoleteDumpVersionException(version) | |
| 1255 else: | |
| 1256 raise InvalidDumpException('Invalid version: %s' % version) | |
| 1257 elif self._lines[ln] == 'STACKTRACES:\n': | |
| 1258 raise ObsoleteDumpVersionException(DUMP_DEEP_1) | |
| 1259 elif self._lines[ln] == 'MMAP_STACKTRACES:\n': | |
| 1260 raise ObsoleteDumpVersionException(DUMP_DEEP_2) | |
| 1261 | |
| 1262 return (version, ln) | |
| 1263 | |
| 1264 def _parse_global_stats(self): | |
| 1265 """Parses lines in self._lines as global stats.""" | |
| 1266 (ln, _) = skip_while( | |
| 1267 0, len(self._lines), | |
| 1268 lambda n: self._lines[n] != 'GLOBAL_STATS:\n') | |
| 1269 | |
| 1270 global_stat_names = [ | |
| 1271 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack', | |
| 1272 'other', 'nonprofiled-absent', 'nonprofiled-anonymous', | |
| 1273 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', | |
| 1274 'nonprofiled-stack', 'nonprofiled-other', | |
| 1275 'profiled-mmap', 'profiled-malloc'] | |
| 1276 | |
| 1277 for prefix in global_stat_names: | |
| 1278 (ln, _) = skip_while( | |
| 1279 ln, len(self._lines), | |
| 1280 lambda n: self._lines[n].split()[0] != prefix) | |
| 1281 words = self._lines[ln].split() | |
| 1282 self._global_stats[prefix + '_virtual'] = int(words[-2]) | |
| 1283 self._global_stats[prefix + '_committed'] = int(words[-1]) | |
| 1284 | |
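Each GLOBAL_STATS line ends with the virtual and committed sizes; those last two columns are all `_parse_global_stats()` reads. A hypothetical line:

```python
words = 'profiled-mmap 123456789 12345678'.split()
print int(words[-2])  # 123456789: stored as 'profiled-mmap_virtual'
print int(words[-1])  # 12345678: stored as 'profiled-mmap_committed'
```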
| 1285 def _parse_meta_information(self): | |
| 1286 """Parses lines in self._lines for meta information.""" | |
| 1287 (ln, found) = skip_while( | |
| 1288 0, len(self._lines), | |
| 1289 lambda n: self._lines[n] != 'META:\n') | |
| 1290 if not found: | |
| 1291 return | |
| 1292 ln += 1 | |
| 1293 | |
| 1294 while True: | |
| 1295 if self._lines[ln].startswith('Time:'): | |
| 1296 matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln]) | |
| 1297 matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln]) | |
| 1298 if matched_format: | |
| 1299 self._time = time.mktime(datetime.datetime.strptime( | |
| 1300 matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple()) | |
| 1301 if matched_format.group(2): | |
| 1302 self._time += float(matched_format.group(2)[1:]) / 1000.0 | |
| 1303 elif matched_seconds: | |
| 1304 self._time = float(matched_seconds.group(1)) | |
| 1305 elif self._lines[ln].startswith('Reason:'): | |
| 1306 pass # Nothing to do for 'Reason:' | |
| 1307 elif self._lines[ln].startswith('PageSize: '): | |
| 1308 self._pagesize = int(self._lines[ln][10:]) | |
| 1309 elif self._lines[ln].startswith('CommandLine:'): | |
| 1310 pass | |
| 1311 elif (self._lines[ln].startswith('PageFrame: ') or | |
| 1312 self._lines[ln].startswith('PFN: ')): | |
| 1313 if self._lines[ln].startswith('PageFrame: '): | |
| 1314 words = self._lines[ln][11:].split(',') | |
| 1315 else: | |
| 1316 words = self._lines[ln][5:].split(',') | |
| 1317 for word in words: | |
| 1318 if word == '24': | |
| 1319 self._pageframe_length = 24 | |
| 1320 elif word == 'Base64': | |
| 1321 self._pageframe_encoding = 'base64' | |
| 1322 elif word == 'PageCount': | |
| 1323 self._has_pagecount = True | |
| 1324 elif self._lines[ln].startswith('RunID: '): | |
| 1325 self._run_id = self._lines[ln][7:].strip() | |
| 1326 elif (self._lines[ln].startswith('MMAP_LIST:') or | |
| 1327 self._lines[ln].startswith('GLOBAL_STATS:')): | |
| 1328 # Stop at "MMAP_LIST:" or "GLOBAL_STATS:"; meta information ends here. |
| 1329 break | |
| 1330 else: | |
| 1331 pass | |
| 1332 ln += 1 | |
| 1333 | |
| 1334 def _parse_mmap_list(self): | |
| 1335 """Parses lines in self._lines as a mmap list.""" | |
| 1336 (ln, found) = skip_while( | |
| 1337 0, len(self._lines), | |
| 1338 lambda n: self._lines[n] != 'MMAP_LIST:\n') | |
| 1339 if not found: | |
| 1340 return {} | |
| 1341 | |
| 1342 ln += 1 | |
| 1343 self._map = {} | |
| 1344 current_vma = {} | |
| 1345 pageframe_list = [] | |
| 1346 while True: | |
| 1347 entry = proc_maps.ProcMaps.parse_line(self._lines[ln]) | |
| 1348 if entry: | |
| 1349 current_vma = {} | |
| 1350 for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end): | |
| 1351 for key, value in entry.as_dict().iteritems(): | |
| 1352 attr[key] = value | |
| 1353 current_vma[key] = value | |
| 1354 ln += 1 | |
| 1355 continue | |
| 1356 | |
| 1357 if self._lines[ln].startswith(' PF: '): | |
| 1358 for pageframe in self._lines[ln][5:].split(): | |
| 1359 pageframe_list.append(PageFrame.parse(pageframe, self._pagesize)) | |
| 1360 ln += 1 | |
| 1361 continue | |
| 1362 | |
| 1363 matched = self._HOOK_PATTERN.match(self._lines[ln]) | |
| 1364 if not matched: | |
| 1365 break | |
| 1366 # 2: starting address | |
| 1367 # 5: end address | |
| 1368 # 7: hooked or unhooked | |
| 1369 # 8: additional information | |
| 1370 if matched.group(7) == 'hooked': | |
| 1371 submatched = self._HOOKED_PATTERN.match(matched.group(8)) | |
| 1372 if not submatched: | |
| 1373 submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8)) | |
| 1374 elif matched.group(7) == 'unhooked': | |
| 1375 submatched = self._UNHOOKED_PATTERN.match(matched.group(8)) | |
| 1376 if not submatched: | |
| 1377 submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8)) | |
| 1378 else: | |
| 1379 assert matched.group(7) in ['hooked', 'unhooked'] | |
| 1380 | |
| 1381 submatched_dict = submatched.groupdict() | |
| 1382 region_info = { 'vma': current_vma } | |
| 1383 if submatched_dict.get('TYPE'): | |
| 1384 region_info['type'] = submatched_dict['TYPE'].strip() | |
| 1385 if submatched_dict.get('COMMITTED'): | |
| 1386 region_info['committed'] = int(submatched_dict['COMMITTED']) | |
| 1387 if submatched_dict.get('RESERVED'): | |
| 1388 region_info['reserved'] = int(submatched_dict['RESERVED']) | |
| 1389 if submatched_dict.get('BUCKETID'): | |
| 1390 region_info['bucket_id'] = int(submatched_dict['BUCKETID']) | |
| 1391 | |
| 1392 if matched.group(1) == '(': | |
| 1393 start = current_vma['begin'] | |
| 1394 else: | |
| 1395 start = int(matched.group(2), 16) | |
| 1396 if matched.group(4) == '(': | |
| 1397 end = current_vma['end'] | |
| 1398 else: | |
| 1399 end = int(matched.group(5), 16) | |
| 1400 | |
| 1401 if pageframe_list and pageframe_list[0].start_truncated: | |
| 1402 pageframe_list[0].set_size( | |
| 1403 pageframe_list[0].size - start % self._pagesize) | |
| 1404 if pageframe_list and pageframe_list[-1].end_truncated: | |
| 1405 pageframe_list[-1].set_size( | |
| 1406 pageframe_list[-1].size - (self._pagesize - end % self._pagesize)) | |
| 1407 region_info['pageframe'] = pageframe_list | |
| 1408 pageframe_list = [] | |
| 1409 | |
| 1410 self._map[(start, end)] = (matched.group(7), region_info) | |
| 1411 ln += 1 | |
| 1412 | |
| 1413 def _extract_stacktrace_lines(self, line_number): | |
| 1414 """Extracts the position of stacktrace lines. | |
| 1415 | |
| 1416 Valid stacktrace lines are stored into self._stacktrace_lines. | |
| 1417 | |
| 1418 Args: | |
| 1419 line_number: A line number to start parsing in lines. | |
| 1420 | |
| 1421 Raises: | |
| 1422 ParsingException for invalid dump versions. | |
| 1423 """ | |
| 1424 if self._version in (DUMP_DEEP_5, DUMP_DEEP_6): | |
| 1425 (line_number, _) = skip_while( | |
| 1426 line_number, len(self._lines), | |
| 1427 lambda n: not self._lines[n].split()[0].isdigit()) | |
| 1428 stacktrace_start = line_number | |
| 1429 (line_number, _) = skip_while( | |
| 1430 line_number, len(self._lines), | |
| 1431 lambda n: self._check_stacktrace_line(self._lines[n])) | |
| 1432 self._stacktrace_lines = self._lines[stacktrace_start:line_number] | |
| 1433 | |
| 1434 elif self._version in DUMP_DEEP_OBSOLETE: | |
| 1435 raise ObsoleteDumpVersionException(self._version) | |
| 1436 | |
| 1437 else: | |
| 1438 raise InvalidDumpException('Invalid version: %s' % self._version) | |
| 1439 | |
| 1440 @staticmethod | |
| 1441 def _check_stacktrace_line(stacktrace_line): | |
| 1442 """Checks if a given stacktrace_line is valid as stacktrace. | |
| 1443 | |
| 1444 Args: | |
| 1445 stacktrace_line: A string to be checked. | |
| 1446 | |
| 1447 Returns: | |
| 1448 True if the given stacktrace_line is valid. | |
| 1449 """ | |
| 1450 words = stacktrace_line.split() | |
| 1451 if len(words) < BUCKET_ID + 1: | |
| 1452 return False | |
| 1453 if words[BUCKET_ID - 1] != '@': | |
| 1454 return False | |
| 1455 return True | |
| 1456 | |
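A hypothetical STACKTRACES line; its leading columns correspond to the VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT and BUCKET_ID indices defined at the top of this file:

```python
words = '1024 512 10 2 @ 42'.split()
print words[VIRTUAL], words[COMMITTED]  # '1024' '512'
print words[BUCKET_ID - 1]  # '@': the marker _check_stacktrace_line() checks
print int(words[BUCKET_ID]) # 42: the bucket id looked up in a BucketSet
```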
| 1457 | |
| 1458 class DumpList(object): | |
| 1459 """Represents a sequence of heap profile dumps.""" | |
| 1460 | |
| 1461 def __init__(self, dump_list): | |
| 1462 self._dump_list = dump_list | |
| 1463 | |
| 1464 @staticmethod | |
| 1465 def load(path_list): | |
| 1466 LOGGER.info('Loading heap dump profiles.') | |
| 1467 dump_list = [] | |
| 1468 for path in path_list: | |
| 1469 dump_list.append(Dump.load(path, ' ')) | |
| 1470 return DumpList(dump_list) | |
| 1471 | |
| 1472 def __len__(self): | |
| 1473 return len(self._dump_list) | |
| 1474 | |
| 1475 def __iter__(self): | |
| 1476 for dump in self._dump_list: | |
| 1477 yield dump | |
| 1478 | |
| 1479 def __getitem__(self, index): | |
| 1480 return self._dump_list[index] | |
| 1481 | |
| 1482 | |
| 1483 class Unit(object): | |
| 1484 """Represents a minimum unit of memory usage categorization. | |
| 1485 | |
| 1486 It is supposed to be inherited for some different spaces like the entire | |
| 1487 virtual memory and malloc arena. Such different spaces are called "worlds" | |
| 1488 in dmprof. (For example, the "vm" world and the "malloc" world.) | |
| 1489 """ | |
| 1490 def __init__(self, unit_id, size): | |
| 1491 self._unit_id = unit_id | |
| 1492 self._size = size | |
| 1493 | |
| 1494 @property | |
| 1495 def unit_id(self): | |
| 1496 return self._unit_id | |
| 1497 | |
| 1498 @property | |
| 1499 def size(self): | |
| 1500 return self._size | |
| 1501 | |
| 1502 | |
| 1503 class VMUnit(Unit): | |
| 1504 """Represents a Unit for a memory region on virtual memory.""" | |
| 1505 def __init__(self, unit_id, committed, reserved, mmap, region, | |
| 1506 pageframe=None, group_pfn_counts=None): | |
| 1507 super(VMUnit, self).__init__(unit_id, committed) | |
| 1508 self._reserved = reserved | |
| 1509 self._mmap = mmap | |
| 1510 self._region = region | |
| 1511 self._pageframe = pageframe | |
| 1512 self._group_pfn_counts = group_pfn_counts | |
| 1513 | |
| 1514 @property | |
| 1515 def committed(self): | |
| 1516 return self._size | |
| 1517 | |
| 1518 @property | |
| 1519 def reserved(self): | |
| 1520 return self._reserved | |
| 1521 | |
| 1522 @property | |
| 1523 def mmap(self): | |
| 1524 return self._mmap | |
| 1525 | |
| 1526 @property | |
| 1527 def region(self): | |
| 1528 return self._region | |
| 1529 | |
| 1530 @property | |
| 1531 def pageframe(self): | |
| 1532 return self._pageframe | |
| 1533 | |
| 1534 @property | |
| 1535 def group_pfn_counts(self): | |
| 1536 return self._group_pfn_counts | |
| 1537 | |
| 1538 | |
| 1539 class MMapUnit(VMUnit): | |
| 1540 """Represents a Unit for a mmap'ed region.""" | |
| 1541 def __init__(self, unit_id, committed, reserved, region, bucket_set, | |
| 1542 pageframe=None, group_pfn_counts=None): | |
| 1543 super(MMapUnit, self).__init__(unit_id, committed, reserved, True, | |
| 1544 region, pageframe, group_pfn_counts) | |
| 1545 self._bucket_set = bucket_set | |
| 1546 | |
| 1547 def __repr__(self): | |
| 1548 return str(self.region) | |
| 1549 | |
| 1550 @property | |
| 1551 def bucket_set(self): | |
| 1552 return self._bucket_set | |
| 1553 | |
| 1554 | |
| 1555 class UnhookedUnit(VMUnit): | |
| 1556 """Represents a Unit for a non-mmap'ed memory region on virtual memory.""" | |
| 1557 def __init__(self, unit_id, committed, reserved, region, | |
| 1558 pageframe=None, group_pfn_counts=None): | |
| 1559 super(UnhookedUnit, self).__init__(unit_id, committed, reserved, False, | |
| 1560 region, pageframe, group_pfn_counts) | |
| 1561 | |
| 1562 def __repr__(self): | |
| 1563 return str(self.region) | |
| 1564 | |
| 1565 | |
| 1566 class MallocUnit(Unit): | |
| 1567 """Represents a Unit for a malloc'ed memory block.""" | |
| 1568 def __init__(self, unit_id, size, alloc_count, free_count, bucket): | |
| 1569 super(MallocUnit, self).__init__(unit_id, size) | |
| 1570 self._bucket = bucket | |
| 1571 self._alloc_count = alloc_count | |
| 1572 self._free_count = free_count | |
| 1573 | |
| 1574 def __repr__(self): | |
| 1575 return str(self.bucket) | |
| 1576 | |
| 1577 @property | |
| 1578 def bucket(self): | |
| 1579 return self._bucket | |
| 1580 | |
| 1581 @property | |
| 1582 def alloc_count(self): | |
| 1583 return self._alloc_count | |
| 1584 | |
| 1585 @property | |
| 1586 def free_count(self): | |
| 1587 return self._free_count | |
| 1588 | |
| 1589 | |
| 1590 class UnitSet(object): | |
| 1591 """Represents an iterable set of Units.""" | |
| 1592 def __init__(self, world): | |
| 1593 self._units = {} | |
| 1594 self._world = world | |
| 1595 | |
| 1596 def __repr__(self): | |
| 1597 return str(self._units) | |
| 1598 | |
| 1599 def __iter__(self): | |
| 1600 for unit_id in sorted(self._units): | |
| 1601 yield self._units[unit_id] | |
| 1602 | |
| 1603 def append(self, unit, overwrite=False): | |
| 1604 if not overwrite and unit.unit_id in self._units: | |
| 1605 LOGGER.error('The unit id=%s already exists.' % str(unit.unit_id)) | |
| 1606 self._units[unit.unit_id] = unit | |
| 1607 | |
| 1608 | |
| 1609 class AbstractRule(object): | |
| 1610 """An abstract class for rules to be matched with units.""" | |
| 1611 def __init__(self, dct): | |
| 1612 self._name = dct['name'] | |
| 1613 self._hidden = dct.get('hidden', False) | |
| 1614 self._subworlds = dct.get('subworlds', []) | |
| 1615 | |
| 1616 def match(self, unit): | |
| 1617 raise NotImplementedError() | |
| 1618 | |
| 1619 @property | |
| 1620 def name(self): | |
| 1621 return self._name | |
| 1622 | |
| 1623 @property | |
| 1624 def hidden(self): | |
| 1625 return self._hidden | |
| 1626 | |
| 1627 def iter_subworld(self): | |
| 1628 for subworld in self._subworlds: | |
| 1629 yield subworld | |
| 1630 | |
| 1631 | |
| 1632 class VMRule(AbstractRule): | |
| 1633 """Represents a Rule to match with virtual memory regions.""" | |
| 1634 def __init__(self, dct): | |
| 1635 super(VMRule, self).__init__(dct) | |
| 1636 self._backtrace_function = dct.get('backtrace_function', None) | |
| 1637 if self._backtrace_function: | |
| 1638 self._backtrace_function = re.compile(self._backtrace_function) | |
| 1639 self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None) | |
| 1640 if self._backtrace_sourcefile: | |
| 1641 self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile) | |
| 1642 self._mmap = dct.get('mmap', None) | |
| 1643 self._sharedwith = dct.get('sharedwith', []) | |
| 1644 self._mapped_pathname = dct.get('mapped_pathname', None) | |
| 1645 if self._mapped_pathname: | |
| 1646 self._mapped_pathname = re.compile(self._mapped_pathname) | |
| 1647 self._mapped_permission = dct.get('mapped_permission', None) | |
| 1648 if self._mapped_permission: | |
| 1649 self._mapped_permission = re.compile(self._mapped_permission) | |
| 1650 | |
| 1651 def __repr__(self): | |
| 1652 result = cStringIO.StringIO() | |
| 1653 result.write('{"%s"=>' % self._name) | |
| 1654 attributes = [] | |
| 1655 attributes.append('mmap: %s' % self._mmap) | |
| 1656 if self._backtrace_function: | |
| 1657 attributes.append('backtrace_function: "%s"' % | |
| 1658 self._backtrace_function.pattern) | |
| 1659 if self._sharedwith: | |
| 1660 attributes.append('sharedwith: "%s"' % self._sharedwith) | |
| 1661 if self._mapped_pathname: | |
| 1662 attributes.append('mapped_pathname: "%s"' % self._mapped_pathname.pattern) | |
| 1663 if self._mapped_permission: | |
| 1664 attributes.append('mapped_permission: "%s"' % | |
| 1665 self._mapped_permission.pattern) | |
| 1666 result.write('%s}' % ', '.join(attributes)) | |
| 1667 return result.getvalue() | |
| 1668 | |
| 1669 def match(self, unit): | |
| 1670 if unit.mmap: | |
| 1671 assert unit.region[0] == 'hooked' | |
| 1672 bucket = unit.bucket_set.get(unit.region[1]['bucket_id']) | |
| 1673 assert bucket | |
| 1674 assert bucket.allocator_type == 'mmap' | |
| 1675 | |
| 1676 stackfunction = bucket.symbolized_joined_stackfunction | |
| 1677 stacksourcefile = bucket.symbolized_joined_stacksourcefile | |
| 1678 | |
| 1679 # TODO(dmikurube): Support shared memory. | |
| 1680 sharedwith = None | |
| 1681 | |
| 1682 if self._mmap == False: # (self._mmap == None) should go through. | |
| 1683 return False | |
| 1684 if (self._backtrace_function and | |
| 1685 not self._backtrace_function.match(stackfunction)): | |
| 1686 return False | |
| 1687 if (self._backtrace_sourcefile and | |
| 1688 not self._backtrace_sourcefile.match(stacksourcefile)): | |
| 1689 return False | |
| 1690 if (self._mapped_pathname and | |
| 1691 not self._mapped_pathname.match(unit.region[1]['vma']['name'])): | |
| 1692 return False | |
| 1693 if (self._mapped_permission and | |
| 1694 not self._mapped_permission.match( | |
| 1695 unit.region[1]['vma']['readable'] + | |
| 1696 unit.region[1]['vma']['writable'] + | |
| 1697 unit.region[1]['vma']['executable'] + | |
| 1698 unit.region[1]['vma']['private'])): | |
| 1699 return False | |
| 1700 if (self._sharedwith and | |
| 1701 unit.pageframe and sharedwith not in self._sharedwith): | |
| 1702 return False | |
| 1703 | |
| 1704 return True | |
| 1705 | |
| 1706 else: | |
| 1707 assert unit.region[0] == 'unhooked' | |
| 1708 | |
| 1709 # TODO(dmikurube): Support shared memory. | |
| 1710 sharedwith = None | |
| 1711 | |
| 1712 if self._mmap == True: # (self._mmap == None) should go through. | |
| 1713 return False | |
| 1714 if (self._mapped_pathname and | |
| 1715 not self._mapped_pathname.match(unit.region[1]['vma']['name'])): | |
| 1716 return False | |
| 1717 if (self._mapped_permission and | |
| 1718 not self._mapped_permission.match( | |
| 1719 unit.region[1]['vma']['readable'] + | |
| 1720 unit.region[1]['vma']['writable'] + | |
| 1721 unit.region[1]['vma']['executable'] + | |
| 1722 unit.region[1]['vma']['private'])): | |
| 1723 return False | |
| 1724 if (self._sharedwith and | |
| 1725 unit.pageframe and sharedwith not in self._sharedwith): | |
| 1726 return False | |
| 1727 | |
| 1728 return True | |
| 1729 | |
| 1730 | |
| 1731 class MallocRule(AbstractRule): | |
| 1732 """Represents a Rule to match with malloc'ed blocks.""" | |
| 1733 def __init__(self, dct): | |
| 1734 super(MallocRule, self).__init__(dct) | |
| 1735 self._backtrace_function = dct.get('backtrace_function', None) | |
| 1736 if self._backtrace_function: | |
| 1737 self._backtrace_function = re.compile(self._backtrace_function) | |
| 1738 self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None) | |
| 1739 if self._backtrace_sourcefile: | |
| 1740 self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile) | |
| 1741 self._typeinfo = dct.get('typeinfo', None) | |
| 1742 if self._typeinfo: | |
| 1743 self._typeinfo = re.compile(self._typeinfo) | |
| 1744 | |
| 1745 def __repr__(self): | |
| 1746 result = cStringIO.StringIO() | |
| 1747 result.write('{"%s"=>' % self._name) | |
| 1748 attributes = [] | |
| 1749 if self._backtrace_function: | |
| 1750 attributes.append('backtrace_function: "%s"' % self._backtrace_function) | |
| 1751 if self._typeinfo: | |
| 1752 attributes.append('typeinfo: "%s"' % self._typeinfo) | |
| 1753 result.write('%s}' % ', '.join(attributes)) | |
| 1754 return result.getvalue() | |
| 1755 | |
| 1756 def match(self, unit): | |
| 1757 assert unit.bucket.allocator_type == 'malloc' | |
| 1758 | |
| 1759 stackfunction = unit.bucket.symbolized_joined_stackfunction | |
| 1760 stacksourcefile = unit.bucket.symbolized_joined_stacksourcefile | |
| 1761 typeinfo = unit.bucket.symbolized_typeinfo | |
| 1762 if typeinfo.startswith('0x'): | |
| 1763 typeinfo = unit.bucket.typeinfo_name | |
| 1764 | |
| 1765 return ((not self._backtrace_function or | |
| 1766 self._backtrace_function.match(stackfunction)) and | |
| 1767 (not self._backtrace_sourcefile or | |
| 1768 self._backtrace_sourcefile.match(stacksourcefile)) and | |
| 1769 (not self._typeinfo or self._typeinfo.match(typeinfo))) | |
| 1770 | |
| 1771 | |
| 1772 class NoBucketMallocRule(MallocRule): | |
| 1773 """Represents a Rule that small ignorable units match with.""" | |
| 1774 def __init__(self): | |
| 1775 super(NoBucketMallocRule, self).__init__({'name': 'tc-no-bucket'}) | |
| 1776 self._no_bucket = True | |
| 1777 | |
| 1778 @property | |
| 1779 def no_bucket(self): | |
| 1780 return self._no_bucket | |
| 1781 | |
| 1782 | |
| 1783 class AbstractSorter(object): | |
| 1784 """An abstract class for classifying Units with a set of Rules.""" | |
| 1785 def __init__(self, dct): | |
| 1786 self._type = 'sorter' | |
| 1787 self._version = dct['version'] | |
| 1788 self._world = dct['world'] | |
| 1789 self._name = dct['name'] | |
| 1790 self._order = dct['order'] | |
| 1791 | |
| 1792 self._rules = [] | |
| 1793 for rule in dct['rules']: | |
| 1794 if dct['world'] == 'vm': | |
| 1795 self._rules.append(VMRule(rule)) | |
| 1796 elif dct['world'] == 'malloc': | |
| 1797 self._rules.append(MallocRule(rule)) | |
| 1798 else: | |
| 1799 LOGGER.error('Unknown sorter world type') | |
| 1800 | |
| 1801 def __repr__(self): | |
| 1802 result = cStringIO.StringIO() | |
| 1803 result.write('world=%s' % self._world) | |
| 1804 result.write('order=%s' % self._order) | |
| 1805 result.write('rules:') | |
| 1806 for rule in self._rules: | |
| 1807 result.write(' %s' % rule) | |
| 1808 return result.getvalue() | |
| 1809 | |
| 1810 @staticmethod | |
| 1811 def load(filename): | |
| 1812 with open(filename) as sorter_f: | |
| 1813 sorter_dict = json.load(sorter_f) | |
| 1814 if sorter_dict['world'] == 'vm': | |
| 1815 return VMSorter(sorter_dict) | |
| 1816 elif sorter_dict['world'] == 'malloc': | |
| 1817 return MallocSorter(sorter_dict) | |
| 1818 else: | |
| 1819 LOGGER.error('Unknown sorter world type') | |
| 1820 return None | |
| 1821 | |
| 1822 @property | |
| 1823 def world(self): | |
| 1824 return self._world | |
| 1825 | |
| 1826 @property | |
| 1827 def name(self): | |
| 1828 return self._name | |
| 1829 | |
| 1830 def find(self, unit): | |
| 1831 raise NotImplementedError() | |
| 1832 | |
| 1833 def find_rule(self, name): | |
| 1834 """Finds a rule whose name is |name|. """ | |
| 1835 for rule in self._rules: | |
| 1836 if rule.name == name: | |
| 1837 return rule | |
| 1838 return None | |
| 1839 | |
| 1840 | |
| 1841 class VMSorter(AbstractSorter): | |
| 1842 """Represents a Sorter for memory regions on virtual memory.""" | |
| 1843 def __init__(self, dct): | |
| 1844 assert dct['world'] == 'vm' | |
| 1845 super(VMSorter, self).__init__(dct) | |
| 1846 | |
| 1847 def find(self, unit): | |
| 1848 for rule in self._rules: | |
| 1849 if rule.match(unit): | |
| 1850 return rule | |
| 1851 assert False | |
| 1852 | |
| 1853 | |
| 1854 class MallocSorter(AbstractSorter): | |
| 1855 """Represents a Sorter for malloc'ed blocks.""" | |
| 1856 def __init__(self, dct): | |
| 1857 assert dct['world'] == 'malloc' | |
| 1858 super(MallocSorter, self).__init__(dct) | |
| 1859 self._no_bucket_rule = NoBucketMallocRule() | |
| 1860 | |
| 1861 def find(self, unit): | |
| 1862 if not unit.bucket: | |
| 1863 return self._no_bucket_rule | |
| 1864 assert unit.bucket.allocator_type == 'malloc' | |
| 1865 | |
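| # Memoize the matched rule per bucket; all units sharing a bucket have the | |
| # same stacktrace, so they must match the same rule. | |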
| 1866 if unit.bucket.component_cache: | |
| 1867 return unit.bucket.component_cache | |
| 1868 | |
| 1869 for rule in self._rules: | |
| 1870 if rule.match(unit): | |
| 1871 unit.bucket.component_cache = rule | |
| 1872 return rule | |
| 1873 assert False | |
| 1874 | |
| 1875 | |
| 1876 class SorterSet(object): | |
| 1877 """Represents an iterable set of Sorters.""" | |
| 1878 def __init__(self, additional=None, default=None): | |
| 1879 if not additional: | |
| 1880 additional = [] | |
| 1881 if not default: | |
| 1882 default = DEFAULT_SORTERS | |
| 1883 self._sorters = {} | |
| 1884 for filename in default + additional: | |
| 1885 sorter = AbstractSorter.load(filename) | |
| 1886 if sorter.world not in self._sorters: | |
| 1887 self._sorters[sorter.world] = [] | |
| 1888 self._sorters[sorter.world].append(sorter) | |
| 1889 | |
| 1890 def __repr__(self): | |
| 1891 result = cStringIO.StringIO() | |
| 1892 result.write(str(self._sorters)) | |
| 1893 return result.getvalue() | |
| 1894 | |
| 1895 def __iter__(self): | |
| 1896 for sorters in self._sorters.itervalues(): | |
| 1897 for sorter in sorters: | |
| 1898 yield sorter | |
| 1899 | |
| 1900 def iter_world(self, world): | |
| 1901 for sorter in self._sorters.get(world, []): | |
| 1902 yield sorter | |
| 1903 | |
| 1904 | |
| 1905 class Command(object): | |
| 1906 """Subclasses are a subcommand for this executable. | |
| 1907 | |
| 1908 See COMMANDS in main(). | |
| 1909 """ | |
| 1910 _DEVICE_LIB_BASEDIRS = ['/data/data/', '/data/app-lib/', '/data/local/tmp'] | |
| 1911 | |
| 1912 def __init__(self, usage): | |
| 1913 self._parser = optparse.OptionParser(usage) | |
| 1914 | |
| 1915 @staticmethod | |
| 1916 def load_basic_files( | |
| 1917 dump_path, multiple, no_dump=False, alternative_dirs=None): | |
| 1918 prefix = Command._find_prefix(dump_path) | |
| 1919 # If the target process appears to have been running on Android, convert | |
| 1920 # paths on the Android device to their estimated counterparts on the host. | |
| 1921 # Use --alternative-dirs to specify the conversion manually. | |
| 1922 if not alternative_dirs: | |
| 1923 alternative_dirs = Command._estimate_alternative_dirs(prefix) | |
| 1924 if alternative_dirs: | |
| 1925 for device, host in alternative_dirs.iteritems(): | |
| 1926 LOGGER.info('Assuming %s on device as %s on host' % (device, host)) | |
| 1927 symbol_data_sources = SymbolDataSources(prefix, alternative_dirs) | |
| 1928 symbol_data_sources.prepare() | |
| 1929 bucket_set = BucketSet() | |
| 1930 bucket_set.load(prefix) | |
| 1931 if not no_dump: | |
| 1932 if multiple: | |
| 1933 dump_list = DumpList.load(Command._find_all_dumps(dump_path)) | |
| 1934 else: | |
| 1935 dump = Dump.load(dump_path) | |
| 1936 symbol_mapping_cache = SymbolMappingCache() | |
| 1937 with open(prefix + '.cache.function', 'a+') as cache_f: | |
| 1938 symbol_mapping_cache.update( | |
| 1939 FUNCTION_SYMBOLS, bucket_set, | |
| 1940 SymbolFinder(FUNCTION_SYMBOLS, symbol_data_sources), cache_f) | |
| 1941 with open(prefix + '.cache.typeinfo', 'a+') as cache_f: | |
| 1942 symbol_mapping_cache.update( | |
| 1943 TYPEINFO_SYMBOLS, bucket_set, | |
| 1944 SymbolFinder(TYPEINFO_SYMBOLS, symbol_data_sources), cache_f) | |
| 1945 with open(prefix + '.cache.sourcefile', 'a+') as cache_f: | |
| 1946 symbol_mapping_cache.update( | |
| 1947 SOURCEFILE_SYMBOLS, bucket_set, | |
| 1948 SymbolFinder(SOURCEFILE_SYMBOLS, symbol_data_sources), cache_f) | |
| 1949 bucket_set.symbolize(symbol_mapping_cache) | |
| 1950 if no_dump: | |
| 1951 return bucket_set | |
| 1952 elif multiple: | |
| 1953 return (bucket_set, dump_list) | |
| 1954 else: | |
| 1955 return (bucket_set, dump) | |
| 1956 | |
| 1957 @staticmethod | |
| 1958 def _find_prefix(path): | |
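| # E.g. a dump named "chrome.12345.0012.heap" (hypothetical) yields the | |
| # prefix "chrome.12345". | |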
| 1959 return re.sub(r'\.[0-9][0-9][0-9][0-9]\.heap', '', path) | |
| 1960 | |
| 1961 @staticmethod | |
| 1962 def _estimate_alternative_dirs(prefix): | |
| 1963 """Estimates a path in host from a corresponding path in target device. | |
| 1964 | |
| 1965 For Android, dmprof.py must find symbol information in binaries on the | |
| 1966 host rather than on the Android device because dmprof.py does not run | |
| 1967 on the device. This method estimates the host path corresponding to a | |
| 1968 path on the Android device. | |
| 1969 | |
| 1970 Returns: | |
| 1971 A dict that maps a path in the Android device to a path in the host. | |
| 1972 If a file in Command._DEVICE_LIB_BASEDIRS is found in /proc/maps, it | |
| 1973 assumes the process was running on Android and maps the path to | |
| 1974 "out/Debug/lib" in the Chromium directory. An empty dict is returned | |
| 1975 unless Android. | |
| 1976 """ | |
| 1977 device_lib_path_candidates = set() | |
| 1978 | |
| 1979 with open(prefix + '.maps') as maps_f: | |
| 1980 maps = proc_maps.ProcMaps.load(maps_f) | |
| 1981 for entry in maps: | |
| 1982 name = entry.as_dict()['name'] | |
| 1983 if any(base_dir in name for base_dir in Command._DEVICE_LIB_BASEDIRS): | |
| 1984 device_lib_path_candidates.add(os.path.dirname(name)) | |
| 1985 | |
| 1986 if len(device_lib_path_candidates) == 1: | |
| 1987 return {device_lib_path_candidates.pop(): os.path.join( | |
| 1988 CHROME_SRC_PATH, 'out', 'Debug', 'lib')} | |
| 1989 else: | |
| 1990 return {} | |
| 1991 | |
| 1992 @staticmethod | |
| 1993 def _find_all_dumps(dump_path): | |
| 1994 prefix = Command._find_prefix(dump_path) | |
| 1995 dump_path_list = [dump_path] | |
| 1996 | |
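| # Assuming names like "<prefix>.0012.heap", extract the 4-digit sequence | |
| # number that follows the prefix, then scan forward from the next number. | |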
| 1997 n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5]) | |
| 1998 n += 1 | |
| 1999 skipped = 0 | |
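| # Tolerate a bounded number of missing or empty dump files; note that | |
| # |skipped| is never reset, so the scan stops after ~10 gaps in total. | |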
| 2000 while True: | |
| 2001 p = '%s.%04d.heap' % (prefix, n) | |
| 2002 if os.path.exists(p) and os.stat(p).st_size: | |
| 2003 dump_path_list.append(p) | |
| 2004 else: | |
| 2005 if skipped > 10: | |
| 2006 break | |
| 2007 skipped += 1 | |
| 2008 n += 1 | |
| 2009 | |
| 2010 return dump_path_list | |
| 2011 | |
| 2012 @staticmethod | |
| 2013 def _find_all_buckets(dump_path): | |
| 2014 prefix = Command._find_prefix(dump_path) | |
| 2015 bucket_path_list = [] | |
| 2016 | |
| 2017 n = 0 | |
| 2018 while True: | |
| 2019 path = '%s.%04d.buckets' % (prefix, n) | |
| 2020 if not os.path.exists(path): | |
| 2021 if n > 10: | |
| 2022 break | |
| 2023 n += 1 | |
| 2024 continue | |
| 2025 bucket_path_list.append(path) | |
| 2026 n += 1 | |
| 2027 | |
| 2028 return bucket_path_list | |
| 2029 | |
| 2030 def _parse_args(self, sys_argv, required): | |
| 2031 options, args = self._parser.parse_args(sys_argv) | |
| 2032 if len(args) < required + 1: | |
| 2033 self._parser.error('needs %d argument(s).\n' % required) | |
| 2034 return None | |
| 2035 return (options, args) | |
| 2036 | |
| 2037 @staticmethod | |
| 2038 def _parse_policy_list(options_policy): | |
| 2039 if options_policy: | |
| 2040 return options_policy.split(',') | |
| 2041 else: | |
| 2042 return None | |
| 2043 | |
| 2044 | |
| 2045 class BucketsCommand(Command): | |
| 2046 def __init__(self): | |
| 2047 super(BucketsCommand, self).__init__('Usage: %prog buckets <first-dump>') | |
| 2048 | |
| 2049 def do(self, sys_argv, out=sys.stdout): | |
| 2050 _, args = self._parse_args(sys_argv, 1) | |
| 2051 dump_path = args[1] | |
| 2052 bucket_set = Command.load_basic_files(dump_path, True, True) | |
| 2053 | |
| 2054 BucketsCommand._output(bucket_set, out) | |
| 2055 return 0 | |
| 2056 | |
| 2057 @staticmethod | |
| 2058 def _output(bucket_set, out): | |
| 2059 """Prints all buckets with resolving symbols. | |
| 2060 | |
| 2061 Args: | |
| 2062 bucket_set: A BucketSet object. | |
| 2063 out: An IO object to output. | |
| 2064 """ | |
| 2065 for bucket_id, bucket in sorted(bucket_set): | |
| 2066 out.write('%d: %s\n' % (bucket_id, bucket)) | |
| 2067 | |
| 2068 | |
| 2069 class StacktraceCommand(Command): | |
| 2070 def __init__(self): | |
| 2071 super(StacktraceCommand, self).__init__( | |
| 2072 'Usage: %prog stacktrace <dump>') | |
| 2073 | |
| 2074 def do(self, sys_argv): | |
| 2075 _, args = self._parse_args(sys_argv, 1) | |
| 2076 dump_path = args[1] | |
| 2077 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
| 2078 | |
| 2079 StacktraceCommand._output(dump, bucket_set, sys.stdout) | |
| 2080 return 0 | |
| 2081 | |
| 2082 @staticmethod | |
| 2083 def _output(dump, bucket_set, out): | |
| 2084 """Outputs a given stacktrace. | |
| 2085 | |
| 2086 Args: | |
| dump: A Dump object. | |
| 2087 bucket_set: A BucketSet object. | |
| 2088 out: A file object to output. | |
| 2089 """ | |
| 2090 for line in dump.iter_stacktrace: | |
| 2091 words = line.split() | |
| 2092 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 2093 if not bucket: | |
| 2094 continue | |
| 2095 for i in range(0, BUCKET_ID - 1): | |
| 2096 out.write(words[i] + ' ') | |
| 2097 for frame in bucket.symbolized_stackfunction: | |
| 2098 out.write(frame + ' ') | |
| 2099 out.write('\n') | |
| 2100 | |
| 2101 | |
| 2102 class PolicyCommands(Command): | |
| 2103 def __init__(self, command): | |
| 2104 super(PolicyCommands, self).__init__( | |
| 2105 'Usage: %%prog %s [-p POLICY] <first-dump> [shared-first-dumps...]' % | |
| 2106 command) | |
| 2107 self._parser.add_option('-p', '--policy', type='string', dest='policy', | |
| 2108 help='profile with POLICY', metavar='POLICY') | |
| 2109 self._parser.add_option('--alternative-dirs', dest='alternative_dirs', | |
| 2110 metavar='/path/on/target@/path/on/host[:...]', | |
| 2111 help='Read files in /path/on/host/ instead of ' | |
| 2112 'files in /path/on/target/.') | |
| 2113 | |
| 2114 def _set_up(self, sys_argv): | |
| 2115 options, args = self._parse_args(sys_argv, 1) | |
| 2116 dump_path = args[1] | |
| 2117 shared_first_dump_paths = args[2:] | |
| 2118 alternative_dirs_dict = {} | |
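| # --alternative-dirs uses '@' and ':' separators, e.g. (hypothetical paths) | |
| # "--alternative-dirs=/data/app-lib/foo@out/Debug/lib". | |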
| 2119 if options.alternative_dirs: | |
| 2120 for alternative_dir_pair in options.alternative_dirs.split(':'): | |
| 2121 target_path, host_path = alternative_dir_pair.split('@', 1) | |
| 2122 alternative_dirs_dict[target_path] = host_path | |
| 2123 (bucket_set, dumps) = Command.load_basic_files( | |
| 2124 dump_path, True, alternative_dirs=alternative_dirs_dict) | |
| 2125 | |
| 2126 pfn_counts_dict = {} | |
| 2127 for shared_first_dump_path in shared_first_dump_paths: | |
| 2128 shared_dumps = Command._find_all_dumps(shared_first_dump_path) | |
| 2129 for shared_dump in shared_dumps: | |
| 2130 pfn_counts = PFNCounts.load(shared_dump) | |
| 2131 if pfn_counts.pid not in pfn_counts_dict: | |
| 2132 pfn_counts_dict[pfn_counts.pid] = [] | |
| 2133 pfn_counts_dict[pfn_counts.pid].append(pfn_counts) | |
| 2134 | |
| 2135 policy_set = PolicySet.load(Command._parse_policy_list(options.policy)) | |
| 2136 return policy_set, dumps, pfn_counts_dict, bucket_set | |
| 2137 | |
| 2138 @staticmethod | |
| 2139 def _apply_policy(dump, pfn_counts_dict, policy, bucket_set, first_dump_time): | |
| 2140 """Aggregates the total memory size of each component. | |
| 2141 | |
| 2142 Iterates through all stacktraces and attributes each of them to one of the | |
| 2143 components based on the policy. Rules match in order, so ordering matters. | |
| 2144 | |
| 2145 Args: | |
| 2146 dump: A Dump object. | |
| 2147 pfn_counts_dict: A dict mapping a pid to a list of PFNCounts. | |
| 2148 policy: A Policy object. | |
| 2149 bucket_set: A BucketSet object. | |
| 2150 first_dump_time: An integer representing the time when the first dump | |
| 2151 was taken. | |
| 2152 | |
| 2153 Returns: | |
| 2154 A dict mapping each component name to its aggregated size. | |
| 2155 """ | |
| 2156 LOGGER.info(' %s' % dump.path) | |
| 2157 all_pfn_dict = {} | |
| 2158 if pfn_counts_dict: | |
| 2159 LOGGER.info(' shared with...') | |
| 2160 for pid, pfnset_list in pfn_counts_dict.iteritems(): | |
| 2161 closest_pfnset_index = None | |
| 2162 closest_pfnset_difference = 1024.0 | |
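| # Pick the pfnset dumped closest in time to this dump: one taken less | |
| # than 3 seconds later, or an earlier one unless its reason is 'Exiting'. | |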
| 2163 for index, pfnset in enumerate(pfnset_list): | |
| 2164 time_difference = pfnset.time - dump.time | |
| 2165 if time_difference >= 3.0: | |
| 2166 break | |
| 2167 elif ((time_difference < 0.0 and pfnset.reason != 'Exiting') or | |
| 2168 (0.0 <= time_difference and time_difference < 3.0)): | |
| 2169 closest_pfnset_index = index | |
| 2170 closest_pfnset_difference = time_difference | |
| 2171 elif time_difference < 0.0 and pfnset.reason == 'Exiting': | |
| 2172 closest_pfnset_index = None | |
| 2173 break | |
| 2174 if closest_pfnset_index is not None:  # Index 0 is a valid match. | |
| 2175 for pfn, count in pfnset_list[closest_pfnset_index].iter_pfn: | |
| 2176 all_pfn_dict[pfn] = all_pfn_dict.get(pfn, 0) + count | |
| 2177 LOGGER.info(' %s (time difference = %f)' % | |
| 2178 (pfnset_list[closest_pfnset_index].path, | |
| 2179 closest_pfnset_difference)) | |
| 2180 else: | |
| 2181 LOGGER.info(' (no match with pid:%d)' % pid) | |
| 2182 | |
| 2183 sizes = dict((c, 0) for c in policy.components) | |
| 2184 | |
| 2185 PolicyCommands._accumulate_malloc(dump, policy, bucket_set, sizes) | |
| 2186 verify_global_stats = PolicyCommands._accumulate_maps( | |
| 2187 dump, all_pfn_dict, policy, bucket_set, sizes) | |
| 2188 | |
| 2189 # TODO(dmikurube): Remove the verifying code when GLOBAL_STATS is removed. | |
| 2190 # http://crbug.com/245603. | |
| 2191 for verify_key, verify_value in verify_global_stats.iteritems(): | |
| 2192 dump_value = dump.global_stat('%s_committed' % verify_key) | |
| 2193 if dump_value != verify_value: | |
| 2194 LOGGER.warn('%25s: %12d != %d (%d)' % ( | |
| 2195 verify_key, dump_value, verify_value, dump_value - verify_value)) | |
| 2196 | |
| 2197 sizes['mmap-no-log'] = ( | |
| 2198 dump.global_stat('profiled-mmap_committed') - | |
| 2199 sizes['mmap-total-log']) | |
| 2200 sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed') | |
| 2201 sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual') | |
| 2202 | |
| 2203 sizes['tc-no-log'] = ( | |
| 2204 dump.global_stat('profiled-malloc_committed') - | |
| 2205 sizes['tc-total-log']) | |
| 2206 sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed') | |
| 2207 sizes['tc-unused'] = ( | |
| 2208 sizes['mmap-tcmalloc'] - | |
| 2209 dump.global_stat('profiled-malloc_committed')) | |
| 2210 if sizes['tc-unused'] < 0: | |
| 2211 LOGGER.warn(' Assuming tc-unused=0 as it is negative: %d (bytes)' % | |
| 2212 sizes['tc-unused']) | |
| 2213 sizes['tc-unused'] = 0 | |
| 2214 sizes['tc-total'] = sizes['mmap-tcmalloc'] | |
| 2215 | |
| 2216 # TODO(dmikurube): global_stat will be deprecated. | |
| 2217 # See http://crbug.com/245603. | |
| 2218 for key, value in { | |
| 2219 'total': 'total_committed', | |
| 2220 'filemapped': 'file_committed', | |
| 2221 'absent': 'absent_committed', | |
| 2222 'file-exec': 'file-exec_committed', | |
| 2223 'file-nonexec': 'file-nonexec_committed', | |
| 2224 'anonymous': 'anonymous_committed', | |
| 2225 'stack': 'stack_committed', | |
| 2226 'other': 'other_committed', | |
| 2227 'unhooked-absent': 'nonprofiled-absent_committed', | |
| 2228 'total-vm': 'total_virtual', | |
| 2229 'filemapped-vm': 'file_virtual', | |
| 2230 'anonymous-vm': 'anonymous_virtual', | |
| 2231 'other-vm': 'other_virtual' }.iteritems(): | |
| 2232 if key in sizes: | |
| 2233 sizes[key] = dump.global_stat(value) | |
| 2234 | |
| 2235 if 'mustbezero' in sizes: | |
| 2236 removed_list = ( | |
| 2237 'profiled-mmap_committed', | |
| 2238 'nonprofiled-absent_committed', | |
| 2239 'nonprofiled-anonymous_committed', | |
| 2240 'nonprofiled-file-exec_committed', | |
| 2241 'nonprofiled-file-nonexec_committed', | |
| 2242 'nonprofiled-stack_committed', | |
| 2243 'nonprofiled-other_committed') | |
| 2244 sizes['mustbezero'] = ( | |
| 2245 dump.global_stat('total_committed') - | |
| 2246 sum(dump.global_stat(removed) for removed in removed_list)) | |
| 2247 if 'total-exclude-profiler' in sizes: | |
| 2248 sizes['total-exclude-profiler'] = ( | |
| 2249 dump.global_stat('total_committed') - | |
| 2250 (sizes['mmap-profiler'] + sizes['mmap-type-profiler'])) | |
| 2251 if 'hour' in sizes: | |
| 2252 sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0 | |
| 2253 if 'minute' in sizes: | |
| 2254 sizes['minute'] = (dump.time - first_dump_time) / 60.0 | |
| 2255 if 'second' in sizes: | |
| 2256 sizes['second'] = dump.time - first_dump_time | |
| 2257 | |
| 2258 return sizes | |
| 2259 | |
| 2260 @staticmethod | |
| 2261 def _accumulate_malloc(dump, policy, bucket_set, sizes): | |
| 2262 for line in dump.iter_stacktrace: | |
| 2263 words = line.split() | |
| 2264 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 2265 if not bucket or bucket.allocator_type == 'malloc': | |
| 2266 component_match = policy.find_malloc(bucket) | |
| 2267 elif bucket.allocator_type == 'mmap': | |
| 2268 continue | |
| 2269 else: | |
| 2270 assert False | |
| 2271 sizes[component_match] += int(words[COMMITTED]) | |
| 2272 | |
| 2273 assert not component_match.startswith('mmap-') | |
| 2274 if component_match.startswith('tc-'): | |
| 2275 sizes['tc-total-log'] += int(words[COMMITTED]) | |
| 2276 else: | |
| 2277 sizes['other-total-log'] += int(words[COMMITTED]) | |
| 2278 | |
| 2279 @staticmethod | |
| 2280 def _accumulate_maps(dump, pfn_dict, policy, bucket_set, sizes): | |
| 2281 # TODO(dmikurube): Remove the dict when GLOBAL_STATS is removed. | |
| 2282 # http://crbug.com/245603. | |
| 2283 global_stats = { | |
| 2284 'total': 0, | |
| 2285 'file-exec': 0, | |
| 2286 'file-nonexec': 0, | |
| 2287 'anonymous': 0, | |
| 2288 'stack': 0, | |
| 2289 'other': 0, | |
| 2290 'nonprofiled-file-exec': 0, | |
| 2291 'nonprofiled-file-nonexec': 0, | |
| 2292 'nonprofiled-anonymous': 0, | |
| 2293 'nonprofiled-stack': 0, | |
| 2294 'nonprofiled-other': 0, | |
| 2295 'profiled-mmap': 0, | |
| 2296 } | |
| 2297 | |
| 2298 for key, value in dump.iter_map: | |
| 2299 # TODO(dmikurube): Remove the subtotal code when GLOBAL_STATS is removed. | |
| 2300 # It's temporary verification code for transition described in | |
| 2301 # http://crbug.com/245603. | |
| 2302 committed = 0 | |
| 2303 if 'committed' in value[1]: | |
| 2304 committed = value[1]['committed'] | |
| 2305 global_stats['total'] += committed | |
| 2306 key = 'other' | |
| 2307 name = value[1]['vma']['name'] | |
| 2308 if name.startswith('/'): | |
| 2309 if value[1]['vma']['executable'] == 'x': | |
| 2310 key = 'file-exec' | |
| 2311 else: | |
| 2312 key = 'file-nonexec' | |
| 2313 elif name == '[stack]': | |
| 2314 key = 'stack' | |
| 2315 elif name == '': | |
| 2316 key = 'anonymous' | |
| 2317 global_stats[key] += committed | |
| 2318 if value[0] == 'unhooked': | |
| 2319 global_stats['nonprofiled-' + key] += committed | |
| 2320 if value[0] == 'hooked': | |
| 2321 global_stats['profiled-mmap'] += committed | |
| 2322 | |
| 2323 if value[0] == 'unhooked': | |
| 2324 if pfn_dict and dump.pageframe_length: | |
| 2325 for pageframe in value[1]['pageframe']: | |
| 2326 component_match = policy.find_unhooked(value, pageframe, pfn_dict) | |
| 2327 sizes[component_match] += pageframe.size | |
| 2328 else: | |
| 2329 component_match = policy.find_unhooked(value) | |
| 2330 sizes[component_match] += int(value[1]['committed']) | |
| 2331 elif value[0] == 'hooked': | |
| 2332 if pfn_dict and dump.pageframe_length: | |
| 2333 for pageframe in value[1]['pageframe']: | |
| 2334 component_match, _ = policy.find_mmap( | |
| 2335 value, bucket_set, pageframe, pfn_dict) | |
| 2336 sizes[component_match] += pageframe.size | |
| 2337 assert not component_match.startswith('tc-') | |
| 2338 if component_match.startswith('mmap-'): | |
| 2339 sizes['mmap-total-log'] += pageframe.size | |
| 2340 else: | |
| 2341 sizes['other-total-log'] += pageframe.size | |
| 2342 else: | |
| 2343 component_match, _ = policy.find_mmap(value, bucket_set) | |
| 2344 sizes[component_match] += int(value[1]['committed']) | |
| 2345 if component_match.startswith('mmap-'): | |
| 2346 sizes['mmap-total-log'] += int(value[1]['committed']) | |
| 2347 else: | |
| 2348 sizes['other-total-log'] += int(value[1]['committed']) | |
| 2349 else: | |
| 2350 LOGGER.error('Unrecognized mapping status: %s' % value[0]) | |
| 2351 | |
| 2352 return global_stats | |
| 2353 | |
| 2354 | |
| 2355 class CSVCommand(PolicyCommands): | |
| 2356 def __init__(self): | |
| 2357 super(CSVCommand, self).__init__('csv') | |
| 2358 | |
| 2359 def do(self, sys_argv): | |
| 2360 policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) | |
| 2361 return CSVCommand._output( | |
| 2362 policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) | |
| 2363 | |
| 2364 @staticmethod | |
| 2365 def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): | |
| 2366 max_components = 0 | |
| 2367 for label in policy_set: | |
| 2368 max_components = max(max_components, len(policy_set[label].components)) | |
| 2369 | |
| 2370 for label in sorted(policy_set): | |
| 2371 components = policy_set[label].components | |
| 2372 if len(policy_set) > 1: | |
| 2373 out.write('%s%s\n' % (label, ',' * (max_components - 1))) | |
| 2374 out.write('%s%s\n' % ( | |
| 2375 ','.join(components), ',' * (max_components - len(components)))) | |
| 2376 | |
| 2377 LOGGER.info('Applying a policy %s to...' % label) | |
| 2378 for dump in dumps: | |
| 2379 component_sizes = PolicyCommands._apply_policy( | |
| 2380 dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time) | |
| 2381 s = [] | |
| 2382 for c in components: | |
| 2383 if c in ('hour', 'minute', 'second'): | |
| 2384 s.append('%05.5f' % (component_sizes[c])) | |
| 2385 else: | |
| 2386 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) | |
| 2387 out.write('%s%s\n' % ( | |
| 2388 ','.join(s), ',' * (max_components - len(components)))) | |
| 2389 | |
| 2390 bucket_set.clear_component_cache() | |
| 2391 | |
| 2392 return 0 | |
| 2393 | |
| 2394 | |
| 2395 class JSONCommand(PolicyCommands): | |
| 2396 def __init__(self): | |
| 2397 super(JSONCommand, self).__init__('json') | |
| 2398 | |
| 2399 def do(self, sys_argv): | |
| 2400 policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) | |
| 2401 return JSONCommand._output( | |
| 2402 policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) | |
| 2403 | |
| 2404 @staticmethod | |
| 2405 def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): | |
| 2406 json_base = { | |
| 2407 'version': 'JSON_DEEP_2', | |
| 2408 'policies': {}, | |
| 2409 } | |
| 2410 | |
| 2411 for label in sorted(policy_set): | |
| 2412 json_base['policies'][label] = { | |
| 2413 'legends': policy_set[label].components, | |
| 2414 'snapshots': [], | |
| 2415 } | |
| 2416 | |
| 2417 LOGGER.info('Applying a policy %s to...' % label) | |
| 2418 for dump in dumps: | |
| 2419 component_sizes = PolicyCommands._apply_policy( | |
| 2420 dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time) | |
| 2421 component_sizes['dump_path'] = dump.path | |
| 2422 component_sizes['dump_time'] = datetime.datetime.fromtimestamp( | |
| 2423 dump.time).strftime('%Y-%m-%d %H:%M:%S') | |
| 2424 json_base['policies'][label]['snapshots'].append(component_sizes) | |
| 2425 | |
| 2426 bucket_set.clear_component_cache() | |
| 2427 | |
| 2428 json.dump(json_base, out, indent=2, sort_keys=True) | |
| 2429 | |
| 2430 return 0 | |
| 2431 | |
| 2432 | |
| 2433 class ListCommand(PolicyCommands): | |
| 2434 def __init__(self): | |
| 2435 super(ListCommand, self).__init__('list') | |
| 2436 | |
| 2437 def do(self, sys_argv): | |
| 2438 policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) | |
| 2439 return ListCommand._output( | |
| 2440 policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) | |
| 2441 | |
| 2442 @staticmethod | |
| 2443 def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): | |
| 2444 for label in sorted(policy_set): | |
| 2445 LOGGER.info('Applying a policy %s to...' % label) | |
| 2446 for dump in dumps: | |
| 2447 component_sizes = PolicyCommands._apply_policy( | |
| 2448 dump, pfn_counts_dict, policy_set[label], bucket_set, dump.time) | |
| 2449 out.write('%s for %s:\n' % (label, dump.path)) | |
| 2450 for c in policy_set[label].components: | |
| 2451 if c in ['hour', 'minute', 'second']: | |
| 2452 out.write('%40s %12.3f\n' % (c, component_sizes[c])) | |
| 2453 else: | |
| 2454 out.write('%40s %12d\n' % (c, component_sizes[c])) | |
| 2455 | |
| 2456 bucket_set.clear_component_cache() | |
| 2457 | |
| 2458 return 0 | |
| 2459 | |
| 2460 | |
| 2461 class MapCommand(Command): | |
| 2462 def __init__(self): | |
| 2463 super(MapCommand, self).__init__('Usage: %prog map <first-dump> <policy>') | |
| 2464 | |
| 2465 def do(self, sys_argv, out=sys.stdout): | |
| 2466 _, args = self._parse_args(sys_argv, 2) | |
| 2467 dump_path = args[1] | |
| 2468 target_policy = args[2] | |
| 2469 (bucket_set, dumps) = Command.load_basic_files(dump_path, True) | |
| 2470 policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) | |
| 2471 | |
| 2472 MapCommand._output(dumps, bucket_set, policy_set[target_policy], out) | |
| 2473 return 0 | |
| 2474 | |
| 2475 @staticmethod | |
| 2476 def _output(dumps, bucket_set, policy, out): | |
| 2477 """Prints all stacktraces in a given component of given depth. | |
| 2478 | |
| 2479 Args: | |
| 2480 dumps: A list of Dump objects. | |
| 2481 bucket_set: A BucketSet object. | |
| 2482 policy: A Policy object. | |
| 2483 out: An IO object to output. | |
| 2484 """ | |
| 2485 max_dump_count = 0 | |
| 2486 range_dict = ExclusiveRangeDict(ListAttribute) | |
| 2487 for dump in dumps: | |
| 2488 max_dump_count = max(max_dump_count, dump.count) | |
| 2489 for key, value in dump.iter_map: | |
| 2490 for begin, end, attr in range_dict.iter_range(key[0], key[1]): | |
| 2491 attr[dump.count] = value | |
| 2492 | |
| 2493 max_dump_count_digit = len(str(max_dump_count)) | |
| 2494 for begin, end, attr in range_dict.iter_range(): | |
| 2495 out.write('%x-%x\n' % (begin, end)) | |
| 2496 if len(attr) < max_dump_count: | |
| 2497 attr[max_dump_count] = None | |
| 2498 for index, value in enumerate(attr[1:]): | |
| 2499 out.write(' #%0*d: ' % (max_dump_count_digit, index + 1)) | |
| 2500 if not value: | |
| 2501 out.write('None\n') | |
| 2502 elif value[0] == 'hooked': | |
| 2503 component_match, _ = policy.find_mmap(value, bucket_set) | |
| 2504 out.write('%s @ %d\n' % (component_match, value[1]['bucket_id'])) | |
| 2505 else: | |
| 2506 component_match = policy.find_unhooked(value) | |
| 2507 region_info = value[1] | |
| 2508 size = region_info['committed'] | |
| 2509 out.write('%s [%d bytes] %s%s%s%s %s\n' % ( | |
| 2510 component_match, size, value[1]['vma']['readable'], | |
| 2511 value[1]['vma']['writable'], value[1]['vma']['executable'], | |
| 2512 value[1]['vma']['private'], value[1]['vma']['name'])) | |
| 2513 | |
| 2514 | |
| 2515 class ExpandCommand(Command): | |
| 2516 def __init__(self): | |
| 2517 super(ExpandCommand, self).__init__( | |
| 2518 'Usage: %prog expand <dump> <policy> <component> <depth>') | |
| 2519 | |
| 2520 def do(self, sys_argv): | |
| 2521 _, args = self._parse_args(sys_argv, 4) | |
| 2522 dump_path = args[1] | |
| 2523 target_policy = args[2] | |
| 2524 component_name = args[3] | |
| 2525 depth = args[4] | |
| 2526 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
| 2527 policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) | |
| 2528 | |
| 2529 ExpandCommand._output(dump, policy_set[target_policy], bucket_set, | |
| 2530 component_name, int(depth), sys.stdout) | |
| 2531 return 0 | |
| 2532 | |
| 2533 @staticmethod | |
| 2534 def _output(dump, policy, bucket_set, component_name, depth, out): | |
| 2535 """Prints all stacktraces in a given component of given depth. | |
| 2536 | |
| 2537 Args: | |
| 2538 dump: A Dump object. | |
| 2539 policy: A Policy object. | |
| 2540 bucket_set: A BucketSet object. | |
| 2541 component_name: A name of component for filtering. | |
| 2542 depth: An integer representing depth to be printed. | |
| 2543 out: An IO object to output. | |
| 2544 """ | |
| 2545 sizes = {} | |
| 2546 | |
| 2547 ExpandCommand._accumulate( | |
| 2548 dump, policy, bucket_set, component_name, depth, sizes) | |
| 2549 | |
| 2550 sorted_sizes_list = sorted( | |
| 2551 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) | |
| 2552 total = 0 | |
| 2553 # TODO(dmikurube): Better formatting. | |
| 2554 for size_pair in sorted_sizes_list: | |
| 2555 out.write('%10d %s\n' % (size_pair[1], size_pair[0])) | |
| 2556 total += size_pair[1] | |
| 2557 LOGGER.info('total: %d\n' % total) | |
| 2558 | |
| 2559 @staticmethod | |
| 2560 def _add_size(precedence, bucket, depth, committed, sizes): | |
| 2561 stacktrace_sequence = precedence | |
| 2562 for function, sourcefile in zip( | |
| 2563 bucket.symbolized_stackfunction[ | |
| 2564 0 : min(len(bucket.symbolized_stackfunction), 1 + depth)], | |
| 2565 bucket.symbolized_stacksourcefile[ | |
| 2566 0 : min(len(bucket.symbolized_stacksourcefile), 1 + depth)]): | |
| 2567 stacktrace_sequence += '%s(@%s) ' % (function, sourcefile) | |
| 2568 if stacktrace_sequence not in sizes: | |
| 2569 sizes[stacktrace_sequence] = 0 | |
| 2570 sizes[stacktrace_sequence] += committed | |
| 2571 | |
| 2572 @staticmethod | |
| 2573 def _accumulate(dump, policy, bucket_set, component_name, depth, sizes): | |
| 2574 rule = policy.find_rule(component_name) | |
| 2575 if not rule: | |
| 2576 pass | |
| 2577 elif rule.allocator_type == 'malloc': | |
| 2578 for line in dump.iter_stacktrace: | |
| 2579 words = line.split() | |
| 2580 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 2581 if not bucket or bucket.allocator_type == 'malloc': | |
| 2582 component_match = policy.find_malloc(bucket) | |
| 2583 elif bucket.allocator_type == 'mmap': | |
| 2584 continue | |
| 2585 else: | |
| 2586 assert False | |
| 2587 if component_match == component_name: | |
| 2588 precedence = '' | |
| 2589 precedence += '(alloc=%d) ' % int(words[ALLOC_COUNT]) | |
| 2590 precedence += '(free=%d) ' % int(words[FREE_COUNT]) | |
| 2591 if bucket.typeinfo: | |
| 2592 precedence += '(type=%s) ' % bucket.symbolized_typeinfo | |
| 2593 precedence += '(type.name=%s) ' % bucket.typeinfo_name | |
| 2594 ExpandCommand._add_size(precedence, bucket, depth, | |
| 2595 int(words[COMMITTED]), sizes) | |
| 2596 elif rule.allocator_type == 'mmap': | |
| 2597 for _, region in dump.iter_map: | |
| 2598 if region[0] != 'hooked': | |
| 2599 continue | |
| 2600 component_match, bucket = policy.find_mmap(region, bucket_set) | |
| 2601 if component_match == component_name: | |
| 2602 ExpandCommand._add_size('', bucket, depth, | |
| 2603 region[1]['committed'], sizes) | |
| 2604 | |
| 2605 | |
| 2606 class PProfCommand(Command): | |
| 2607 def __init__(self): | |
| 2608 super(PProfCommand, self).__init__( | |
| 2609 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') | |
| 2610 self._parser.add_option('-c', '--component', type='string', | |
| 2611 dest='component', | |
| 2612 help='restrict to COMPONENT', metavar='COMPONENT') | |
| 2613 | |
| 2614 def do(self, sys_argv): | |
| 2615 options, args = self._parse_args(sys_argv, 2) | |
| 2616 | |
| 2617 dump_path = args[1] | |
| 2618 target_policy = args[2] | |
| 2619 component = options.component | |
| 2620 | |
| 2621 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
| 2622 policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) | |
| 2623 | |
| 2624 with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f: | |
| 2625 maps_lines = maps_f.readlines() | |
| 2626 PProfCommand._output( | |
| 2627 dump, policy_set[target_policy], bucket_set, maps_lines, component, | |
| 2628 sys.stdout) | |
| 2629 | |
| 2630 return 0 | |
| 2631 | |
| 2632 @staticmethod | |
| 2633 def _output(dump, policy, bucket_set, maps_lines, component_name, out): | |
| 2634 """Converts the heap profile dump so it can be processed by pprof. | |
| 2635 | |
| 2636 Args: | |
| 2637 dump: A Dump object. | |
| 2638 policy: A Policy object. | |
| 2639 bucket_set: A BucketSet object. | |
| 2640 maps_lines: A list of strings containing /proc/.../maps. | |
| 2641 component_name: A name of component for filtering. | |
| 2642 out: An IO object to output. | |
| 2643 """ | |
| 2644 out.write('heap profile: ') | |
| 2645 com_committed, com_allocs = PProfCommand._accumulate( | |
| 2646 dump, policy, bucket_set, component_name) | |
| 2647 | |
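| # The header below follows pprof's text heap-profile format, assumed to be | |
| # "<in-use count>: <in-use bytes> [<alloc count>: <alloc bytes>] @ heapprofile". | |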
| 2648 out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( | |
| 2649 com_allocs, com_committed, com_allocs, com_committed)) | |
| 2650 | |
| 2651 PProfCommand._output_stacktrace_lines( | |
| 2652 dump, policy, bucket_set, component_name, out) | |
| 2653 | |
| 2654 out.write('MAPPED_LIBRARIES:\n') | |
| 2655 for line in maps_lines: | |
| 2656 out.write(line) | |
| 2657 | |
| 2658 @staticmethod | |
| 2659 def _accumulate(dump, policy, bucket_set, component_name): | |
| 2660 """Accumulates size of committed chunks and the number of allocated chunks. | |
| 2661 | |
| 2662 Args: | |
| 2663 dump: A Dump object. | |
| 2664 policy: A Policy object. | |
| 2665 bucket_set: A BucketSet object. | |
| 2666 component_name: A name of component for filtering. | |
| 2667 | |
| 2668 Returns: | |
| 2669 Two integers which are the accumulated size of committed regions and the | |
| 2670 number of allocated chunks, respectively. | |
| 2671 """ | |
| 2672 com_committed = 0 | |
| 2673 com_allocs = 0 | |
| 2674 | |
| 2675 for _, region in dump.iter_map: | |
| 2676 if region[0] != 'hooked': | |
| 2677 continue | |
| 2678 component_match, bucket = policy.find_mmap(region, bucket_set) | |
| 2679 | |
| 2680 if (component_name and component_name != component_match) or ( | |
| 2681 region[1]['committed'] == 0): | |
| 2682 continue | |
| 2683 | |
| 2684 com_committed += region[1]['committed'] | |
| 2685 com_allocs += 1 | |
| 2686 | |
| 2687 for line in dump.iter_stacktrace: | |
| 2688 words = line.split() | |
| 2689 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 2690 if not bucket or bucket.allocator_type == 'malloc': | |
| 2691 component_match = policy.find_malloc(bucket) | |
| 2692 elif bucket.allocator_type == 'mmap': | |
| 2693 continue | |
| 2694 else: | |
| 2695 assert False | |
| 2696 if (not bucket or | |
| 2697 (component_name and component_name != component_match)): | |
| 2698 continue | |
| 2699 | |
| 2700 com_committed += int(words[COMMITTED]) | |
| 2701 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) | |
| 2702 | |
| 2703 return com_committed, com_allocs | |
| 2704 | |
| 2705 @staticmethod | |
| 2706 def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out): | |
| 2707 """Prints information of stacktrace lines for pprof. | |
| 2708 | |
| 2709 Args: | |
| 2710 dump: A Dump object. | |
| 2711 policy: A Policy object. | |
| 2712 bucket_set: A BucketSet object. | |
| 2713 component_name: A name of component for filtering. | |
| 2714 out: An IO object to output. | |
| 2715 """ | |
| 2716 for _, region in dump.iter_map: | |
| 2717 if region[0] != 'hooked': | |
| 2718 continue | |
| 2719 component_match, bucket = policy.find_mmap(region, bucket_set) | |
| 2720 | |
| 2721 if (component_name and component_name != component_match) or ( | |
| 2722 region[1]['committed'] == 0): | |
| 2723 continue | |
| 2724 | |
| 2725 out.write(' 1: %8s [ 1: %8s] @' % ( | |
| 2726 region[1]['committed'], region[1]['committed'])) | |
| 2727 for address in bucket.stacktrace: | |
| 2728 out.write(' 0x%016x' % address) | |
| 2729 out.write('\n') | |
| 2730 | |
| 2731 for line in dump.iter_stacktrace: | |
| 2732 words = line.split() | |
| 2733 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 2734 if not bucket or bucket.allocator_type == 'malloc': | |
| 2735 component_match = policy.find_malloc(bucket) | |
| 2736 elif bucket.allocator_type == 'mmap': | |
| 2737 continue | |
| 2738 else: | |
| 2739 assert False | |
| 2740 if (not bucket or | |
| 2741 (component_name and component_name != component_match)): | |
| 2742 continue | |
| 2743 | |
| 2744 out.write('%6d: %8s [%6d: %8s] @' % ( | |
| 2745 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | |
| 2746 words[COMMITTED], | |
| 2747 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | |
| 2748 words[COMMITTED])) | |
| 2749 for address in bucket.stacktrace: | |
| 2750 out.write(' 0x%016x' % address) | |
| 2751 out.write('\n') | |
| 2752 | |
| 2753 | |
| 2754 class UploadCommand(Command): | |
| 2755 def __init__(self): | |
| 2756 super(UploadCommand, self).__init__( | |
| 2757 'Usage: %prog upload [--gsutil path/to/gsutil] ' | |
| 2758 '<first-dump> <destination-gs-path>') | |
| 2759 self._parser.add_option('--gsutil', default='gsutil', | |
| 2760 help='path to GSUTIL', metavar='GSUTIL') | |
| 2761 | |
| 2762 def do(self, sys_argv): | |
| 2763 options, args = self._parse_args(sys_argv, 2) | |
| 2764 dump_path = args[1] | |
| 2765 gs_path = args[2] | |
| 2766 | |
| 2767 dump_files = Command._find_all_dumps(dump_path) | |
| 2768 bucket_files = Command._find_all_buckets(dump_path) | |
| 2769 prefix = Command._find_prefix(dump_path) | |
| 2770 symbol_data_sources = SymbolDataSources(prefix) | |
| 2771 symbol_data_sources.prepare() | |
| 2772 symbol_path = symbol_data_sources.path() | |
| 2773 | |
| 2774 handle_zip, filename_zip = tempfile.mkstemp('.zip', 'dmprof') | |
| 2775 os.close(handle_zip) | |
| 2776 | |
| 2777 try: | |
| 2778 file_zip = zipfile.ZipFile(filename_zip, 'w', zipfile.ZIP_DEFLATED) | |
| 2779 for filename in dump_files: | |
| 2780 file_zip.write(filename, os.path.basename(os.path.abspath(filename))) | |
| 2781 for filename in bucket_files: | |
| 2782 file_zip.write(filename, os.path.basename(os.path.abspath(filename))) | |
| 2783 | |
| 2784 symbol_basename = os.path.basename(os.path.abspath(symbol_path)) | |
| 2785 for filename in os.listdir(symbol_path): | |
| 2786 if not filename.startswith('.'): | |
| 2787 file_zip.write(os.path.join(symbol_path, filename), | |
| 2788 os.path.join(symbol_basename, os.path.basename( | |
| 2789 os.path.abspath(filename)))) | |
| 2790 file_zip.close() | |
| 2791 | |
| 2792 returncode = UploadCommand._run_gsutil( | |
| 2793 options.gsutil, 'cp', '-a', 'public-read', filename_zip, gs_path) | |
| 2794 finally: | |
| 2795 os.remove(filename_zip) | |
| 2796 | |
| 2797 return returncode | |
| 2798 | |
| 2799 @staticmethod | |
| 2800 def _run_gsutil(gsutil, *args): | |
| 2801 """Run gsutil as a subprocess. | |
| 2802 | |
| 2803 Args: | |
| gsutil: A path to the gsutil executable. | |
| 2804 *args: Arguments to pass to gsutil. The first argument should be an | |
| 2805 operation such as ls, cp or cat. | |
| 2806 Returns: | |
| 2807 The return code from the process. | |
| 2808 """ | |
| 2809 command = [gsutil] + list(args) | |
| 2810 LOGGER.info("Running: %s", command) | |
| 2811 | |
| 2812 try: | |
| 2813 return subprocess.call(command) | |
| 2814 except OSError, e: | |
| 2815 LOGGER.error('Failed to run gsutil: %s', e) | |
| return 1 | |
| 2816 | |
| 2817 | |
| 2818 class CatCommand(Command): | |
| 2819 def __init__(self): | |
| 2820 super(CatCommand, self).__init__('Usage: %prog cat <first-dump>') | |
| 2821 self._parser.add_option('--alternative-dirs', dest='alternative_dirs', | |
| 2822 metavar='/path/on/target@/path/on/host[:...]', | |
| 2823 help='Read files in /path/on/host/ instead of ' | |
| 2824 'files in /path/on/target/.') | |
| 2825 self._parser.add_option('--indent', dest='indent', action='store_true', | |
| 2826 help='Indent the output.') | |
| 2827 | |
| 2828 def do(self, sys_argv): | |
| 2829 options, args = self._parse_args(sys_argv, 1) | |
| 2830 dump_path = args[1] | |
| 2831 # TODO(dmikurube): Support shared memory. | |
| 2832 alternative_dirs_dict = {} | |
| 2833 if options.alternative_dirs: | |
| 2834 for alternative_dir_pair in options.alternative_dirs.split(':'): | |
| 2835 target_path, host_path = alternative_dir_pair.split('@', 1) | |
| 2836 alternative_dirs_dict[target_path] = host_path | |
| 2837 (bucket_set, dumps) = Command.load_basic_files( | |
| 2838 dump_path, True, alternative_dirs=alternative_dirs_dict) | |
| 2839 | |
| 2840 json_root = OrderedDict() | |
| 2841 json_root['version'] = 1 | |
| 2842 json_root['run_id'] = None | |
| 2843 for dump in dumps: | |
| 2844 if json_root['run_id'] and json_root['run_id'] != dump.run_id: | |
| 2845 LOGGER.error('Inconsistent heap profile dumps.') | |
| 2846 json_root['run_id'] = '' | |
| 2847 break | |
| 2848 json_root['run_id'] = dump.run_id | |
| 2849 json_root['snapshots'] = [] | |
| 2850 | |
| 2851 # Load all sorters. | |
| 2852 sorters = SorterSet() | |
| 2853 | |
| 2854 for dump in dumps: | |
| 2855 json_root['snapshots'].append( | |
| 2856 self._fill_snapshot(dump, bucket_set, sorters)) | |
| 2857 | |
| 2858 if options.indent: | |
| 2859 json.dump(json_root, sys.stdout, indent=2) | |
| 2860 else: | |
| 2861 json.dump(json_root, sys.stdout) | |
| 2862 print '' | |
| 2863 | |
| 2864 @staticmethod | |
| 2865 def _fill_snapshot(dump, bucket_set, sorters): | |
| 2866 root = OrderedDict() | |
| 2867 root['time'] = dump.time | |
| 2868 root['worlds'] = OrderedDict() | |
| 2869 root['worlds']['vm'] = CatCommand._fill_world( | |
| 2870 dump, bucket_set, sorters, 'vm') | |
| 2871 root['worlds']['malloc'] = CatCommand._fill_world( | |
| 2872 dump, bucket_set, sorters, 'malloc') | |
| 2873 return root | |
| 2874 | |
| 2875 @staticmethod | |
| 2876 def _fill_world(dump, bucket_set, sorters, world): | |
| 2877 root = OrderedDict() | |
| 2878 | |
| 2879 root['name'] = 'world' | |
| 2880 if world == 'vm': | |
| 2881 root['unit_fields'] = ['committed', 'reserved'] | |
| 2882 elif world == 'malloc': | |
| 2883 root['unit_fields'] = ['size', 'alloc_count', 'free_count'] | |
| 2884 | |
| 2885 # Make { vm | malloc } units with their sizes. | |
| 2886 root['units'] = OrderedDict() | |
| 2887 unit_set = UnitSet(world) | |
| 2888 if world == 'vm': | |
| 2889 for unit in CatCommand._iterate_vm_unit(dump, None, bucket_set): | |
| 2890 unit_set.append(unit) | |
| 2891 for unit in unit_set: | |
| 2892 root['units'][unit.unit_id] = [unit.committed, unit.reserved] | |
| 2893 elif world == 'malloc': | |
| 2894 for unit in CatCommand._iterate_malloc_unit(dump, bucket_set): | |
| 2895 unit_set.append(unit) | |
| 2896 for unit in unit_set: | |
| 2897 root['units'][unit.unit_id] = [ | |
| 2898 unit.size, unit.alloc_count, unit.free_count] | |
| 2899 | |
| 2900 # Iterate for { vm | malloc } sorters. | |
| 2901 root['breakdown'] = OrderedDict() | |
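| # The breakdown maps each sorter name to categories keyed by rule name, | |
| # where each category lists the unit ids that matched that rule. | |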
| 2902 for sorter in sorters.iter_world(world): | |
| 2903 breakdown = OrderedDict() | |
| 2904 for unit in unit_set: | |
| 2905 found = sorter.find(unit) | |
| 2906 if found.name not in breakdown: | |
| 2907 category = OrderedDict() | |
| 2908 category['name'] = found.name | |
| 2909 category['color'] = 'random' | |
| 2910 subworlds = {} | |
| 2911 for subworld in found.iter_subworld(): | |
| 2912 subworlds[subworld] = False | |
| 2913 if subworlds: | |
| 2914 category['subworlds'] = subworlds | |
| 2915 if found.hidden: | |
| 2916 category['hidden'] = True | |
| 2917 category['units'] = [] | |
| 2918 breakdown[found.name] = category | |
| 2919 breakdown[found.name]['units'].append(unit.unit_id) | |
| 2920 root['breakdown'][sorter.name] = breakdown | |
| 2921 | |
| 2922 return root | |
| 2923 | |
| 2924 @staticmethod | |
| 2925 def _iterate_vm_unit(dump, pfn_dict, bucket_set): | |
| 2926 unit_id = 0 | |
| 2927 for _, region in dump.iter_map: | |
| 2928 unit_id += 1 | |
| 2929 if region[0] == 'unhooked': | |
| 2930 if pfn_dict and dump.pageframe_length: | |
| 2931 for pageframe in region[1]['pageframe']: | |
| 2932 yield UnhookedUnit(unit_id, pageframe.size, pageframe.size, | |
| 2933 region, pageframe, pfn_dict) | |
| 2934 else: | |
| 2935 yield UnhookedUnit(unit_id, | |
| 2936 int(region[1]['committed']), | |
| 2937 int(region[1]['reserved']), | |
| 2938 region) | |
| 2939 elif region[0] == 'hooked': | |
| 2940 if pfn_dict and dump.pageframe_length: | |
| 2941 for pageframe in region[1]['pageframe']: | |
| 2942 yield MMapUnit(unit_id, | |
| 2943 pageframe.size, | |
| 2944 pageframe.size, | |
| 2945 region, bucket_set, pageframe, pfn_dict) | |
| 2946 else: | |
| 2947 yield MMapUnit(unit_id, | |
| 2948 int(region[1]['committed']), | |
| 2949 int(region[1]['reserved']), | |
| 2950 region, | |
| 2951 bucket_set) | |
| 2952 else: | |
| 2953 LOGGER.error('Unrecognized mapping status: %s' % region[0]) | |
| 2954 | |
| 2955 @staticmethod | |
| 2956 def _iterate_malloc_unit(dump, bucket_set): | |
| 2957 for line in dump.iter_stacktrace: | |
| 2958 words = line.split() | |
| 2959 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 2960 if bucket and bucket.allocator_type == 'malloc': | |
| 2961 yield MallocUnit(int(words[BUCKET_ID]), | |
| 2962 int(words[COMMITTED]), | |
| 2963 int(words[ALLOC_COUNT]), | |
| 2964 int(words[FREE_COUNT]), | |
| 2965 bucket) | |
| 2966 elif not bucket: | |
| 2967 # Buckets that are not found are all assumed to be malloc buckets. | |
| 2968 yield MallocUnit(int(words[BUCKET_ID]), | |
| 2969 int(words[COMMITTED]), | |
| 2970 int(words[ALLOC_COUNT]), | |
| 2971 int(words[FREE_COUNT]), | |
| 2972 None) | |
| 2973 | 18 |
| 2974 | 19 |
| 2975 def main(): | 20 def main(): |
| 2976 COMMANDS = { | 21 COMMANDS = { |
| 2977 'buckets': BucketsCommand, | 22 'buckets': subcommands.BucketsCommand, |
| 2978 'cat': CatCommand, | 23 'cat': subcommands.CatCommand, |
| 2979 'csv': CSVCommand, | 24 'csv': subcommands.CSVCommand, |
| 2980 'expand': ExpandCommand, | 25 'expand': subcommands.ExpandCommand, |
| 2981 'json': JSONCommand, | 26 'json': subcommands.JSONCommand, |
| 2982 'list': ListCommand, | 27 'list': subcommands.ListCommand, |
| 2983 'map': MapCommand, | 28 'map': subcommands.MapCommand, |
| 2984 'pprof': PProfCommand, | 29 'pprof': subcommands.PProfCommand, |
| 2985 'stacktrace': StacktraceCommand, | 30 'stacktrace': subcommands.StacktraceCommand, |
| 2986 'upload': UploadCommand, | 31 'upload': subcommands.UploadCommand, |
| 2987 } | 32 } |
| 2988 | 33 |
| 2989 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS): | 34 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS): |
| 2990 sys.stderr.write("""Usage: dmprof <command> [options] [<args>] | 35 sys.stderr.write("""Usage: dmprof <command> [options] [<args>] |
| 2991 | 36 |
| 2992 Commands: | 37 Commands: |
| 2993 buckets Dump a bucket list with resolving symbols | 38 buckets Dump a bucket list with resolving symbols |
| 2994 cat Categorize memory usage (under development) | 39 cat Categorize memory usage (under development) |
| 2995 csv Classify memory usage in CSV | 40 csv Classify memory usage in CSV |
| 2996 expand Show all stacktraces contained in the specified component | 41 expand Show all stacktraces contained in the specified component |
| (...skipping 30 matching lines...) | |
| 3027 errorcode = COMMANDS[action]().do(sys.argv) | 72 errorcode = COMMANDS[action]().do(sys.argv) |
| 3028 except ParsingException, e: | 73 except ParsingException, e: |
| 3029 errorcode = 1 | 74 errorcode = 1 |
| 3030 sys.stderr.write('Exit by parsing error: %s\n' % e) | 75 sys.stderr.write('Exit by parsing error: %s\n' % e) |
| 3031 | 76 |
| 3032 return errorcode | 77 return errorcode |
| 3033 | 78 |
| 3034 | 79 |
| 3035 if __name__ == '__main__': | 80 if __name__ == '__main__': |
| 3036 sys.exit(main()) | 81 sys.exit(main()) |