| OLD | NEW |
| 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """The deep heap profiler script for Chrome.""" | 5 """The Deep Memory Profiler analyzer script. |
| 6 | 6 |
| 7 import copy | 7 See http://dev.chromium.org/developers/deep-memory-profiler for details. |
| 8 import cStringIO | 8 """ |
| 9 import datetime | 9 |
| 10 import json | |
| 11 import logging | 10 import logging |
| 12 import optparse | |
| 13 import os | |
| 14 import re | |
| 15 import struct | |
| 16 import subprocess | |
| 17 import sys | 11 import sys |
| 18 import tempfile | |
| 19 import time | |
| 20 import zipfile | |
| 21 | 12 |
| 22 try: | 13 from lib.exception import ParsingException |
| 23 from collections import OrderedDict # pylint: disable=E0611 | 14 import subcommands |
| 24 except ImportError: | |
| 25 # TODO(dmikurube): Remove this once Python 2.7 is required. | |
| 26 BASE_PATH = os.path.dirname(os.path.abspath(__file__)) | |
| 27 SIMPLEJSON_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir, 'third_party') | |
| 28 sys.path.insert(0, SIMPLEJSON_PATH) | |
| 29 from simplejson import OrderedDict | |
| 30 | 15 |
| 31 from range_dict import ExclusiveRangeDict | |
| 32 | |
| 33 BASE_PATH = os.path.dirname(os.path.abspath(__file__)) | |
| 34 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( | |
| 35 BASE_PATH, os.pardir, 'find_runtime_symbols') | |
| 36 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) | |
| 37 | |
| 38 import find_runtime_symbols | |
| 39 import prepare_symbol_info | |
| 40 import proc_maps | |
| 41 | |
| 42 from find_runtime_symbols import FUNCTION_SYMBOLS | |
| 43 from find_runtime_symbols import SOURCEFILE_SYMBOLS | |
| 44 from find_runtime_symbols import TYPEINFO_SYMBOLS | |
| 45 | |
| 46 BUCKET_ID = 5 | |
| 47 VIRTUAL = 0 | |
| 48 COMMITTED = 1 | |
| 49 ALLOC_COUNT = 2 | |
| 50 FREE_COUNT = 3 | |
| 51 NULL_REGEX = re.compile('') | |
| 52 | 16 |
| 53 LOGGER = logging.getLogger('dmprof') | 17 LOGGER = logging.getLogger('dmprof') |
| 54 POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json') | |
| 55 CHROME_SRC_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir) | |
| 56 | |
| 57 DEFAULT_SORTERS = [ | |
| 58 os.path.join(BASE_PATH, 'sorter.malloc-component.json'), | |
| 59 os.path.join(BASE_PATH, 'sorter.malloc-type.json'), | |
| 60 os.path.join(BASE_PATH, 'sorter.vm-map.json'), | |
| 61 os.path.join(BASE_PATH, 'sorter.vm-sharing.json'), | |
| 62 ] | |
| 63 | |
| 64 | |
| 65 # Heap Profile Dump versions | |
| 66 | |
| 67 # DUMP_DEEP_[1-4] are obsolete. | |
| 68 # DUMP_DEEP_2+ distinguish mmap regions and malloc chunks. |
| 69 # DUMP_DEEP_3+ don't include allocation functions in their stack dumps. | |
| 70 # DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*". | |
| 71 # DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1. | |
| 72 # DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3. | |
| 73 DUMP_DEEP_1 = 'DUMP_DEEP_1' | |
| 74 DUMP_DEEP_2 = 'DUMP_DEEP_2' | |
| 75 DUMP_DEEP_3 = 'DUMP_DEEP_3' | |
| 76 DUMP_DEEP_4 = 'DUMP_DEEP_4' | |
| 77 | |
| 78 DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4) | |
| 79 | |
| 80 # DUMP_DEEP_5 doesn't separate sections for malloc and mmap. | |
| 81 # malloc and mmap are identified in bucket files. | |
| 82 # DUMP_DEEP_5 should be processed by POLICY_DEEP_4. | |
| 83 DUMP_DEEP_5 = 'DUMP_DEEP_5' | |
| 84 | |
| 85 # DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5. | |
| 86 DUMP_DEEP_6 = 'DUMP_DEEP_6' | |
| 87 | |
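As a concrete illustration, a DUMP_DEEP_5 or DUMP_DEEP_6 dump announces its version in a header line; the header below is hypothetical, and the slicing mirrors what `Dump._parse_version()` does later in this file:

```python
# Hypothetical first meaningful line of a modern heap profile dump.
line = 'heap profile: DUMP_DEEP_6\n'
version = line[13:].strip()  # -> 'DUMP_DEEP_6', as in Dump._parse_version()
assert version == DUMP_DEEP_6
```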
| 88 # Heap Profile Policy versions | |
| 89 | |
| 90 # POLICY_DEEP_1 DOES NOT include allocation_type columns. | |
| 91 # mmap regions are distinguished with mmap frames in the pattern column. |
| 92 POLICY_DEEP_1 = 'POLICY_DEEP_1' | |
| 93 | |
| 94 # POLICY_DEEP_2 DOES include allocation_type columns. | |
| 95 # mmap regions are distinguished with the allocation_type column. |
| 96 POLICY_DEEP_2 = 'POLICY_DEEP_2' | |
| 97 | |
| 98 # POLICY_DEEP_3 is in JSON format. | |
| 99 POLICY_DEEP_3 = 'POLICY_DEEP_3' | |
| 100 | |
| 101 # POLICY_DEEP_4 contains typeinfo. |
| 102 POLICY_DEEP_4 = 'POLICY_DEEP_4' | |
| 103 | |
| 104 | |
| 105 class EmptyDumpException(Exception): | |
| 106 def __init__(self, value=''): | |
| 107 super(EmptyDumpException, self).__init__() | |
| 108 self.value = value | |
| 109 def __str__(self): | |
| 110 return repr(self.value) | |
| 111 | |
| 112 | |
| 113 class ParsingException(Exception): | |
| 114 def __init__(self, value=''): | |
| 115 super(ParsingException, self).__init__() | |
| 116 self.value = value | |
| 117 def __str__(self): | |
| 118 return repr(self.value) | |
| 119 | |
| 120 | |
| 121 class InvalidDumpException(ParsingException): | |
| 122 def __init__(self, value): | |
| 123 super(InvalidDumpException, self).__init__() | |
| 124 self.value = value | |
| 125 def __str__(self): | |
| 126 return "invalid heap profile dump: %s" % repr(self.value) | |
| 127 | |
| 128 | |
| 129 class ObsoleteDumpVersionException(ParsingException): | |
| 130 def __init__(self, value): | |
| 131 super(ObsoleteDumpVersionException, self).__init__() | |
| 132 self.value = value | |
| 133 def __str__(self): | |
| 134 return "obsolete heap profile dump version: %s" % repr(self.value) | |
| 135 | |
| 136 | |
| 137 class ListAttribute(ExclusiveRangeDict.RangeAttribute): | |
| 138 """Represents a list for an attribute in range_dict.ExclusiveRangeDict.""" | |
| 139 def __init__(self): | |
| 140 super(ListAttribute, self).__init__() | |
| 141 self._list = [] | |
| 142 | |
| 143 def __str__(self): | |
| 144 return str(self._list) | |
| 145 | |
| 146 def __repr__(self): | |
| 147 return 'ListAttribute' + str(self._list) | |
| 148 | |
| 149 def __len__(self): | |
| 150 return len(self._list) | |
| 151 | |
| 152 def __iter__(self): | |
| 153 for x in self._list: | |
| 154 yield x | |
| 155 | |
| 156 def __getitem__(self, index): | |
| 157 return self._list[index] | |
| 158 | |
| 159 def __setitem__(self, index, value): | |
| 160 if index >= len(self._list): | |
| 161 self._list.extend([None] * (index + 1 - len(self._list))) | |
| 162 self._list[index] = value | |
| 163 | |
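A brief usage sketch of the auto-extending `__setitem__` above; assigning past the end pads the list with None:

```python
attr = ListAttribute()
attr[3] = 'x'      # indices 0-2 are padded with None
print list(attr)   # [None, None, None, 'x']
print len(attr)    # 4
```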
| 164 def copy(self): | |
| 165 new_list = ListAttribute() | |
| 166 for index, item in enumerate(self._list): | |
| 167 new_list[index] = copy.deepcopy(item) | |
| 168 return new_list | |
| 169 | |
| 170 | |
| 171 class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute): | |
| 172 """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict.""" | |
| 173 _DUMMY_ENTRY = proc_maps.ProcMapsEntry( | |
| 174 0, # begin | |
| 175 0, # end | |
| 176 '-', # readable | |
| 177 '-', # writable | |
| 178 '-', # executable | |
| 179 '-', # private | |
| 180 0, # offset | |
| 181 '00', # major | |
| 182 '00', # minor | |
| 183 0, # inode | |
| 184 '' # name | |
| 185 ) | |
| 186 | |
| 187 def __init__(self): | |
| 188 super(ProcMapsEntryAttribute, self).__init__() | |
| 189 self._entry = self._DUMMY_ENTRY.as_dict() | |
| 190 | |
| 191 def __str__(self): | |
| 192 return str(self._entry) | |
| 193 | |
| 194 def __repr__(self): | |
| 195 return 'ProcMapsEntryAttribute' + str(self._entry) | |
| 196 | |
| 197 def __getitem__(self, key): | |
| 198 return self._entry[key] | |
| 199 | |
| 200 def __setitem__(self, key, value): | |
| 201 if key not in self._entry: | |
| 202 raise KeyError(key) | |
| 203 self._entry[key] = value | |
| 204 | |
| 205 def copy(self): | |
| 206 new_entry = ProcMapsEntryAttribute() | |
| 207 for key, value in self._entry.iteritems(): | |
| 208 new_entry[key] = copy.deepcopy(value) | |
| 209 return new_entry | |
| 210 | |
| 211 | |
| 212 def skip_while(index, max_index, skipping_condition): | |
| 213 """Increments |index| until |skipping_condition|(|index|) is False. | |
| 214 | |
| 215 Returns: | |
| 216 A pair of (the smallest index at which |skipping_condition| is False, |
| 217 and a boolean which is True if such an index was found before |
| 218 reaching |max_index|). |
| 219 """ | |
| 220 while skipping_condition(index): | |
| 221 index += 1 | |
| 222 if index >= max_index: | |
| 223 return index, False | |
| 224 return index, True | |
| 225 | |
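For example, with hypothetical input, `skip_while` can skip leading comment lines:

```python
lines = ['# comment\n', '# comment\n', 'DATA\n']
index, found = skip_while(0, len(lines),
                          lambda n: lines[n].startswith('#'))
print index, found  # 2 True: lines[2] is the first non-comment line
```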
| 226 | |
| 227 class SymbolDataSources(object): | |
| 228 """Manages symbol data sources in a process. | |
| 229 | |
| 230 The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and | |
| 231 so on. They are collected into a directory '|prefix|.symmap' from the binary | |
| 232 files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py. | |
| 233 | |
| 234 The binaries themselves are not required for profiling. The prepared |
| 235 data sources work in place of a binary even after the binary has been |
| 236 overwritten by another build. |
| 237 | |
| 238 Note that the symbol data sources are often very big, and loading them |
| 239 takes a long time. The 'dmprof' profiler therefore uses |
| 240 'SymbolMappingCache', which caches only the symbols actually used. |
| 241 """ | |
| 242 def __init__(self, prefix, alternative_dirs=None): | |
| 243 self._prefix = prefix | |
| 244 self._prepared_symbol_data_sources_path = None | |
| 245 self._loaded_symbol_data_sources = None | |
| 246 self._alternative_dirs = alternative_dirs or {} | |
| 247 | |
| 248 def prepare(self): | |
| 249 """Prepares symbol data sources by extracting mapping from a binary. | |
| 250 | |
| 251 The prepared symbol data sources are stored in a directory. The directory | |
| 252 name is stored in |self._prepared_symbol_data_sources_path|. | |
| 253 | |
| 254 Returns: | |
| 255 True if succeeded. | |
| 256 """ | |
| 257 LOGGER.info('Preparing symbol mapping...') | |
| 258 self._prepared_symbol_data_sources_path, used_tempdir = ( | |
| 259 prepare_symbol_info.prepare_symbol_info( | |
| 260 self._prefix + '.maps', | |
| 261 output_dir_path=self._prefix + '.symmap', | |
| 262 alternative_dirs=self._alternative_dirs, | |
| 263 use_tempdir=True, | |
| 264 use_source_file_name=True)) | |
| 265 if self._prepared_symbol_data_sources_path: | |
| 266 LOGGER.info(' Prepared symbol mapping.') | |
| 267 if used_tempdir: | |
| 268 LOGGER.warn(' Using a temporary directory for symbol mapping.') | |
| 269 LOGGER.warn(' Delete it yourself when it is no longer needed.') |
| 270 LOGGER.warn(' Or, move the directory elsewhere to reuse it later.') |
| 271 return True | |
| 272 else: | |
| 273 LOGGER.warn(' Failed to prepare symbol mapping.') | |
| 274 return False | |
| 275 | |
| 276 def get(self): | |
| 277 """Returns the prepared symbol data sources. | |
| 278 | |
| 279 Returns: | |
| 280 The prepared symbol data sources. None if failed. | |
| 281 """ | |
| 282 if not self._prepared_symbol_data_sources_path and not self.prepare(): | |
| 283 return None | |
| 284 if not self._loaded_symbol_data_sources: | |
| 285 LOGGER.info('Loading symbol mapping...') | |
| 286 self._loaded_symbol_data_sources = ( | |
| 287 find_runtime_symbols.RuntimeSymbolsInProcess.load( | |
| 288 self._prepared_symbol_data_sources_path)) | |
| 289 return self._loaded_symbol_data_sources | |
| 290 | |
| 291 def path(self): | |
| 292 """Returns the path of the prepared symbol data sources if possible.""" | |
| 293 if not self._prepared_symbol_data_sources_path and not self.prepare(): | |
| 294 return None | |
| 295 return self._prepared_symbol_data_sources_path | |
| 296 | |
| 297 | |
| 298 class SymbolFinder(object): | |
| 299 """Finds corresponding symbols from addresses. | |
| 300 | |
| 301 This class only 'find()'s symbols for a specified |address_list|. |
| 302 It is introduced to make a finder mockable. | |
| 303 """ | |
| 304 def __init__(self, symbol_type, symbol_data_sources): | |
| 305 self._symbol_type = symbol_type | |
| 306 self._symbol_data_sources = symbol_data_sources | |
| 307 | |
| 308 def find(self, address_list): | |
| 309 return find_runtime_symbols.find_runtime_symbols( | |
| 310 self._symbol_type, self._symbol_data_sources.get(), address_list) | |
| 311 | |
| 312 | |
| 313 class SymbolMappingCache(object): | |
| 314 """Caches mapping from actually used addresses to symbols. | |
| 315 | |
| 316 'update()' updates the cache from the original symbol data sources via | |
| 317 'SymbolFinder'. Symbols can be looked up by the method 'lookup()'. | |
| 318 """ | |
| 319 def __init__(self): | |
| 320 self._symbol_mapping_caches = { | |
| 321 FUNCTION_SYMBOLS: {}, | |
| 322 SOURCEFILE_SYMBOLS: {}, | |
| 323 TYPEINFO_SYMBOLS: {}, | |
| 324 } | |
| 325 | |
| 326 def update(self, symbol_type, bucket_set, symbol_finder, cache_f): | |
| 327 """Updates symbol mapping cache on memory and in a symbol cache file. | |
| 328 | |
| 329 It reads cached symbol mapping from a symbol cache file |cache_f| if it | |
| 330 exists. Unresolved addresses are then resolved and added to the cache | |
| 331 both in memory and in the symbol cache file using 'SymbolFinder'. |
| 332 | |
| 333 A cache file is formatted as follows: | |
| 334 <Address> <Symbol> | |
| 335 <Address> <Symbol> | |
| 336 <Address> <Symbol> | |
| 337 ... | |
| 338 | |
| 339 Args: | |
| 340 symbol_type: A type of symbols to update. It should be one of | |
| 341 FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS. | |
| 342 bucket_set: A BucketSet object. | |
| 343 symbol_finder: A SymbolFinder object to find symbols. | |
| 344 cache_f: A readable and writable IO object of the symbol cache file. | |
| 345 """ | |
| 346 cache_f.seek(0, os.SEEK_SET) | |
| 347 self._load(cache_f, symbol_type) | |
| 348 | |
| 349 unresolved_addresses = sorted( | |
| 350 address for address in bucket_set.iter_addresses(symbol_type) | |
| 351 if address not in self._symbol_mapping_caches[symbol_type]) | |
| 352 | |
| 353 if not unresolved_addresses: | |
| 354 LOGGER.info('No need to resolve any more addresses.') | |
| 355 return | |
| 356 | |
| 357 cache_f.seek(0, os.SEEK_END) | |
| 358 LOGGER.info('Resolving %d unresolved addresses.' % |
| 359 len(unresolved_addresses)) | |
| 360 symbol_dict = symbol_finder.find(unresolved_addresses) | |
| 361 | |
| 362 for address, symbol in symbol_dict.iteritems(): | |
| 363 stripped_symbol = symbol.strip() or '?' | |
| 364 self._symbol_mapping_caches[symbol_type][address] = stripped_symbol | |
| 365 cache_f.write('%x %s\n' % (address, stripped_symbol)) | |
| 366 | |
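A minimal sketch of the cache file format described in the docstring above; the address and symbol values are hypothetical:

```python
import cStringIO
cache_f = cStringIO.StringIO('deadbeef MyNamespace::MyFunction\n')
items = cache_f.readline().rstrip().split(None, 1)
print int(items[0], 16)  # 3735928559: the address key, as parsed in _load()
print items[1]           # 'MyNamespace::MyFunction'
```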
| 367 def lookup(self, symbol_type, address): | |
| 368 """Looks up a symbol for a given |address|. | |
| 369 | |
| 370 Args: | |
| 371 symbol_type: A type of symbols to look up. It should be one of |
| 372 FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS. | |
| 373 address: An integer that represents an address. | |
| 374 | |
| 375 Returns: | |
| 376 A string that represents a symbol. | |
| 377 """ | |
| 378 return self._symbol_mapping_caches[symbol_type].get(address) | |
| 379 | |
| 380 def _load(self, cache_f, symbol_type): | |
| 381 try: | |
| 382 for line in cache_f: | |
| 383 items = line.rstrip().split(None, 1) | |
| 384 if len(items) == 1: | |
| 385 items.append('??') | |
| 386 self._symbol_mapping_caches[symbol_type][int(items[0], 16)] = items[1] | |
| 387 LOGGER.info('Loaded %d entries from symbol cache.' % | |
| 388 len(self._symbol_mapping_caches[symbol_type])) | |
| 389 except IOError as e: | |
| 390 LOGGER.info('The symbol cache file is invalid: %s' % e) | |
| 391 | |
| 392 | |
| 393 class Rule(object): | |
| 394 """Represents one matching rule in a policy file.""" | |
| 395 | |
| 396 def __init__(self, | |
| 397 name, | |
| 398 allocator_type, | |
| 399 stackfunction_pattern=None, | |
| 400 stacksourcefile_pattern=None, | |
| 401 typeinfo_pattern=None, | |
| 402 mappedpathname_pattern=None, | |
| 403 mappedpermission_pattern=None, | |
| 404 sharedwith=None): | |
| 405 self._name = name | |
| 406 self._allocator_type = allocator_type | |
| 407 | |
| 408 self._stackfunction_pattern = None | |
| 409 if stackfunction_pattern: | |
| 410 self._stackfunction_pattern = re.compile( | |
| 411 stackfunction_pattern + r'\Z') | |
| 412 | |
| 413 self._stacksourcefile_pattern = None | |
| 414 if stacksourcefile_pattern: | |
| 415 self._stacksourcefile_pattern = re.compile( | |
| 416 stacksourcefile_pattern + r'\Z') | |
| 417 | |
| 418 self._typeinfo_pattern = None | |
| 419 if typeinfo_pattern: | |
| 420 self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z') | |
| 421 | |
| 422 self._mappedpathname_pattern = None | |
| 423 if mappedpathname_pattern: | |
| 424 self._mappedpathname_pattern = re.compile(mappedpathname_pattern + r'\Z') | |
| 425 | |
| 426 self._mappedpermission_pattern = None | |
| 427 if mappedpermission_pattern: | |
| 428 self._mappedpermission_pattern = re.compile( | |
| 429 mappedpermission_pattern + r'\Z') | |
| 430 | |
| 431 self._sharedwith = [] | |
| 432 if sharedwith: | |
| 433 self._sharedwith = sharedwith | |
| 434 | |
| 435 @property | |
| 436 def name(self): | |
| 437 return self._name | |
| 438 | |
| 439 @property | |
| 440 def allocator_type(self): | |
| 441 return self._allocator_type | |
| 442 | |
| 443 @property | |
| 444 def stackfunction_pattern(self): | |
| 445 return self._stackfunction_pattern | |
| 446 | |
| 447 @property | |
| 448 def stacksourcefile_pattern(self): | |
| 449 return self._stacksourcefile_pattern | |
| 450 | |
| 451 @property | |
| 452 def typeinfo_pattern(self): | |
| 453 return self._typeinfo_pattern | |
| 454 | |
| 455 @property | |
| 456 def mappedpathname_pattern(self): | |
| 457 return self._mappedpathname_pattern | |
| 458 | |
| 459 @property | |
| 460 def mappedpermission_pattern(self): | |
| 461 return self._mappedpermission_pattern | |
| 462 | |
| 463 @property | |
| 464 def sharedwith(self): | |
| 465 return self._sharedwith | |
| 466 | |
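Note that every pattern above is compiled with r'\Z' appended, so a rule matches only if the whole symbolized string matches, not just a prefix. A quick sketch with a hypothetical pattern:

```python
import re
p_prefix = re.compile('v8::')          # plain re.match: a prefix is enough
p_full = re.compile('v8::' + r'\Z')    # how Rule compiles its patterns
print bool(p_prefix.match('v8::internal::Heap'))  # True
print bool(p_full.match('v8::internal::Heap'))    # False: not a full match
```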
| 467 | |
| 468 class Policy(object): | |
| 469 """Represents a policy, a content of a policy file.""" | |
| 470 | |
| 471 def __init__(self, rules, version, components): | |
| 472 self._rules = rules | |
| 473 self._version = version | |
| 474 self._components = components | |
| 475 | |
| 476 @property | |
| 477 def rules(self): | |
| 478 return self._rules | |
| 479 | |
| 480 @property | |
| 481 def version(self): | |
| 482 return self._version | |
| 483 | |
| 484 @property | |
| 485 def components(self): | |
| 486 return self._components | |
| 487 | |
| 488 def find_rule(self, component_name): | |
| 489 """Finds a rule whose name is |component_name|. """ | |
| 490 for rule in self._rules: | |
| 491 if rule.name == component_name: | |
| 492 return rule | |
| 493 return None | |
| 494 | |
| 495 def find_malloc(self, bucket): | |
| 496 """Finds a matching component name which a given |bucket| belongs to. | |
| 497 | |
| 498 Args: | |
| 499 bucket: A Bucket object to be searched for. | |
| 500 | |
| 501 Returns: | |
| 502 A string representing a component name. | |
| 503 """ | |
| 504 assert not bucket or bucket.allocator_type == 'malloc' | |
| 505 | |
| 506 if not bucket: | |
| 507 return 'no-bucket' | |
| 508 if bucket.component_cache: | |
| 509 return bucket.component_cache | |
| 510 | |
| 511 stackfunction = bucket.symbolized_joined_stackfunction | |
| 512 stacksourcefile = bucket.symbolized_joined_stacksourcefile | |
| 513 typeinfo = bucket.symbolized_typeinfo | |
| 514 if typeinfo.startswith('0x'): | |
| 515 typeinfo = bucket.typeinfo_name | |
| 516 | |
| 517 for rule in self._rules: | |
| 518 if (rule.allocator_type == 'malloc' and | |
| 519 (not rule.stackfunction_pattern or | |
| 520 rule.stackfunction_pattern.match(stackfunction)) and | |
| 521 (not rule.stacksourcefile_pattern or | |
| 522 rule.stacksourcefile_pattern.match(stacksourcefile)) and | |
| 523 (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))): | |
| 524 bucket.component_cache = rule.name | |
| 525 return rule.name | |
| 526 | |
| 527 assert False | |
| 528 | |
| 529 def find_mmap(self, region, bucket_set, | |
| 530 pageframe=None, group_pfn_counts=None): | |
| 531 """Finds a matching component which a given mmap |region| belongs to. | |
| 532 | |
| 533 It uses |bucket_set| to match with backtraces. If |pageframe| is given, | |
| 534 it considers memory sharing among processes. | |
| 535 | |
| 536 NOTE: Don't use Bucket's |component_cache| for mmap regions because they're | |
| 537 classified not only with bucket information (mappedpathname for example). | |
| 538 | |
| 539 Args: | |
| 540 region: A tuple representing a memory region. | |
| 541 bucket_set: A BucketSet object to look up backtraces. | |
| 542 pageframe: A PageFrame object representing a pageframe, possibly |
| 543 including a pagecount. |
| 544 group_pfn_counts: A dict mapping a PFN to the number of times the | |
| 545 pageframe is mapped by the known "group (Chrome)" processes. |
| 546 | |
| 547 Returns: | |
| 548 A pair of a string representing a component name, and a Bucket object. |
| 549 """ | |
| 550 assert region[0] == 'hooked' | |
| 551 bucket = bucket_set.get(region[1]['bucket_id']) | |
| 552 assert not bucket or bucket.allocator_type == 'mmap' | |
| 553 | |
| 554 if not bucket: | |
| 555 return 'no-bucket', None | |
| 556 | |
| 557 stackfunction = bucket.symbolized_joined_stackfunction | |
| 558 stacksourcefile = bucket.symbolized_joined_stacksourcefile | |
| 559 sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts) | |
| 560 | |
| 561 for rule in self._rules: | |
| 562 if (rule.allocator_type == 'mmap' and | |
| 563 (not rule.stackfunction_pattern or | |
| 564 rule.stackfunction_pattern.match(stackfunction)) and | |
| 565 (not rule.stacksourcefile_pattern or | |
| 566 rule.stacksourcefile_pattern.match(stacksourcefile)) and | |
| 567 (not rule.mappedpathname_pattern or | |
| 568 rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and | |
| 569 (not rule.mappedpermission_pattern or | |
| 570 rule.mappedpermission_pattern.match( | |
| 571 region[1]['vma']['readable'] + | |
| 572 region[1]['vma']['writable'] + | |
| 573 region[1]['vma']['executable'] + | |
| 574 region[1]['vma']['private'])) and | |
| 575 (not rule.sharedwith or | |
| 576 not pageframe or sharedwith in rule.sharedwith)): | |
| 577 return rule.name, bucket | |
| 578 | |
| 579 assert False | |
| 580 | |
| 581 def find_unhooked(self, region, pageframe=None, group_pfn_counts=None): | |
| 582 """Finds a matching component which a given unhooked |region| belongs to. | |
| 583 | |
| 584 If |pageframe| is given, it considers memory sharing among processes. | |
| 585 | |
| 586 Args: | |
| 587 region: A tuple representing a memory region. | |
| 588 pageframe: A PageFrame object representing a pageframe, possibly |
| 589 including a pagecount. |
| 590 group_pfn_counts: A dict mapping a PFN to the number of times the | |
| 591 pageframe is mapped by the known "group (Chrome)" processes. |
| 592 | |
| 593 Returns: | |
| 594 A string representing a component name. | |
| 595 """ | |
| 596 assert region[0] == 'unhooked' | |
| 597 sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts) | |
| 598 | |
| 599 for rule in self._rules: | |
| 600 if (rule.allocator_type == 'unhooked' and | |
| 601 (not rule.mappedpathname_pattern or | |
| 602 rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and | |
| 603 (not rule.mappedpermission_pattern or | |
| 604 rule.mappedpermission_pattern.match( | |
| 605 region[1]['vma']['readable'] + | |
| 606 region[1]['vma']['writable'] + | |
| 607 region[1]['vma']['executable'] + | |
| 608 region[1]['vma']['private'])) and | |
| 609 (not rule.sharedwith or | |
| 610 not pageframe or sharedwith in rule.sharedwith)): | |
| 611 return rule.name | |
| 612 | |
| 613 assert False | |
| 614 | |
| 615 @staticmethod | |
| 616 def load(filename, filetype): | |
| 617 """Loads a policy file of |filename| in a |format|. | |
| 618 | |
| 619 Args: | |
| 620 filename: A filename to be loaded. | |
| 621 filetype: A string to specify a type of the file. Only 'json' is | |
| 622 supported for now. | |
| 623 | |
| 624 Returns: | |
| 625 A loaded Policy object. | |
| 626 """ | |
| 627 with open(os.path.join(BASE_PATH, filename)) as policy_f: | |
| 628 return Policy.parse(policy_f, filetype) | |
| 629 | |
| 630 @staticmethod | |
| 631 def parse(policy_f, filetype): | |
| 632 """Parses a policy file content in a |format|. | |
| 633 | |
| 634 Args: | |
| 635 policy_f: An IO object to be loaded. | |
| 636 filetype: A string to specify a type of the file. Only 'json' is | |
| 637 supported for now. | |
| 638 | |
| 639 Returns: | |
| 640 A loaded Policy object. | |
| 641 """ | |
| 642 if filetype == 'json': | |
| 643 return Policy._parse_json(policy_f) | |
| 644 else: | |
| 645 return None | |
| 646 | |
| 647 @staticmethod | |
| 648 def _parse_json(policy_f): | |
| 649 """Parses policy file in json format. | |
| 650 | |
| 651 A policy file contains component names and their stacktrace patterns |
| 652 written as regular expressions. Those patterns are matched against the |
| 653 symbols of each stacktrace in the order written in the policy file. |
| 654 | |
| 655 Args: | |
| 656 policy_f: A File/IO object to read. | |
| 657 | |
| 658 Returns: | |
| 659 A loaded policy object. | |
| 660 """ | |
| 661 policy = json.load(policy_f) | |
| 662 | |
| 663 rules = [] | |
| 664 for rule in policy['rules']: | |
| 665 stackfunction = rule.get('stackfunction') or rule.get('stacktrace') | |
| 666 stacksourcefile = rule.get('stacksourcefile') | |
| 667 rules.append(Rule( | |
| 668 rule['name'], | |
| 669 rule['allocator'], # allocator_type | |
| 670 stackfunction, | |
| 671 stacksourcefile, | |
| 672 rule['typeinfo'] if 'typeinfo' in rule else None, | |
| 673 rule.get('mappedpathname'), | |
| 674 rule.get('mappedpermission'), | |
| 675 rule.get('sharedwith'))) | |
| 676 | |
| 677 return Policy(rules, policy['version'], policy['components']) | |
| 678 | |
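A minimal, hypothetical policy file that `_parse_json()` accepts; it exercises only the keys read above ('version', 'components', 'rules', and each rule's 'name', 'allocator' and 'stackfunction'):

```python
import cStringIO
POLICY_JSON = '''{
  "version": "POLICY_DEEP_4",
  "components": ["no-bucket", "v8-heap", "unknown"],
  "rules": [
    {"name": "v8-heap", "allocator": "mmap", "stackfunction": "v8::.*"},
    {"name": "unknown", "allocator": "malloc", "stackfunction": ".*"}
  ]
}'''
policy = Policy.parse(cStringIO.StringIO(POLICY_JSON), 'json')
print policy.version  # 'POLICY_DEEP_4'
print policy.find_rule('v8-heap').stackfunction_pattern.pattern  # 'v8::.*\Z'
```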
| 679 @staticmethod | |
| 680 def _categorize_pageframe(pageframe, group_pfn_counts): | |
| 681 """Categorizes a pageframe based on its sharing status. | |
| 682 | |
| 683 Returns: | |
| 684 'private' if |pageframe| is not shared with other processes. 'group' | |
| 685 if |pageframe| is shared only with group (Chrome-related) processes. | |
| 686 'others' if |pageframe| is shared with non-group processes. | |
| 687 """ | |
| 688 if not pageframe: | |
| 689 return 'private' | |
| 690 | |
| 691 if pageframe.pagecount: | |
| 692 if pageframe.pagecount == 1: | |
| 693 return 'private' | |
| 694 elif pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1: | |
| 695 return 'group' | |
| 696 else: | |
| 697 return 'others' | |
| 698 else: | |
| 699 if pageframe.pfn in group_pfn_counts: | |
| 700 return 'group' | |
| 701 else: | |
| 702 return 'private' | |
| 703 | |
| 704 | |
| 705 class PolicySet(object): | |
| 706 """Represents a set of policies.""" | |
| 707 | |
| 708 def __init__(self, policy_directory): | |
| 709 self._policy_directory = policy_directory | |
| 710 | |
| 711 @staticmethod | |
| 712 def load(labels=None): | |
| 713 """Loads a set of policies via the "default policy directory". | |
| 714 | |
| 715 The "default policy directory" contains pairs of policies and their labels. | |
| 716 For example, a policy "policy.l0.json" is labeled "l0" in the default | |
| 717 policy directory "policies.json". | |
| 718 | |
| 719 All policies in the directory are loaded by default. Policies can be | |
| 720 limited by |labels|. | |
| 721 | |
| 722 Args: | |
| 723 labels: An array that contains policy labels to be loaded. | |
| 724 | |
| 725 Returns: | |
| 726 A PolicySet object. | |
| 727 """ | |
| 728 default_policy_directory = PolicySet._load_default_policy_directory() | |
| 729 if labels: | |
| 730 specified_policy_directory = {} | |
| 731 for label in labels: | |
| 732 if label in default_policy_directory: | |
| 733 specified_policy_directory[label] = default_policy_directory[label] | |
| 734 # TODO(dmikurube): Load an un-labeled policy file. | |
| 735 return PolicySet._load_policies(specified_policy_directory) | |
| 736 else: | |
| 737 return PolicySet._load_policies(default_policy_directory) | |
| 738 | |
| 739 def __len__(self): | |
| 740 return len(self._policy_directory) | |
| 741 | |
| 742 def __iter__(self): | |
| 743 for label in self._policy_directory: | |
| 744 yield label | |
| 745 | |
| 746 def __getitem__(self, label): | |
| 747 return self._policy_directory[label] | |
| 748 | |
| 749 @staticmethod | |
| 750 def _load_default_policy_directory(): | |
| 751 with open(POLICIES_JSON_PATH, mode='r') as policies_f: | |
| 752 default_policy_directory = json.load(policies_f) | |
| 753 return default_policy_directory | |
| 754 | |
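For reference, the dict returned above has the following shape (file names hypothetical); `_load_policies()` below reads the 'file' and 'format' keys of each labeled entry:

```python
default_policy_directory = {
    'l0': {'file': 'policy.l0.json', 'format': 'json'},
    'l1': {'file': 'policy.l1.json', 'format': 'json'},
}
```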
| 755 @staticmethod | |
| 756 def _load_policies(directory): | |
| 757 LOGGER.info('Loading policy files.') | |
| 758 policies = {} | |
| 759 for label in directory: | |
| 760 LOGGER.info(' %s: %s' % (label, directory[label]['file'])) | |
| 761 loaded = Policy.load(directory[label]['file'], directory[label]['format']) | |
| 762 if loaded: | |
| 763 policies[label] = loaded | |
| 764 return PolicySet(policies) | |
| 765 | |
| 766 | |
| 767 class Bucket(object): | |
| 768 """Represents a bucket, which is a unit of memory block classification.""" | |
| 769 | |
| 770 def __init__(self, stacktrace, allocator_type, typeinfo, typeinfo_name): | |
| 771 self._stacktrace = stacktrace | |
| 772 self._allocator_type = allocator_type | |
| 773 self._typeinfo = typeinfo | |
| 774 self._typeinfo_name = typeinfo_name | |
| 775 | |
| 776 self._symbolized_stackfunction = stacktrace | |
| 777 self._symbolized_joined_stackfunction = '' | |
| 778 self._symbolized_stacksourcefile = stacktrace | |
| 779 self._symbolized_joined_stacksourcefile = '' | |
| 780 self._symbolized_typeinfo = typeinfo_name | |
| 781 | |
| 782 self.component_cache = '' | |
| 783 | |
| 784 def __str__(self): | |
| 785 result = [] | |
| 786 result.append(self._allocator_type) | |
| 787 if self._symbolized_typeinfo == 'no typeinfo': | |
| 788 result.append('tno_typeinfo') | |
| 789 else: | |
| 790 result.append('t' + self._symbolized_typeinfo) | |
| 791 result.append('n' + self._typeinfo_name) | |
| 792 result.extend(['%s(@%s)' % (function, sourcefile) | |
| 793 for function, sourcefile | |
| 794 in zip(self._symbolized_stackfunction, | |
| 795 self._symbolized_stacksourcefile)]) | |
| 796 return ' '.join(result) | |
| 797 | |
| 798 def symbolize(self, symbol_mapping_cache): | |
| 799 """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|. | |
| 800 | |
| 801 Args: | |
| 802 symbol_mapping_cache: A SymbolMappingCache object. | |
| 803 """ | |
| 804 # TODO(dmikurube): Fill explicitly with numbers if symbol not found. | |
| 805 self._symbolized_stackfunction = [ | |
| 806 symbol_mapping_cache.lookup(FUNCTION_SYMBOLS, address) | |
| 807 for address in self._stacktrace] | |
| 808 self._symbolized_joined_stackfunction = ' '.join( | |
| 809 self._symbolized_stackfunction) | |
| 810 self._symbolized_stacksourcefile = [ | |
| 811 symbol_mapping_cache.lookup(SOURCEFILE_SYMBOLS, address) | |
| 812 for address in self._stacktrace] | |
| 813 self._symbolized_joined_stacksourcefile = ' '.join( | |
| 814 self._symbolized_stacksourcefile) | |
| 815 if not self._typeinfo: | |
| 816 self._symbolized_typeinfo = 'no typeinfo' | |
| 817 else: | |
| 818 self._symbolized_typeinfo = symbol_mapping_cache.lookup( | |
| 819 TYPEINFO_SYMBOLS, self._typeinfo) | |
| 820 if not self._symbolized_typeinfo: | |
| 821 self._symbolized_typeinfo = 'no typeinfo' | |
| 822 | |
| 823 def clear_component_cache(self): | |
| 824 self.component_cache = '' | |
| 825 | |
| 826 @property | |
| 827 def stacktrace(self): | |
| 828 return self._stacktrace | |
| 829 | |
| 830 @property | |
| 831 def allocator_type(self): | |
| 832 return self._allocator_type | |
| 833 | |
| 834 @property | |
| 835 def typeinfo(self): | |
| 836 return self._typeinfo | |
| 837 | |
| 838 @property | |
| 839 def typeinfo_name(self): | |
| 840 return self._typeinfo_name | |
| 841 | |
| 842 @property | |
| 843 def symbolized_stackfunction(self): | |
| 844 return self._symbolized_stackfunction | |
| 845 | |
| 846 @property | |
| 847 def symbolized_joined_stackfunction(self): | |
| 848 return self._symbolized_joined_stackfunction | |
| 849 | |
| 850 @property | |
| 851 def symbolized_stacksourcefile(self): | |
| 852 return self._symbolized_stacksourcefile | |
| 853 | |
| 854 @property | |
| 855 def symbolized_joined_stacksourcefile(self): | |
| 856 return self._symbolized_joined_stacksourcefile | |
| 857 | |
| 858 @property | |
| 859 def symbolized_typeinfo(self): | |
| 860 return self._symbolized_typeinfo | |
| 861 | |
| 862 | |
| 863 class BucketSet(object): | |
| 864 """Represents a set of bucket.""" | |
| 865 def __init__(self): | |
| 866 self._buckets = {} | |
| 867 self._code_addresses = set() | |
| 868 self._typeinfo_addresses = set() | |
| 869 | |
| 870 def load(self, prefix): | |
| 871 """Loads all related bucket files. | |
| 872 | |
| 873 Args: | |
| 874 prefix: A prefix string for bucket file names. | |
| 875 """ | |
| 876 LOGGER.info('Loading bucket files.') | |
| 877 | |
| 878 n = 0 | |
| 879 skipped = 0 | |
| 880 while True: | |
| 881 path = '%s.%04d.buckets' % (prefix, n) | |
| 882 if not os.path.exists(path) or not os.stat(path).st_size: | |
| 883 if skipped > 10: | |
| 884 break | |
| 885 n += 1 | |
| 886 skipped += 1 | |
| 887 continue | |
| 888 LOGGER.info(' %s' % path) | |
| 889 with open(path, 'r') as f: | |
| 890 self._load_file(f) | |
| 891 n += 1 | |
| 892 skipped = 0 | |
| 893 | |
| 894 def _load_file(self, bucket_f): | |
| 895 for line in bucket_f: | |
| 896 words = line.split() | |
| 897 typeinfo = None | |
| 898 typeinfo_name = '' | |
| 899 stacktrace_begin = 2 | |
| 900 for index, word in enumerate(words): | |
| 901 if index < 2: | |
| 902 continue | |
| 903 if word[0] == 't': | |
| 904 typeinfo = int(word[1:], 16) | |
| 905 self._typeinfo_addresses.add(typeinfo) | |
| 906 elif word[0] == 'n': | |
| 907 typeinfo_name = word[1:] | |
| 908 else: | |
| 909 stacktrace_begin = index | |
| 910 break | |
| 911 stacktrace = [int(address, 16) for address in words[stacktrace_begin:]] | |
| 912 for frame in stacktrace: | |
| 913 self._code_addresses.add(frame) | |
| 914 self._buckets[int(words[0])] = Bucket( | |
| 915 stacktrace, words[1], typeinfo, typeinfo_name) | |
| 916 | |
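A hypothetical .buckets line in the format `_load_file()` parses: a bucket id, an allocator type, optional 't<typeinfo address>' and 'n<type name>' words, then the stacktrace addresses in hex:

```python
words = '42 malloc t7f3a00 nstd::basic_string 7f0001 7f0002'.split()
print int(words[0])          # 42: the bucket id
print words[1]               # 'malloc': the allocator type
print int(words[2][1:], 16)  # 0x7f3a00: the typeinfo address ('t' prefix)
print words[3][1:]           # 'std::basic_string': the type name ('n' prefix)
print [int(a, 16) for a in words[4:]]  # the stacktrace addresses
```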
| 917 def __iter__(self): | |
| 918 for bucket_id, bucket_content in self._buckets.iteritems(): | |
| 919 yield bucket_id, bucket_content | |
| 920 | |
| 921 def __getitem__(self, bucket_id): | |
| 922 return self._buckets[bucket_id] | |
| 923 | |
| 924 def get(self, bucket_id): | |
| 925 return self._buckets.get(bucket_id) | |
| 926 | |
| 927 def symbolize(self, symbol_mapping_cache): | |
| 928 for bucket_content in self._buckets.itervalues(): | |
| 929 bucket_content.symbolize(symbol_mapping_cache) | |
| 930 | |
| 931 def clear_component_cache(self): | |
| 932 for bucket_content in self._buckets.itervalues(): | |
| 933 bucket_content.clear_component_cache() | |
| 934 | |
| 935 def iter_addresses(self, symbol_type): | |
| 936 if symbol_type in [FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS]: | |
| 937 for function in self._code_addresses: | |
| 938 yield function | |
| 939 else: | |
| 940 for function in self._typeinfo_addresses: | |
| 941 yield function | |
| 942 | |
| 943 | |
| 944 class PageFrame(object): | |
| 945 """Represents a pageframe and maybe its shared count.""" | |
| 946 def __init__(self, pfn, size, pagecount, start_truncated, end_truncated): | |
| 947 self._pfn = pfn | |
| 948 self._size = size | |
| 949 self._pagecount = pagecount | |
| 950 self._start_truncated = start_truncated | |
| 951 self._end_truncated = end_truncated | |
| 952 | |
| 953 def __str__(self): | |
| 954 result = str() | |
| 955 if self._start_truncated: | |
| 956 result += '<' | |
| 957 result += '%06x#%d' % (self._pfn, self._pagecount) | |
| 958 if self._end_truncated: | |
| 959 result += '>' | |
| 960 return result | |
| 961 | |
| 962 def __repr__(self): | |
| 963 return str(self) | |
| 964 | |
| 965 @staticmethod | |
| 966 def parse(encoded_pfn, size): | |
| 967 start = 0 | |
| 968 end = len(encoded_pfn) | |
| 969 end_truncated = False | |
| 970 if encoded_pfn.endswith('>'): | |
| 971 end = len(encoded_pfn) - 1 | |
| 972 end_truncated = True | |
| 973 pagecount_found = encoded_pfn.find('#') | |
| 974 pagecount = None | |
| 975 if pagecount_found >= 0: | |
| 976 encoded_pagecount = 'AAA' + encoded_pfn[pagecount_found+1 : end] | |
| 977 pagecount = struct.unpack( | |
| 978 '>I', '\x00' + encoded_pagecount.decode('base64'))[0] | |
| 979 end = pagecount_found | |
| 980 start_truncated = False | |
| 981 if encoded_pfn.startswith('<'): | |
| 982 start = 1 | |
| 983 start_truncated = True | |
| 984 | |
| 985 pfn = struct.unpack( | |
| 986 '>I', '\x00' + (encoded_pfn[start:end]).decode('base64'))[0] | |
| 987 | |
| 988 return PageFrame(pfn, size, pagecount, start_truncated, end_truncated) | |
| 989 | |
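The PFN decoded above is a 24-bit value in standard base64 (four characters), optionally followed by '#<pagecount>' and wrapped in '<'/'>' truncation markers. A minimal decoding sketch with a hypothetical value:

```python
import struct
encoded = 'AAAB'  # hypothetical 24-bit PFN encoded in base64
pfn = struct.unpack('>I', '\x00' + encoded.decode('base64'))[0]
print pfn  # 1
```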
| 990 @property | |
| 991 def pfn(self): | |
| 992 return self._pfn | |
| 993 | |
| 994 @property | |
| 995 def size(self): | |
| 996 return self._size | |
| 997 | |
| 998 def set_size(self, size): | |
| 999 self._size = size | |
| 1000 | |
| 1001 @property | |
| 1002 def pagecount(self): | |
| 1003 return self._pagecount | |
| 1004 | |
| 1005 @property | |
| 1006 def start_truncated(self): | |
| 1007 return self._start_truncated | |
| 1008 | |
| 1009 @property | |
| 1010 def end_truncated(self): | |
| 1011 return self._end_truncated | |
| 1012 | |
| 1013 | |
| 1014 class PFNCounts(object): | |
| 1015 """Represents counts of PFNs in a process.""" | |
| 1016 | |
| 1017 _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') | |
| 1018 | |
| 1019 def __init__(self, path, modified_time): | |
| 1020 matched = self._PATH_PATTERN.match(path) | |
| 1021 if matched: | |
| 1022 self._pid = int(matched.group(2)) | |
| 1023 else: | |
| 1024 self._pid = 0 | |
| 1025 self._command_line = '' | |
| 1026 self._pagesize = 4096 | |
| 1027 self._path = path | |
| 1028 self._pfn_meta = '' | |
| 1029 self._pfnset = {} | |
| 1030 self._reason = '' | |
| 1031 self._time = modified_time | |
| 1032 | |
| 1033 @staticmethod | |
| 1034 def load(path, log_header='Loading PFNs from a heap profile dump: '): | |
| 1035 pfnset = PFNCounts(path, float(os.stat(path).st_mtime)) | |
| 1036 LOGGER.info('%s%s' % (log_header, path)) | |
| 1037 | |
| 1038 with open(path, 'r') as pfnset_f: | |
| 1039 pfnset.load_file(pfnset_f) | |
| 1040 | |
| 1041 return pfnset | |
| 1042 | |
| 1043 @property | |
| 1044 def path(self): | |
| 1045 return self._path | |
| 1046 | |
| 1047 @property | |
| 1048 def pid(self): | |
| 1049 return self._pid | |
| 1050 | |
| 1051 @property | |
| 1052 def time(self): | |
| 1053 return self._time | |
| 1054 | |
| 1055 @property | |
| 1056 def reason(self): | |
| 1057 return self._reason | |
| 1058 | |
| 1059 @property | |
| 1060 def iter_pfn(self): | |
| 1061 for pfn, count in self._pfnset.iteritems(): | |
| 1062 yield pfn, count | |
| 1063 | |
| 1064 def load_file(self, pfnset_f): | |
| 1065 prev_pfn_end_truncated = None | |
| 1066 for line in pfnset_f: | |
| 1067 line = line.strip() | |
| 1068 if line.startswith('GLOBAL_STATS:') or line.startswith('STACKTRACES:'): | |
| 1069 break | |
| 1070 elif line.startswith('PF: '): | |
| 1071 for encoded_pfn in line[3:].split(): | |
| 1072 page_frame = PageFrame.parse(encoded_pfn, self._pagesize) | |
| 1073 if page_frame.start_truncated and ( | |
| 1074 not prev_pfn_end_truncated or | |
| 1075 prev_pfn_end_truncated != page_frame.pfn): | |
| 1076 LOGGER.error('Broken page frame number: %s.' % encoded_pfn) | |
| 1077 self._pfnset[page_frame.pfn] = self._pfnset.get(page_frame.pfn, 0) + 1 | |
| 1078 if page_frame.end_truncated: | |
| 1079 prev_pfn_end_truncated = page_frame.pfn | |
| 1080 else: | |
| 1081 prev_pfn_end_truncated = None | |
| 1082 elif line.startswith('PageSize: '): | |
| 1083 self._pagesize = int(line[10:]) | |
| 1084 elif line.startswith('PFN: '): | |
| 1085 self._pfn_meta = line[5:] | |
| 1086 elif line.startswith('PageFrame: '): | |
| 1087 self._pfn_meta = line[11:] | |
| 1088 elif line.startswith('Time: '): | |
| 1089 self._time = float(line[6:]) | |
| 1090 elif line.startswith('CommandLine: '): | |
| 1091 self._command_line = line[13:] | |
| 1092 elif line.startswith('Reason: '): | |
| 1093 self._reason = line[8:] | |
| 1094 | |
| 1095 | |
| 1096 class Dump(object): | |
| 1097 """Represents a heap profile dump.""" | |
| 1098 | |
| 1099 _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') | |
| 1100 | |
| 1101 _HOOK_PATTERN = re.compile( | |
| 1102 r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+' | |
| 1103 r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE) | |
| 1104 | |
| 1105 _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' | |
| 1106 '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)') | |
| 1107 _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' | |
| 1108 '(?P<RESERVED>[0-9]+)') | |
| 1109 | |
| 1110 _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)') | |
| 1111 _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)') | |
| 1112 | |
| 1113 _TIME_PATTERN_FORMAT = re.compile( | |
| 1114 r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?') | |
| 1115 _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$') | |
| 1116 | |
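A hypothetical MMAP_LIST region line matched by the patterns above; a '(' or ')' in place of a space around an address marks the address as truncated to the enclosing VMA. (`_HOOK_PATTERN` is private; this is for illustration only.)

```python
line = '  7f0000000000 - 7f0000001000   hooked mmap 4096 / 4096 @ 42'
matched = Dump._HOOK_PATTERN.match(line)
print matched.group(7)  # 'hooked'
submatched = Dump._HOOKED_PATTERN.match(matched.group(8))
print submatched.group('COMMITTED'), submatched.group('BUCKETID')  # 4096 42
```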
| 1117 def __init__(self, path, modified_time): | |
| 1118 self._path = path | |
| 1119 matched = self._PATH_PATTERN.match(path) | |
| 1120 self._pid = int(matched.group(2)) | |
| 1121 self._count = int(matched.group(3)) | |
| 1122 self._time = modified_time | |
| 1123 self._map = {} | |
| 1124 self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute) | |
| 1125 self._stacktrace_lines = [] | |
| 1126 self._global_stats = {} # used only in apply_policy | |
| 1127 | |
| 1128 self._run_id = '' | |
| 1129 self._pagesize = 4096 | |
| 1130 self._pageframe_length = 0 | |
| 1131 self._pageframe_encoding = '' | |
| 1132 self._has_pagecount = False | |
| 1133 | |
| 1134 self._version = '' | |
| 1135 self._lines = [] | |
| 1136 | |
| 1137 @property | |
| 1138 def path(self): | |
| 1139 return self._path | |
| 1140 | |
| 1141 @property | |
| 1142 def count(self): | |
| 1143 return self._count | |
| 1144 | |
| 1145 @property | |
| 1146 def time(self): | |
| 1147 return self._time | |
| 1148 | |
| 1149 @property | |
| 1150 def iter_map(self): | |
| 1151 for region in sorted(self._map.iteritems()): | |
| 1152 yield region[0], region[1] | |
| 1153 | |
| 1154 def iter_procmaps(self): | |
| 1155 for begin, end, attr in self._procmaps.iter_range(): |
| 1156 yield begin, end, attr | |
| 1157 | |
| 1158 @property | |
| 1159 def iter_stacktrace(self): | |
| 1160 for line in self._stacktrace_lines: | |
| 1161 yield line | |
| 1162 | |
| 1163 def global_stat(self, name): | |
| 1164 return self._global_stats[name] | |
| 1165 | |
| 1166 @property | |
| 1167 def run_id(self): | |
| 1168 return self._run_id | |
| 1169 | |
| 1170 @property | |
| 1171 def pagesize(self): | |
| 1172 return self._pagesize | |
| 1173 | |
| 1174 @property | |
| 1175 def pageframe_length(self): | |
| 1176 return self._pageframe_length | |
| 1177 | |
| 1178 @property | |
| 1179 def pageframe_encoding(self): | |
| 1180 return self._pageframe_encoding | |
| 1181 | |
| 1182 @property | |
| 1183 def has_pagecount(self): | |
| 1184 return self._has_pagecount | |
| 1185 | |
| 1186 @staticmethod | |
| 1187 def load(path, log_header='Loading a heap profile dump: '): | |
| 1188 """Loads a heap profile dump. | |
| 1189 | |
| 1190 Args: | |
| 1191 path: A file path string to load. | |
| 1192 log_header: A preceding string for log messages. | |
| 1193 | |
| 1194 Returns: | |
| 1195 A loaded Dump object. | |
| 1196 | |
| 1197 Raises: | |
| 1198 ParsingException for invalid heap profile dumps. | |
| 1199 """ | |
| 1200 dump = Dump(path, os.stat(path).st_mtime) | |
| 1201 with open(path, 'r') as f: | |
| 1202 dump.load_file(f, log_header) | |
| 1203 return dump | |
| 1204 | |
| 1205 def load_file(self, f, log_header): | |
| 1206 self._lines = [line for line in f | |
| 1207 if line and not line.startswith('#')] | |
| 1208 | |
| 1209 try: | |
| 1210 self._version, ln = self._parse_version() | |
| 1211 self._parse_meta_information() | |
| 1212 if self._version == DUMP_DEEP_6: | |
| 1213 self._parse_mmap_list() | |
| 1214 self._parse_global_stats() | |
| 1215 self._extract_stacktrace_lines(ln) | |
| 1216 except EmptyDumpException: | |
| 1217 LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path)) | |
| 1218 except ParsingException as e: |
| 1219 LOGGER.error('%s%s ...error %s' % (log_header, self._path, e)) | |
| 1220 raise | |
| 1221 else: | |
| 1222 LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version)) | |
| 1223 | |
| 1224 def _parse_version(self): | |
| 1225 """Parses a version string in self._lines. | |
| 1226 | |
| 1227 Returns: | |
| 1228 A pair of (a string representing a version of the stacktrace dump, | |
| 1229 and an integer indicating a line number next to the version string). | |
| 1230 | |
| 1231 Raises: | |
| 1232 ParsingException for invalid dump versions. | |
| 1233 """ | |
| 1234 version = '' | |
| 1235 | |
| 1236 # Skip until an identifiable line. | |
| 1237 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') | |
| 1238 if not self._lines: | |
| 1239 raise EmptyDumpException('Empty heap dump file.') | |
| 1240 (ln, found) = skip_while( | |
| 1241 0, len(self._lines), | |
| 1242 lambda n: not self._lines[n].startswith(headers)) | |
| 1243 if not found: | |
| 1244 raise InvalidDumpException('No version header.') | |
| 1245 | |
| 1246 # Identify a version. | |
| 1247 if self._lines[ln].startswith('heap profile: '): | |
| 1248 version = self._lines[ln][13:].strip() | |
| 1249 if version in (DUMP_DEEP_5, DUMP_DEEP_6): | |
| 1250 (ln, _) = skip_while( | |
| 1251 ln, len(self._lines), | |
| 1252 lambda n: self._lines[n] != 'STACKTRACES:\n') | |
| 1253 elif version in DUMP_DEEP_OBSOLETE: | |
| 1254 raise ObsoleteDumpVersionException(version) | |
| 1255 else: | |
| 1256 raise InvalidDumpException('Invalid version: %s' % version) | |
| 1257 elif self._lines[ln] == 'STACKTRACES:\n': | |
| 1258 raise ObsoleteDumpVersionException(DUMP_DEEP_1) | |
| 1259 elif self._lines[ln] == 'MMAP_STACKTRACES:\n': | |
| 1260 raise ObsoleteDumpVersionException(DUMP_DEEP_2) | |
| 1261 | |
| 1262 return (version, ln) | |
| 1263 | |
| 1264 def _parse_global_stats(self): | |
| 1265 """Parses lines in self._lines as global stats.""" | |
| 1266 (ln, _) = skip_while( | |
| 1267 0, len(self._lines), | |
| 1268 lambda n: self._lines[n] != 'GLOBAL_STATS:\n') | |
| 1269 | |
| 1270 global_stat_names = [ | |
| 1271 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack', | |
| 1272 'other', 'nonprofiled-absent', 'nonprofiled-anonymous', | |
| 1273 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', | |
| 1274 'nonprofiled-stack', 'nonprofiled-other', | |
| 1275 'profiled-mmap', 'profiled-malloc'] | |
| 1276 | |
| 1277 for prefix in global_stat_names: | |
| 1278 (ln, _) = skip_while( | |
| 1279 ln, len(self._lines), | |
| 1280 lambda n: self._lines[n].split()[0] != prefix) | |
| 1281 words = self._lines[ln].split() | |
| 1282 self._global_stats[prefix + '_virtual'] = int(words[-2]) | |
| 1283 self._global_stats[prefix + '_committed'] = int(words[-1]) | |
| 1284 | |
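Each GLOBAL_STATS line ends with the virtual and committed sizes; those last two columns are all `_parse_global_stats()` reads. A hypothetical line:

```python
words = 'profiled-mmap 123456789 12345678'.split()
print int(words[-2])  # 123456789: stored as 'profiled-mmap_virtual'
print int(words[-1])  # 12345678: stored as 'profiled-mmap_committed'
```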
| 1285 def _parse_meta_information(self): | |
| 1286 """Parses lines in self._lines for meta information.""" | |
| 1287 (ln, found) = skip_while( | |
| 1288 0, len(self._lines), | |
| 1289 lambda n: self._lines[n] != 'META:\n') | |
| 1290 if not found: | |
| 1291 return | |
| 1292 ln += 1 | |
| 1293 | |
| 1294 while True: | |
| 1295 if self._lines[ln].startswith('Time:'): | |
| 1296 matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln]) | |
| 1297 matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln]) | |
| 1298 if matched_format: | |
| 1299 self._time = time.mktime(datetime.datetime.strptime( | |
| 1300 matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple()) | |
| 1301 if matched_format.group(2): | |
| 1302 self._time += float(matched_format.group(2)[1:]) / 1000.0 | |
| 1303 elif matched_seconds: | |
| 1304 self._time = float(matched_seconds.group(1)) | |
| 1305 elif self._lines[ln].startswith('Reason:'): | |
| 1306 pass # Nothing to do for 'Reason:' | |
| 1307 elif self._lines[ln].startswith('PageSize: '): | |
| 1308 self._pagesize = int(self._lines[ln][10:]) | |
| 1309 elif self._lines[ln].startswith('CommandLine:'): | |
| 1310 pass | |
| 1311 elif (self._lines[ln].startswith('PageFrame: ') or | |
| 1312 self._lines[ln].startswith('PFN: ')): | |
| 1313 if self._lines[ln].startswith('PageFrame: '): | |
| 1314 words = self._lines[ln][11:].split(',') | |
| 1315 else: | |
| 1316 words = self._lines[ln][5:].split(',') | |
| 1317 for word in words: | |
| 1318 if word == '24': | |
| 1319 self._pageframe_length = 24 | |
| 1320 elif word == 'Base64': | |
| 1321 self._pageframe_encoding = 'base64' | |
| 1322 elif word == 'PageCount': | |
| 1323 self._has_pagecount = True | |
| 1324 elif self._lines[ln].startswith('RunID: '): | |
| 1325 self._run_id = self._lines[ln][7:].strip() | |
| 1326 elif (self._lines[ln].startswith('MMAP_LIST:') or | |
| 1327 self._lines[ln].startswith('GLOBAL_STATS:')): | |
| 1328 # Stop at "MMAP_LIST:" or "GLOBAL_STATS:"; meta information ends here. |
| 1329 break | |
| 1330 else: | |
| 1331 pass | |
| 1332 ln += 1 | |
| 1333 | |
| 1334 def _parse_mmap_list(self): | |
| 1335 """Parses lines in self._lines as a mmap list.""" | |
| 1336 (ln, found) = skip_while( | |
| 1337 0, len(self._lines), | |
| 1338 lambda n: self._lines[n] != 'MMAP_LIST:\n') | |
| 1339 if not found: | |
| 1340 return {} | |
| 1341 | |
| 1342 ln += 1 | |
| 1343 self._map = {} | |
| 1344 current_vma = {} | |
| 1345 pageframe_list = [] | |
| 1346 while True: | |
| 1347 entry = proc_maps.ProcMaps.parse_line(self._lines[ln]) | |
| 1348 if entry: | |
| 1349 current_vma = {} | |
| 1350 for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end): | |
| 1351 for key, value in entry.as_dict().iteritems(): | |
| 1352 attr[key] = value | |
| 1353 current_vma[key] = value | |
| 1354 ln += 1 | |
| 1355 continue | |
| 1356 | |
| 1357 if self._lines[ln].startswith(' PF: '): | |
| 1358 for pageframe in self._lines[ln][5:].split(): | |
| 1359 pageframe_list.append(PageFrame.parse(pageframe, self._pagesize)) | |
| 1360 ln += 1 | |
| 1361 continue | |
| 1362 | |
| 1363 matched = self._HOOK_PATTERN.match(self._lines[ln]) | |
| 1364 if not matched: | |
| 1365 break | |
| 1366 # 2: starting address | |
| 1367 # 5: end address | |
| 1368 # 7: hooked or unhooked | |
| 1369 # 8: additional information | |
| 1370 if matched.group(7) == 'hooked': | |
| 1371 submatched = self._HOOKED_PATTERN.match(matched.group(8)) | |
| 1372 if not submatched: | |
| 1373 submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8)) | |
| 1374 elif matched.group(7) == 'unhooked': | |
| 1375 submatched = self._UNHOOKED_PATTERN.match(matched.group(8)) | |
| 1376 if not submatched: | |
| 1377 submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8)) | |
| 1378 else: | |
| 1379 assert matched.group(7) in ['hooked', 'unhooked'] | |
| 1380 | |
| 1381 submatched_dict = submatched.groupdict() | |
| 1382 region_info = { 'vma': current_vma } | |
| 1383 if submatched_dict.get('TYPE'): | |
| 1384 region_info['type'] = submatched_dict['TYPE'].strip() | |
| 1385 if submatched_dict.get('COMMITTED'): | |
| 1386 region_info['committed'] = int(submatched_dict['COMMITTED']) | |
| 1387 if submatched_dict.get('RESERVED'): | |
| 1388 region_info['reserved'] = int(submatched_dict['RESERVED']) | |
| 1389 if submatched_dict.get('BUCKETID'): | |
| 1390 region_info['bucket_id'] = int(submatched_dict['BUCKETID']) | |
| 1391 | |
| 1392 if matched.group(1) == '(': | |
| 1393 start = current_vma['begin'] | |
| 1394 else: | |
| 1395 start = int(matched.group(2), 16) | |
| 1396 if matched.group(4) == '(': | |
| 1397 end = current_vma['end'] | |
| 1398 else: | |
| 1399 end = int(matched.group(5), 16) | |
| 1400 | |
| 1401 if pageframe_list and pageframe_list[0].start_truncated: | |
| 1402 pageframe_list[0].set_size( | |
| 1403 pageframe_list[0].size - start % self._pagesize) | |
| 1404 if pageframe_list and pageframe_list[-1].end_truncated: | |
| 1405 pageframe_list[-1].set_size( | |
| 1406 pageframe_list[-1].size - (self._pagesize - end % self._pagesize)) | |
| 1407 region_info['pageframe'] = pageframe_list | |
| 1408 pageframe_list = [] | |
| 1409 | |
| 1410 self._map[(start, end)] = (matched.group(7), region_info) | |
| 1411 ln += 1 | |
| 1412 | |
| 1413 def _extract_stacktrace_lines(self, line_number): | |
| 1414 """Extracts the position of stacktrace lines. | |
| 1415 | |
| 1416 Valid stacktrace lines are stored into self._stacktrace_lines. | |
| 1417 | |
| 1418 Args: | |
| 1419 line_number: A line number to start parsing in lines. | |
| 1420 | |
| 1421 Raises: | |
| 1422 ParsingException for invalid dump versions. | |
| 1423 """ | |
| 1424 if self._version in (DUMP_DEEP_5, DUMP_DEEP_6): | |
| 1425 (line_number, _) = skip_while( | |
| 1426 line_number, len(self._lines), | |
| 1427 lambda n: not self._lines[n].split()[0].isdigit()) | |
| 1428 stacktrace_start = line_number | |
| 1429 (line_number, _) = skip_while( | |
| 1430 line_number, len(self._lines), | |
| 1431 lambda n: self._check_stacktrace_line(self._lines[n])) | |
| 1432 self._stacktrace_lines = self._lines[stacktrace_start:line_number] | |
| 1433 | |
| 1434 elif self._version in DUMP_DEEP_OBSOLETE: | |
| 1435 raise ObsoleteDumpVersionException(self._version) | |
| 1436 | |
| 1437 else: | |
| 1438 raise InvalidDumpException('Invalid version: %s' % self._version) | |
| 1439 | |
| 1440 @staticmethod | |
| 1441 def _check_stacktrace_line(stacktrace_line): | |
| 1442 """Checks if a given stacktrace_line is valid as stacktrace. | |
| 1443 | |
| 1444 Args: | |
| 1445 stacktrace_line: A string to be checked. | |
| 1446 | |
| 1447 Returns: | |
| 1448 True if the given stacktrace_line is valid. | |
| 1449 """ | |
| 1450 words = stacktrace_line.split() | |
| 1451 if len(words) < BUCKET_ID + 1: | |
| 1452 return False | |
| 1453 if words[BUCKET_ID - 1] != '@': | |
| 1454 return False | |
| 1455 return True | |
| 1456 | |
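A hypothetical STACKTRACES line; its leading columns correspond to the VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT and BUCKET_ID indices defined at the top of this file:

```python
words = '1024 512 10 2 @ 42'.split()
print words[VIRTUAL], words[COMMITTED]  # '1024' '512'
print words[BUCKET_ID - 1]  # '@': the marker _check_stacktrace_line() checks
print int(words[BUCKET_ID]) # 42: the bucket id looked up in a BucketSet
```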
| 1457 | |
| 1458 class DumpList(object): | |
| 1459 """Represents a sequence of heap profile dumps.""" | |
| 1460 | |
| 1461 def __init__(self, dump_list): | |
| 1462 self._dump_list = dump_list | |
| 1463 | |
| 1464 @staticmethod | |
| 1465 def load(path_list): | |
| 1466 LOGGER.info('Loading heap dump profiles.') | |
| 1467 dump_list = [] | |
| 1468 for path in path_list: | |
| 1469 dump_list.append(Dump.load(path, ' ')) | |
| 1470 return DumpList(dump_list) | |
| 1471 | |
| 1472 def __len__(self): | |
| 1473 return len(self._dump_list) | |
| 1474 | |
| 1475 def __iter__(self): | |
| 1476 for dump in self._dump_list: | |
| 1477 yield dump | |
| 1478 | |
| 1479 def __getitem__(self, index): | |
| 1480 return self._dump_list[index] | |
| 1481 | |
| 1482 | |
| 1483 class Unit(object): | |
| 1484 """Represents a minimum unit of memory usage categorization. | |
| 1485 | |
| 1486 It is supposed to be inherited for some different spaces like the entire | |
| 1487 virtual memory and malloc arena. Such different spaces are called "worlds" | |
| 1488 in dmprof. (For example, the "vm" world and the "malloc" world.) | |
| 1489 """ | |
| 1490 def __init__(self, unit_id, size): | |
| 1491 self._unit_id = unit_id | |
| 1492 self._size = size | |
| 1493 | |
| 1494 @property | |
| 1495 def unit_id(self): | |
| 1496 return self._unit_id | |
| 1497 | |
| 1498 @property | |
| 1499 def size(self): | |
| 1500 return self._size | |
| 1501 | |
| 1502 | |
| 1503 class VMUnit(Unit): | |
| 1504 """Represents a Unit for a memory region on virtual memory.""" | |
| 1505 def __init__(self, unit_id, committed, reserved, mmap, region, | |
| 1506 pageframe=None, group_pfn_counts=None): | |
| 1507 super(VMUnit, self).__init__(unit_id, committed) | |
| 1508 self._reserved = reserved | |
| 1509 self._mmap = mmap | |
| 1510 self._region = region | |
| 1511 self._pageframe = pageframe | |
| 1512 self._group_pfn_counts = group_pfn_counts | |
| 1513 | |
| 1514 @property | |
| 1515 def committed(self): | |
| 1516 return self._size | |
| 1517 | |
| 1518 @property | |
| 1519 def reserved(self): | |
| 1520 return self._reserved | |
| 1521 | |
| 1522 @property | |
| 1523 def mmap(self): | |
| 1524 return self._mmap | |
| 1525 | |
| 1526 @property | |
| 1527 def region(self): | |
| 1528 return self._region | |
| 1529 | |
| 1530 @property | |
| 1531 def pageframe(self): | |
| 1532 return self._pageframe | |
| 1533 | |
| 1534 @property | |
| 1535 def group_pfn_counts(self): | |
| 1536 return self._group_pfn_counts | |
| 1537 | |
| 1538 | |
| 1539 class MMapUnit(VMUnit): | |
| 1540 """Represents a Unit for a mmap'ed region.""" | |
| 1541 def __init__(self, unit_id, committed, reserved, region, bucket_set, | |
| 1542 pageframe=None, group_pfn_counts=None): | |
| 1543 super(MMapUnit, self).__init__(unit_id, committed, reserved, True, | |
| 1544 region, pageframe, group_pfn_counts) | |
| 1545 self._bucket_set = bucket_set | |
| 1546 | |
| 1547 def __repr__(self): | |
| 1548 return str(self.region) | |
| 1549 | |
| 1550 @property | |
| 1551 def bucket_set(self): | |
| 1552 return self._bucket_set | |
| 1553 | |
| 1554 | |
| 1555 class UnhookedUnit(VMUnit): | |
| 1556 """Represents a Unit for a non-mmap'ed memory region on virtual memory.""" | |
| 1557 def __init__(self, unit_id, committed, reserved, region, | |
| 1558 pageframe=None, group_pfn_counts=None): | |
| 1559 super(UnhookedUnit, self).__init__(unit_id, committed, reserved, False, | |
| 1560 region, pageframe, group_pfn_counts) | |
| 1561 | |
| 1562 def __repr__(self): | |
| 1563 return str(self.region) | |
| 1564 | |
| 1565 | |
| 1566 class MallocUnit(Unit): | |
| 1567 """Represents a Unit for a malloc'ed memory block.""" | |
| 1568 def __init__(self, unit_id, size, alloc_count, free_count, bucket): | |
| 1569 super(MallocUnit, self).__init__(unit_id, size) | |
| 1570 self._bucket = bucket | |
| 1571 self._alloc_count = alloc_count | |
| 1572 self._free_count = free_count | |
| 1573 | |
| 1574 def __repr__(self): | |
| 1575 return str(self.bucket) | |
| 1576 | |
| 1577 @property | |
| 1578 def bucket(self): | |
| 1579 return self._bucket | |
| 1580 | |
| 1581 @property | |
| 1582 def alloc_count(self): | |
| 1583 return self._alloc_count | |
| 1584 | |
| 1585 @property | |
| 1586 def free_count(self): | |
| 1587 return self._free_count | |
| 1588 | |
| 1589 | |
| 1590 class UnitSet(object): | |
| 1591 """Represents an iterable set of Units.""" | |
| 1592 def __init__(self, world): | |
| 1593 self._units = {} | |
| 1594 self._world = world | |
| 1595 | |
| 1596 def __repr__(self): | |
| 1597 return str(self._units) | |
| 1598 | |
| 1599 def __iter__(self): | |
| 1600 for unit_id in sorted(self._units): | |
| 1601 yield self._units[unit_id] | |
| 1602 | |
| 1603 def append(self, unit, overwrite=False): | |
| 1604 if not overwrite and unit.unit_id in self._units: | |
| 1605 LOGGER.error('The unit id=%s already exists.' % str(unit.unit_id)) | |
| 1606 self._units[unit.unit_id] = unit | |
| 1607 | |
| 1608 | |
| 1609 class AbstractRule(object): | |
| 1610 """An abstract class for rules to be matched with units.""" | |
| 1611 def __init__(self, dct): | |
| 1612 self._name = dct['name'] | |
| 1613 self._hidden = dct.get('hidden', False) | |
| 1614 self._subworlds = dct.get('subworlds', []) | |
| 1615 | |
| 1616 def match(self, unit): | |
| 1617 raise NotImplementedError() | |
| 1618 | |
| 1619 @property | |
| 1620 def name(self): | |
| 1621 return self._name | |
| 1622 | |
| 1623 @property | |
| 1624 def hidden(self): | |
| 1625 return self._hidden | |
| 1626 | |
| 1627 def iter_subworld(self): | |
| 1628 for subworld in self._subworlds: | |
| 1629 yield subworld | |
| 1630 | |
| 1631 | |
| 1632 class VMRule(AbstractRule): | |
| 1633 """Represents a Rule to match with virtual memory regions.""" | |
| 1634 def __init__(self, dct): | |
| 1635 super(VMRule, self).__init__(dct) | |
| 1636 self._backtrace_function = dct.get('backtrace_function', None) | |
| 1637 if self._backtrace_function: | |
| 1638 self._backtrace_function = re.compile(self._backtrace_function) | |
| 1639 self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None) | |
| 1640 if self._backtrace_sourcefile: | |
| 1641 self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile) | |
| 1642 self._mmap = dct.get('mmap', None) | |
| 1643 self._sharedwith = dct.get('sharedwith', []) | |
| 1644 self._mapped_pathname = dct.get('mapped_pathname', None) | |
| 1645 if self._mapped_pathname: | |
| 1646 self._mapped_pathname = re.compile(self._mapped_pathname) | |
| 1647 self._mapped_permission = dct.get('mapped_permission', None) | |
| 1648 if self._mapped_permission: | |
| 1649 self._mapped_permission = re.compile(self._mapped_permission) | |
| 1650 | |
| 1651 def __repr__(self): | |
| 1652 result = cStringIO.StringIO() | |
| 1653 result.write('{"%s"=>' % self._name) | |
| 1654 attributes = [] | |
| 1655 attributes.append('mmap: %s' % self._mmap) | |
| 1656 if self._backtrace_function: | |
| 1657 attributes.append('backtrace_function: "%s"' % | |
| 1658 self._backtrace_function.pattern) | |
| 1659 if self._sharedwith: | |
| 1660 attributes.append('sharedwith: "%s"' % self._sharedwith) | |
| 1661 if self._mapped_pathname: | |
| 1662 attributes.append('mapped_pathname: "%s"' % self._mapped_pathname.pattern) | |
| 1663 if self._mapped_permission: | |
| 1664 attributes.append('mapped_permission: "%s"' % | |
| 1665 self._mapped_permission.pattern) | |
| 1666 result.write('%s}' % ', '.join(attributes)) | |
| 1667 return result.getvalue() | |
| 1668 | |
| 1669 def match(self, unit): | |
| 1670 if unit.mmap: | |
| 1671 assert unit.region[0] == 'hooked' | |
| 1672 bucket = unit.bucket_set.get(unit.region[1]['bucket_id']) | |
| 1673 assert bucket | |
| 1674 assert bucket.allocator_type == 'mmap' | |
| 1675 | |
| 1676 stackfunction = bucket.symbolized_joined_stackfunction | |
| 1677 stacksourcefile = bucket.symbolized_joined_stacksourcefile | |
| 1678 | |
| 1679 # TODO(dmikurube): Support shared memory. | |
| 1680 sharedwith = None | |
| 1681 | |
| 1682 if self._mmap == False: # (self._mmap == None) should go through. | |
| 1683 return False | |
| 1684 if (self._backtrace_function and | |
| 1685 not self._backtrace_function.match(stackfunction)): | |
| 1686 return False | |
| 1687 if (self._backtrace_sourcefile and | |
| 1688 not self._backtrace_sourcefile.match(stacksourcefile)): | |
| 1689 return False | |
| 1690 if (self._mapped_pathname and | |
| 1691 not self._mapped_pathname.match(unit.region[1]['vma']['name'])): | |
| 1692 return False | |
| 1693 if (self._mapped_permission and | |
| 1694 not self._mapped_permission.match( | |
| 1695 unit.region[1]['vma']['readable'] + | |
| 1696 unit.region[1]['vma']['writable'] + | |
| 1697 unit.region[1]['vma']['executable'] + | |
| 1698 unit.region[1]['vma']['private'])): | |
| 1699 return False | |
| 1700 if (self._sharedwith and | |
| 1701 unit.pageframe and sharedwith not in self._sharedwith): | |
| 1702 return False | |
| 1703 | |
| 1704 return True | |
| 1705 | |
| 1706 else: | |
| 1707 assert unit.region[0] == 'unhooked' | |
| 1708 | |
| 1709 # TODO(dmikurube): Support shared memory. | |
| 1710 sharedwith = None | |
| 1711 | |
| 1712 if self._mmap == True: # (self._mmap == None) should go through. | |
| 1713 return False | |
| 1714 if (self._mapped_pathname and | |
| 1715 not self._mapped_pathname.match(unit.region[1]['vma']['name'])): | |
| 1716 return False | |
| 1717 if (self._mapped_permission and | |
| 1718 not self._mapped_permission.match( | |
| 1719 unit.region[1]['vma']['readable'] + | |
| 1720 unit.region[1]['vma']['writable'] + | |
| 1721 unit.region[1]['vma']['executable'] + | |
| 1722 unit.region[1]['vma']['private'])): | |
| 1723 return False | |
| 1724 if (self._sharedwith and | |
| 1725 unit.pageframe and sharedwith not in self._sharedwith): | |
| 1726 return False | |
| 1727 | |
| 1728 return True | |
| 1729 | |
| 1730 | |
| 1731 class MallocRule(AbstractRule): | |
| 1732 """Represents a Rule to match with malloc'ed blocks.""" | |
| 1733 def __init__(self, dct): | |
| 1734 super(MallocRule, self).__init__(dct) | |
| 1735 self._backtrace_function = dct.get('backtrace_function', None) | |
| 1736 if self._backtrace_function: | |
| 1737 self._backtrace_function = re.compile(self._backtrace_function) | |
| 1738 self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None) | |
| 1739 if self._backtrace_sourcefile: | |
| 1740 self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile) | |
| 1741 self._typeinfo = dct.get('typeinfo', None) | |
| 1742 if self._typeinfo: | |
| 1743 self._typeinfo = re.compile(self._typeinfo) | |
| 1744 | |
| 1745 def __repr__(self): | |
| 1746 result = cStringIO.StringIO() | |
| 1747 result.write('{"%s"=>' % self._name) | |
| 1748 attributes = [] | |
| 1749 if self._backtrace_function: | |
| 1750 attributes.append('backtrace_function: "%s"' % self._backtrace_function) | |
| 1751 if self._typeinfo: | |
| 1752 attributes.append('typeinfo: "%s"' % self._typeinfo) | |
| 1753 result.write('%s}' % ', '.join(attributes)) | |
| 1754 return result.getvalue() | |
| 1755 | |
| 1756 def match(self, unit): | |
| 1757 assert unit.bucket.allocator_type == 'malloc' | |
| 1758 | |
| 1759 stackfunction = unit.bucket.symbolized_joined_stackfunction | |
| 1760 stacksourcefile = unit.bucket.symbolized_joined_stacksourcefile | |
| 1761 typeinfo = unit.bucket.symbolized_typeinfo | |
| 1762 if typeinfo.startswith('0x'): | |
| 1763 typeinfo = unit.bucket.typeinfo_name | |
| 1764 | |
| 1765 return ((not self._backtrace_function or | |
| 1766 self._backtrace_function.match(stackfunction)) and | |
| 1767 (not self._backtrace_sourcefile or | |
| 1768 self._backtrace_sourcefile.match(stacksourcefile)) and | |
| 1769 (not self._typeinfo or self._typeinfo.match(typeinfo))) | |
| 1770 | |
| 1771 | |
| 1772 class NoBucketMallocRule(MallocRule): | |
| 1773 """Represents a Rule that small ignorable units match with.""" | |
| 1774 def __init__(self): | |
| 1775 super(NoBucketMallocRule, self).__init__({'name': 'tc-no-bucket'}) | |
| 1776 self._no_bucket = True | |
| 1777 | |
| 1778 @property | |
| 1779 def no_bucket(self): | |
| 1780 return self._no_bucket | |
| 1781 | |
| 1782 | |
| 1783 class AbstractSorter(object): | |
| 1784 """An abstract class for classifying Units with a set of Rules.""" | |
| 1785 def __init__(self, dct): | |
| 1786 self._type = 'sorter' | |
| 1787 self._version = dct['version'] | |
| 1788 self._world = dct['world'] | |
| 1789 self._name = dct['name'] | |
| 1790 self._order = dct['order'] | |
| 1791 | |
| 1792 self._rules = [] | |
| 1793 for rule in dct['rules']: | |
| 1794 if dct['world'] == 'vm': | |
| 1795 self._rules.append(VMRule(rule)) | |
| 1796 elif dct['world'] == 'malloc': | |
| 1797 self._rules.append(MallocRule(rule)) | |
| 1798 else: | |
| 1799 LOGGER.error('Unknown sorter world type') | |
| 1800 | |
| 1801 def __repr__(self): | |
| 1802 result = cStringIO.StringIO() | |
| 1803 result.write('world=%s' % self._world) | |
| 1804 result.write('order=%s' % self._order) | |
| 1805 result.write('rules:') | |
| 1806 for rule in self._rules: | |
| 1807 result.write(' %s' % rule) | |
| 1808 return result.getvalue() | |
| 1809 | |
| 1810 @staticmethod | |
| 1811 def load(filename): | |
| 1812 with open(filename) as sorter_f: | |
| 1813 sorter_dict = json.load(sorter_f) | |
| 1814 if sorter_dict['world'] == 'vm': | |
| 1815 return VMSorter(sorter_dict) | |
| 1816 elif sorter_dict['world'] == 'malloc': | |
| 1817 return MallocSorter(sorter_dict) | |
| 1818 else: | |
| 1819 LOGGER.error('Unknown sorter world type') | |
| 1820 return None | |
| 1821 | |
| 1822 @property | |
| 1823 def world(self): | |
| 1824 return self._world | |
| 1825 | |
| 1826 @property | |
| 1827 def name(self): | |
| 1828 return self._name | |
| 1829 | |
| 1830 def find(self, unit): | |
| 1831 raise NotImplementedError() | |
| 1832 | |
| 1833 def find_rule(self, name): | |
| 1834 """Finds a rule whose name is |name|. """ | |
| 1835 for rule in self._rules: | |
| 1836 if rule.name == name: | |
| 1837 return rule | |
| 1838 return None | |
| 1839 | |
| 1840 | |
| 1841 class VMSorter(AbstractSorter): | |
| 1842 """Represents a Sorter for memory regions on virtual memory.""" | |
| 1843 def __init__(self, dct): | |
| 1844 assert dct['world'] == 'vm' | |
| 1845 super(VMSorter, self).__init__(dct) | |
| 1846 | |
| 1847 def find(self, unit): | |
| 1848 for rule in self._rules: | |
| 1849 if rule.match(unit): | |
| 1850 return rule | |
| 1851 assert False | |
| 1852 | |
| 1853 | |
| 1854 class MallocSorter(AbstractSorter): | |
| 1855 """Represents a Sorter for malloc'ed blocks.""" | |
| 1856 def __init__(self, dct): | |
| 1857 assert dct['world'] == 'malloc' | |
| 1858 super(MallocSorter, self).__init__(dct) | |
| 1859 self._no_bucket_rule = NoBucketMallocRule() | |
| 1860 | |
| 1861 def find(self, unit): | |
| 1862 if not unit.bucket: | |
| 1863 return self._no_bucket_rule | |
| 1864 assert unit.bucket.allocator_type == 'malloc' | |
| 1865 | |
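| # Memoize the matched rule per bucket; all units sharing a bucket have the | |
| # same stacktrace, so they must match the same rule. | |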
| 1866 if unit.bucket.component_cache: | |
| 1867 return unit.bucket.component_cache | |
| 1868 | |
| 1869 for rule in self._rules: | |
| 1870 if rule.match(unit): | |
| 1871 unit.bucket.component_cache = rule | |
| 1872 return rule | |
| 1873 assert False | |
| 1874 | |
| 1875 | |
| 1876 class SorterSet(object): | |
| 1877 """Represents an iterable set of Sorters.""" | |
| 1878 def __init__(self, additional=None, default=None): | |
| 1879 if not additional: | |
| 1880 additional = [] | |
| 1881 if not default: | |
| 1882 default = DEFAULT_SORTERS | |
| 1883 self._sorters = {} | |
| 1884 for filename in default + additional: | |
| 1885 sorter = AbstractSorter.load(filename) | |
| 1886 if sorter.world not in self._sorters: | |
| 1887 self._sorters[sorter.world] = [] | |
| 1888 self._sorters[sorter.world].append(sorter) | |
| 1889 | |
| 1890 def __repr__(self): | |
| 1891 result = cStringIO.StringIO() | |
| 1892 result.write(str(self._sorters)) | |
| 1893 return result.getvalue() | |
| 1894 | |
| 1895 def __iter__(self): | |
| 1896 for sorters in self._sorters.itervalues(): | |
| 1897 for sorter in sorters: | |
| 1898 yield sorter | |
| 1899 | |
| 1900 def iter_world(self, world): | |
| 1901 for sorter in self._sorters.get(world, []): | |
| 1902 yield sorter | |
| 1903 | |
| 1904 | |
| 1905 class Command(object): | |
| 1906 """Subclasses are a subcommand for this executable. | |
| 1907 | |
| 1908 See COMMANDS in main(). | |
| 1909 """ | |
| 1910 _DEVICE_LIB_BASEDIRS = ['/data/data/', '/data/app-lib/', '/data/local/tmp'] | |
| 1911 | |
| 1912 def __init__(self, usage): | |
| 1913 self._parser = optparse.OptionParser(usage) | |
| 1914 | |
| 1915 @staticmethod | |
| 1916 def load_basic_files( | |
| 1917 dump_path, multiple, no_dump=False, alternative_dirs=None): | |
| 1918 prefix = Command._find_prefix(dump_path) | |
| 1919 # If the target process appears to have been running on Android, convert | |
| 1920 # paths on the Android device to their estimated counterparts on the host. | |
| 1921 # Use --alternative-dirs to specify the conversion manually. | |
| 1922 if not alternative_dirs: | |
| 1923 alternative_dirs = Command._estimate_alternative_dirs(prefix) | |
| 1924 if alternative_dirs: | |
| 1925 for device, host in alternative_dirs.iteritems(): | |
| 1926 LOGGER.info('Assuming %s on device as %s on host' % (device, host)) | |
| 1927 symbol_data_sources = SymbolDataSources(prefix, alternative_dirs) | |
| 1928 symbol_data_sources.prepare() | |
| 1929 bucket_set = BucketSet() | |
| 1930 bucket_set.load(prefix) | |
| 1931 if not no_dump: | |
| 1932 if multiple: | |
| 1933 dump_list = DumpList.load(Command._find_all_dumps(dump_path)) | |
| 1934 else: | |
| 1935 dump = Dump.load(dump_path) | |
| 1936 symbol_mapping_cache = SymbolMappingCache() | |
| 1937 with open(prefix + '.cache.function', 'a+') as cache_f: | |
| 1938 symbol_mapping_cache.update( | |
| 1939 FUNCTION_SYMBOLS, bucket_set, | |
| 1940 SymbolFinder(FUNCTION_SYMBOLS, symbol_data_sources), cache_f) | |
| 1941 with open(prefix + '.cache.typeinfo', 'a+') as cache_f: | |
| 1942 symbol_mapping_cache.update( | |
| 1943 TYPEINFO_SYMBOLS, bucket_set, | |
| 1944 SymbolFinder(TYPEINFO_SYMBOLS, symbol_data_sources), cache_f) | |
| 1945 with open(prefix + '.cache.sourcefile', 'a+') as cache_f: | |
| 1946 symbol_mapping_cache.update( | |
| 1947 SOURCEFILE_SYMBOLS, bucket_set, | |
| 1948 SymbolFinder(SOURCEFILE_SYMBOLS, symbol_data_sources), cache_f) | |
| 1949 bucket_set.symbolize(symbol_mapping_cache) | |
| 1950 if no_dump: | |
| 1951 return bucket_set | |
| 1952 elif multiple: | |
| 1953 return (bucket_set, dump_list) | |
| 1954 else: | |
| 1955 return (bucket_set, dump) | |
| 1956 | |
| 1957 @staticmethod | |
| 1958 def _find_prefix(path): | |
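| # E.g. a dump named "chrome.12345.0012.heap" (hypothetical) yields the | |
| # prefix "chrome.12345". | |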
| 1959 return re.sub(r'\.[0-9][0-9][0-9][0-9]\.heap', '', path) | |
| 1960 | |
| 1961 @staticmethod | |
| 1962 def _estimate_alternative_dirs(prefix): | |
| 1963 """Estimates a path in host from a corresponding path in target device. | |
| 1964 | |
| 1965 For Android, dmprof.py must find symbol information in binaries on the | |
| 1966 host rather than on the Android device because dmprof.py does not run | |
| 1967 on the device. This method estimates the host path corresponding to a | |
| 1968 path on the Android device. | |
| 1969 | |
| 1970 Returns: | |
| 1971 A dict that maps a path in the Android device to a path in the host. | |
| 1972 If a file in Command._DEVICE_LIB_BASEDIRS is found in /proc/maps, it | |
| 1973 assumes the process was running on Android and maps the path to | |
| 1974 "out/Debug/lib" in the Chromium directory. An empty dict is returned | |
| 1975 unless Android. | |
| 1976 """ | |
| 1977 device_lib_path_candidates = set() | |
| 1978 | |
| 1979 with open(prefix + '.maps') as maps_f: | |
| 1980 maps = proc_maps.ProcMaps.load(maps_f) | |
| 1981 for entry in maps: | |
| 1982 name = entry.as_dict()['name'] | |
| 1983 if any(base_dir in name for base_dir in Command._DEVICE_LIB_BASEDIRS): | |
| 1984 device_lib_path_candidates.add(os.path.dirname(name)) | |
| 1985 | |
| 1986 if len(device_lib_path_candidates) == 1: | |
| 1987 return {device_lib_path_candidates.pop(): os.path.join( | |
| 1988 CHROME_SRC_PATH, 'out', 'Debug', 'lib')} | |
| 1989 else: | |
| 1990 return {} | |
| 1991 | |
| 1992 @staticmethod | |
| 1993 def _find_all_dumps(dump_path): | |
| 1994 prefix = Command._find_prefix(dump_path) | |
| 1995 dump_path_list = [dump_path] | |
| 1996 | |
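| # Assuming names like "<prefix>.0012.heap", extract the 4-digit sequence | |
| # number that follows the prefix, then scan forward from the next number. | |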
| 1997 n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5]) | |
| 1998 n += 1 | |
| 1999 skipped = 0 | |
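| # Tolerate a bounded number of missing or empty dump files; note that | |
| # |skipped| is never reset, so the scan stops after ~10 gaps in total. | |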
| 2000 while True: | |
| 2001 p = '%s.%04d.heap' % (prefix, n) | |
| 2002 if os.path.exists(p) and os.stat(p).st_size: | |
| 2003 dump_path_list.append(p) | |
| 2004 else: | |
| 2005 if skipped > 10: | |
| 2006 break | |
| 2007 skipped += 1 | |
| 2008 n += 1 | |
| 2009 | |
| 2010 return dump_path_list | |
| 2011 | |
| 2012 @staticmethod | |
| 2013 def _find_all_buckets(dump_path): | |
| 2014 prefix = Command._find_prefix(dump_path) | |
| 2015 bucket_path_list = [] | |
| 2016 | |
| 2017 n = 0 | |
| 2018 while True: | |
| 2019 path = '%s.%04d.buckets' % (prefix, n) | |
| 2020 if not os.path.exists(path): | |
| 2021 if n > 10: | |
| 2022 break | |
| 2023 n += 1 | |
| 2024 continue | |
| 2025 bucket_path_list.append(path) | |
| 2026 n += 1 | |
| 2027 | |
| 2028 return bucket_path_list | |
| 2029 | |
| 2030 def _parse_args(self, sys_argv, required): | |
| 2031 options, args = self._parser.parse_args(sys_argv) | |
| 2032 if len(args) < required + 1: | |
| 2033 self._parser.error('needs %d argument(s).\n' % required) | |
| 2034 return None | |
| 2035 return (options, args) | |
| 2036 | |
| 2037 @staticmethod | |
| 2038 def _parse_policy_list(options_policy): | |
| 2039 if options_policy: | |
| 2040 return options_policy.split(',') | |
| 2041 else: | |
| 2042 return None | |
| 2043 | |
| 2044 | |
| 2045 class BucketsCommand(Command): | |
| 2046 def __init__(self): | |
| 2047 super(BucketsCommand, self).__init__('Usage: %prog buckets <first-dump>') | |
| 2048 | |
| 2049 def do(self, sys_argv, out=sys.stdout): | |
| 2050 _, args = self._parse_args(sys_argv, 1) | |
| 2051 dump_path = args[1] | |
| 2052 bucket_set = Command.load_basic_files(dump_path, True, True) | |
| 2053 | |
| 2054 BucketsCommand._output(bucket_set, out) | |
| 2055 return 0 | |
| 2056 | |
| 2057 @staticmethod | |
| 2058 def _output(bucket_set, out): | |
| 2059 """Prints all buckets with resolving symbols. | |
| 2060 | |
| 2061 Args: | |
| 2062 bucket_set: A BucketSet object. | |
| 2063 out: An IO object to output. | |
| 2064 """ | |
| 2065 for bucket_id, bucket in sorted(bucket_set): | |
| 2066 out.write('%d: %s\n' % (bucket_id, bucket)) | |
| 2067 | |
| 2068 | |
| 2069 class StacktraceCommand(Command): | |
| 2070 def __init__(self): | |
| 2071 super(StacktraceCommand, self).__init__( | |
| 2072 'Usage: %prog stacktrace <dump>') | |
| 2073 | |
| 2074 def do(self, sys_argv): | |
| 2075 _, args = self._parse_args(sys_argv, 1) | |
| 2076 dump_path = args[1] | |
| 2077 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
| 2078 | |
| 2079 StacktraceCommand._output(dump, bucket_set, sys.stdout) | |
| 2080 return 0 | |
| 2081 | |
| 2082 @staticmethod | |
| 2083 def _output(dump, bucket_set, out): | |
| 2084 """Outputs a given stacktrace. | |
| 2085 | |
| 2086 Args: | |
| dump: A Dump object. | |
| 2087 bucket_set: A BucketSet object. | |
| 2088 out: A file object to output. | |
| 2089 """ | |
| 2090 for line in dump.iter_stacktrace: | |
| 2091 words = line.split() | |
| 2092 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 2093 if not bucket: | |
| 2094 continue | |
| 2095 for i in range(0, BUCKET_ID - 1): | |
| 2096 out.write(words[i] + ' ') | |
| 2097 for frame in bucket.symbolized_stackfunction: | |
| 2098 out.write(frame + ' ') | |
| 2099 out.write('\n') | |
| 2100 | |
| 2101 | |
| 2102 class PolicyCommands(Command): | |
| 2103 def __init__(self, command): | |
| 2104 super(PolicyCommands, self).__init__( | |
| 2105 'Usage: %%prog %s [-p POLICY] <first-dump> [shared-first-dumps...]' % | |
| 2106 command) | |
| 2107 self._parser.add_option('-p', '--policy', type='string', dest='policy', | |
| 2108 help='profile with POLICY', metavar='POLICY') | |
| 2109 self._parser.add_option('--alternative-dirs', dest='alternative_dirs', | |
| 2110 metavar='/path/on/target@/path/on/host[:...]', | |
| 2111 help='Read files in /path/on/host/ instead of ' | |
| 2112 'files in /path/on/target/.') | |
| 2113 | |
| 2114 def _set_up(self, sys_argv): | |
| 2115 options, args = self._parse_args(sys_argv, 1) | |
| 2116 dump_path = args[1] | |
| 2117 shared_first_dump_paths = args[2:] | |
| 2118 alternative_dirs_dict = {} | |
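| # --alternative-dirs uses '@' and ':' separators, e.g. (hypothetical paths) | |
| # "--alternative-dirs=/data/app-lib/foo@out/Debug/lib". | |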
| 2119 if options.alternative_dirs: | |
| 2120 for alternative_dir_pair in options.alternative_dirs.split(':'): | |
| 2121 target_path, host_path = alternative_dir_pair.split('@', 1) | |
| 2122 alternative_dirs_dict[target_path] = host_path | |
| 2123 (bucket_set, dumps) = Command.load_basic_files( | |
| 2124 dump_path, True, alternative_dirs=alternative_dirs_dict) | |
| 2125 | |
| 2126 pfn_counts_dict = {} | |
| 2127 for shared_first_dump_path in shared_first_dump_paths: | |
| 2128 shared_dumps = Command._find_all_dumps(shared_first_dump_path) | |
| 2129 for shared_dump in shared_dumps: | |
| 2130 pfn_counts = PFNCounts.load(shared_dump) | |
| 2131 if pfn_counts.pid not in pfn_counts_dict: | |
| 2132 pfn_counts_dict[pfn_counts.pid] = [] | |
| 2133 pfn_counts_dict[pfn_counts.pid].append(pfn_counts) | |
| 2134 | |
| 2135 policy_set = PolicySet.load(Command._parse_policy_list(options.policy)) | |
| 2136 return policy_set, dumps, pfn_counts_dict, bucket_set | |
| 2137 | |
| 2138 @staticmethod | |
| 2139 def _apply_policy(dump, pfn_counts_dict, policy, bucket_set, first_dump_time): | |
| 2140 """Aggregates the total memory size of each component. | |
| 2141 | |
| 2142 Iterates through all stacktraces and attributes each of them to one of the | |
| 2143 components based on the policy. Rules match in order, so ordering matters. | |
| 2144 | |
| 2145 Args: | |
| 2146 dump: A Dump object. | |
| 2147 pfn_counts_dict: A dict mapping a pid to a list of PFNCounts. | |
| 2148 policy: A Policy object. | |
| 2149 bucket_set: A BucketSet object. | |
| 2150 first_dump_time: An integer representing the time when the first dump | |
| 2151 was taken. | |
| 2152 | |
| 2153 Returns: | |
| 2154 A dict mapping each component name to its aggregated size. | |
| 2155 """ | |
| 2156 LOGGER.info(' %s' % dump.path) | |
| 2157 all_pfn_dict = {} | |
| 2158 if pfn_counts_dict: | |
| 2159 LOGGER.info(' shared with...') | |
| 2160 for pid, pfnset_list in pfn_counts_dict.iteritems(): | |
| 2161 closest_pfnset_index = None | |
| 2162 closest_pfnset_difference = 1024.0 | |
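| # Pick the pfnset dumped closest in time to this dump: one taken less | |
| # than 3 seconds later, or an earlier one unless its reason is 'Exiting'. | |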
| 2163 for index, pfnset in enumerate(pfnset_list): | |
| 2164 time_difference = pfnset.time - dump.time | |
| 2165 if time_difference >= 3.0: | |
| 2166 break | |
| 2167 elif ((time_difference < 0.0 and pfnset.reason != 'Exiting') or | |
| 2168 (0.0 <= time_difference and time_difference < 3.0)): | |
| 2169 closest_pfnset_index = index | |
| 2170 closest_pfnset_difference = time_difference | |
| 2171 elif time_difference < 0.0 and pfnset.reason == 'Exiting': | |
| 2172 closest_pfnset_index = None | |
| 2173 break | |
| 2174 if closest_pfnset_index is not None:  # Index 0 is a valid match. | |
| 2175 for pfn, count in pfnset_list[closest_pfnset_index].iter_pfn: | |
| 2176 all_pfn_dict[pfn] = all_pfn_dict.get(pfn, 0) + count | |
| 2177 LOGGER.info(' %s (time difference = %f)' % | |
| 2178 (pfnset_list[closest_pfnset_index].path, | |
| 2179 closest_pfnset_difference)) | |
| 2180 else: | |
| 2181 LOGGER.info(' (no match with pid:%d)' % pid) | |
| 2182 | |
| 2183 sizes = dict((c, 0) for c in policy.components) | |
| 2184 | |
| 2185 PolicyCommands._accumulate_malloc(dump, policy, bucket_set, sizes) | |
| 2186 verify_global_stats = PolicyCommands._accumulate_maps( | |
| 2187 dump, all_pfn_dict, policy, bucket_set, sizes) | |
| 2188 | |
| 2189 # TODO(dmikurube): Remove the verifying code when GLOBAL_STATS is removed. | |
| 2190 # http://crbug.com/245603. | |
| 2191 for verify_key, verify_value in verify_global_stats.iteritems(): | |
| 2192 dump_value = dump.global_stat('%s_committed' % verify_key) | |
| 2193 if dump_value != verify_value: | |
| 2194 LOGGER.warn('%25s: %12d != %d (%d)' % ( | |
| 2195 verify_key, dump_value, verify_value, dump_value - verify_value)) | |
| 2196 | |
| 2197 sizes['mmap-no-log'] = ( | |
| 2198 dump.global_stat('profiled-mmap_committed') - | |
| 2199 sizes['mmap-total-log']) | |
| 2200 sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed') | |
| 2201 sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual') | |
| 2202 | |
| 2203 sizes['tc-no-log'] = ( | |
| 2204 dump.global_stat('profiled-malloc_committed') - | |
| 2205 sizes['tc-total-log']) | |
| 2206 sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed') | |
| 2207 sizes['tc-unused'] = ( | |
| 2208 sizes['mmap-tcmalloc'] - | |
| 2209 dump.global_stat('profiled-malloc_committed')) | |
| 2210 if sizes['tc-unused'] < 0: | |
| 2211 LOGGER.warn(' Assuming tc-unused=0 as it is negative: %d (bytes)' % | |
| 2212 sizes['tc-unused']) | |
| 2213 sizes['tc-unused'] = 0 | |
| 2214 sizes['tc-total'] = sizes['mmap-tcmalloc'] | |
| 2215 | |
| 2216 # TODO(dmikurube): global_stat will be deprecated. | |
| 2217 # See http://crbug.com/245603. | |
| 2218 for key, value in { | |
| 2219 'total': 'total_committed', | |
| 2220 'filemapped': 'file_committed', | |
| 2221 'absent': 'absent_committed', | |
| 2222 'file-exec': 'file-exec_committed', | |
| 2223 'file-nonexec': 'file-nonexec_committed', | |
| 2224 'anonymous': 'anonymous_committed', | |
| 2225 'stack': 'stack_committed', | |
| 2226 'other': 'other_committed', | |
| 2227 'unhooked-absent': 'nonprofiled-absent_committed', | |
| 2228 'total-vm': 'total_virtual', | |
| 2229 'filemapped-vm': 'file_virtual', | |
| 2230 'anonymous-vm': 'anonymous_virtual', | |
| 2231 'other-vm': 'other_virtual' }.iteritems(): | |
| 2232 if key in sizes: | |
| 2233 sizes[key] = dump.global_stat(value) | |
| 2234 | |
| 2235 if 'mustbezero' in sizes: | |
| 2236 removed_list = ( | |
| 2237 'profiled-mmap_committed', | |
| 2238 'nonprofiled-absent_committed', | |
| 2239 'nonprofiled-anonymous_committed', | |
| 2240 'nonprofiled-file-exec_committed', | |
| 2241 'nonprofiled-file-nonexec_committed', | |
| 2242 'nonprofiled-stack_committed', | |
| 2243 'nonprofiled-other_committed') | |
| 2244 sizes['mustbezero'] = ( | |
| 2245 dump.global_stat('total_committed') - | |
| 2246 sum(dump.global_stat(removed) for removed in removed_list)) | |
| 2247 if 'total-exclude-profiler' in sizes: | |
| 2248 sizes['total-exclude-profiler'] = ( | |
| 2249 dump.global_stat('total_committed') - | |
| 2250 (sizes['mmap-profiler'] + sizes['mmap-type-profiler'])) | |
| 2251 if 'hour' in sizes: | |
| 2252 sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0 | |
| 2253 if 'minute' in sizes: | |
| 2254 sizes['minute'] = (dump.time - first_dump_time) / 60.0 | |
| 2255 if 'second' in sizes: | |
| 2256 sizes['second'] = dump.time - first_dump_time | |
| 2257 | |
| 2258 return sizes | |
| 2259 | |
| 2260 @staticmethod | |
| 2261 def _accumulate_malloc(dump, policy, bucket_set, sizes): | |
| 2262 for line in dump.iter_stacktrace: | |
| 2263 words = line.split() | |
| 2264 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 2265 if not bucket or bucket.allocator_type == 'malloc': | |
| 2266 component_match = policy.find_malloc(bucket) | |
| 2267 elif bucket.allocator_type == 'mmap': | |
| 2268 continue | |
| 2269 else: | |
| 2270 assert False | |
| 2271 sizes[component_match] += int(words[COMMITTED]) | |
| 2272 | |
| 2273 assert not component_match.startswith('mmap-') | |
| 2274 if component_match.startswith('tc-'): | |
| 2275 sizes['tc-total-log'] += int(words[COMMITTED]) | |
| 2276 else: | |
| 2277 sizes['other-total-log'] += int(words[COMMITTED]) | |
| 2278 | |
| 2279 @staticmethod | |
| 2280 def _accumulate_maps(dump, pfn_dict, policy, bucket_set, sizes): | |
| 2281 # TODO(dmikurube): Remove the dict when GLOBAL_STATS is removed. | |
| 2282 # http://crbug.com/245603. | |
| 2283 global_stats = { | |
| 2284 'total': 0, | |
| 2285 'file-exec': 0, | |
| 2286 'file-nonexec': 0, | |
| 2287 'anonymous': 0, | |
| 2288 'stack': 0, | |
| 2289 'other': 0, | |
| 2290 'nonprofiled-file-exec': 0, | |
| 2291 'nonprofiled-file-nonexec': 0, | |
| 2292 'nonprofiled-anonymous': 0, | |
| 2293 'nonprofiled-stack': 0, | |
| 2294 'nonprofiled-other': 0, | |
| 2295 'profiled-mmap': 0, | |
| 2296 } | |
| 2297 | |
| 2298 for key, value in dump.iter_map: | |
| 2299 # TODO(dmikurube): Remove the subtotal code when GLOBAL_STATS is removed. | |
| 2300 # It's temporary verification code for transition described in | |
| 2301 # http://crbug.com/245603. | |
| 2302 committed = 0 | |
| 2303 if 'committed' in value[1]: | |
| 2304 committed = value[1]['committed'] | |
| 2305 global_stats['total'] += committed | |
| 2306 key = 'other' | |
| 2307 name = value[1]['vma']['name'] | |
| 2308 if name.startswith('/'): | |
| 2309 if value[1]['vma']['executable'] == 'x': | |
| 2310 key = 'file-exec' | |
| 2311 else: | |
| 2312 key = 'file-nonexec' | |
| 2313 elif name == '[stack]': | |
| 2314 key = 'stack' | |
| 2315 elif name == '': | |
| 2316 key = 'anonymous' | |
| 2317 global_stats[key] += committed | |
| 2318 if value[0] == 'unhooked': | |
| 2319 global_stats['nonprofiled-' + key] += committed | |
| 2320 if value[0] == 'hooked': | |
| 2321 global_stats['profiled-mmap'] += committed | |
| 2322 | |
| 2323 if value[0] == 'unhooked': | |
| 2324 if pfn_dict and dump.pageframe_length: | |
| 2325 for pageframe in value[1]['pageframe']: | |
| 2326 component_match = policy.find_unhooked(value, pageframe, pfn_dict) | |
| 2327 sizes[component_match] += pageframe.size | |
| 2328 else: | |
| 2329 component_match = policy.find_unhooked(value) | |
| 2330 sizes[component_match] += int(value[1]['committed']) | |
| 2331 elif value[0] == 'hooked': | |
| 2332 if pfn_dict and dump.pageframe_length: | |
| 2333 for pageframe in value[1]['pageframe']: | |
| 2334 component_match, _ = policy.find_mmap( | |
| 2335 value, bucket_set, pageframe, pfn_dict) | |
| 2336 sizes[component_match] += pageframe.size | |
| 2337 assert not component_match.startswith('tc-') | |
| 2338 if component_match.startswith('mmap-'): | |
| 2339 sizes['mmap-total-log'] += pageframe.size | |
| 2340 else: | |
| 2341 sizes['other-total-log'] += pageframe.size | |
| 2342 else: | |
| 2343 component_match, _ = policy.find_mmap(value, bucket_set) | |
| 2344 sizes[component_match] += int(value[1]['committed']) | |
| 2345 if component_match.startswith('mmap-'): | |
| 2346 sizes['mmap-total-log'] += int(value[1]['committed']) | |
| 2347 else: | |
| 2348 sizes['other-total-log'] += int(value[1]['committed']) | |
| 2349 else: | |
| 2350 LOGGER.error('Unrecognized mapping status: %s' % value[0]) | |
| 2351 | |
| 2352 return global_stats | |
| 2353 | |
| 2354 | |
| 2355 class CSVCommand(PolicyCommands): | |
| 2356 def __init__(self): | |
| 2357 super(CSVCommand, self).__init__('csv') | |
| 2358 | |
| 2359 def do(self, sys_argv): | |
| 2360 policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) | |
| 2361 return CSVCommand._output( | |
| 2362 policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) | |
| 2363 | |
| 2364 @staticmethod | |
| 2365 def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): | |
| 2366 max_components = 0 | |
| 2367 for label in policy_set: | |
| 2368 max_components = max(max_components, len(policy_set[label].components)) | |
| 2369 | |
| 2370 for label in sorted(policy_set): | |
| 2371 components = policy_set[label].components | |
| 2372 if len(policy_set) > 1: | |
| 2373 out.write('%s%s\n' % (label, ',' * (max_components - 1))) | |
| 2374 out.write('%s%s\n' % ( | |
| 2375 ','.join(components), ',' * (max_components - len(components)))) | |
| 2376 | |
| 2377 LOGGER.info('Applying a policy %s to...' % label) | |
| 2378 for dump in dumps: | |
| 2379 component_sizes = PolicyCommands._apply_policy( | |
| 2380 dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time) | |
| 2381 s = [] | |
| 2382 for c in components: | |
| 2383 if c in ('hour', 'minute', 'second'): | |
| 2384 s.append('%05.5f' % (component_sizes[c])) | |
| 2385 else: | |
| 2386 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) | |
| 2387 out.write('%s%s\n' % ( | |
| 2388 ','.join(s), ',' * (max_components - len(components)))) | |
| 2389 | |
| 2390 bucket_set.clear_component_cache() | |
| 2391 | |
| 2392 return 0 | |
| 2393 | |
| 2394 | |
| 2395 class JSONCommand(PolicyCommands): | |
| 2396 def __init__(self): | |
| 2397 super(JSONCommand, self).__init__('json') | |
| 2398 | |
| 2399 def do(self, sys_argv): | |
| 2400 policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) | |
| 2401 return JSONCommand._output( | |
| 2402 policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) | |
| 2403 | |
| 2404 @staticmethod | |
| 2405 def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): | |
| 2406 json_base = { | |
| 2407 'version': 'JSON_DEEP_2', | |
| 2408 'policies': {}, | |
| 2409 } | |
| 2410 | |
| 2411 for label in sorted(policy_set): | |
| 2412 json_base['policies'][label] = { | |
| 2413 'legends': policy_set[label].components, | |
| 2414 'snapshots': [], | |
| 2415 } | |
| 2416 | |
| 2417 LOGGER.info('Applying a policy %s to...' % label) | |
| 2418 for dump in dumps: | |
| 2419 component_sizes = PolicyCommands._apply_policy( | |
| 2420 dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time) | |
| 2421 component_sizes['dump_path'] = dump.path | |
| 2422 component_sizes['dump_time'] = datetime.datetime.fromtimestamp( | |
| 2423 dump.time).strftime('%Y-%m-%d %H:%M:%S') | |
| 2424 json_base['policies'][label]['snapshots'].append(component_sizes) | |
| 2425 | |
| 2426 bucket_set.clear_component_cache() | |
| 2427 | |
| 2428 json.dump(json_base, out, indent=2, sort_keys=True) | |
| 2429 | |
| 2430 return 0 | |
| 2431 | |
| 2432 | |
| 2433 class ListCommand(PolicyCommands): | |
| 2434 def __init__(self): | |
| 2435 super(ListCommand, self).__init__('list') | |
| 2436 | |
| 2437 def do(self, sys_argv): | |
| 2438 policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) | |
| 2439 return ListCommand._output( | |
| 2440 policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) | |
| 2441 | |
| 2442 @staticmethod | |
| 2443 def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): | |
| 2444 for label in sorted(policy_set): | |
| 2445 LOGGER.info('Applying a policy %s to...' % label) | |
| 2446 for dump in dumps: | |
| 2447 component_sizes = PolicyCommands._apply_policy( | |
| 2448 dump, pfn_counts_dict, policy_set[label], bucket_set, dump.time) | |
| 2449 out.write('%s for %s:\n' % (label, dump.path)) | |
| 2450 for c in policy_set[label].components: | |
| 2451 if c in ['hour', 'minute', 'second']: | |
| 2452 out.write('%40s %12.3f\n' % (c, component_sizes[c])) | |
| 2453 else: | |
| 2454 out.write('%40s %12d\n' % (c, component_sizes[c])) | |
| 2455 | |
| 2456 bucket_set.clear_component_cache() | |
| 2457 | |
| 2458 return 0 | |
| 2459 | |
| 2460 | |
| 2461 class MapCommand(Command): | |
| 2462 def __init__(self): | |
| 2463 super(MapCommand, self).__init__('Usage: %prog map <first-dump> <policy>') | |
| 2464 | |
| 2465 def do(self, sys_argv, out=sys.stdout): | |
| 2466 _, args = self._parse_args(sys_argv, 2) | |
| 2467 dump_path = args[1] | |
| 2468 target_policy = args[2] | |
| 2469 (bucket_set, dumps) = Command.load_basic_files(dump_path, True) | |
| 2470 policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) | |
| 2471 | |
| 2472 MapCommand._output(dumps, bucket_set, policy_set[target_policy], out) | |
| 2473 return 0 | |
| 2474 | |
| 2475 @staticmethod | |
| 2476 def _output(dumps, bucket_set, policy, out): | |
| 2477 """Prints all stacktraces in a given component of given depth. | |
| 2478 | |
| 2479 Args: | |
| 2480 dumps: A list of Dump objects. | |
| 2481 bucket_set: A BucketSet object. | |
| 2482 policy: A Policy object. | |
| 2483 out: An IO object to output. | |
| 2484 """ | |
| 2485 max_dump_count = 0 | |
| 2486 range_dict = ExclusiveRangeDict(ListAttribute) | |
| 2487 for dump in dumps: | |
| 2488 max_dump_count = max(max_dump_count, dump.count) | |
| 2489 for key, value in dump.iter_map: | |
| 2490 for begin, end, attr in range_dict.iter_range(key[0], key[1]): | |
| 2491 attr[dump.count] = value | |
| 2492 | |
| 2493 max_dump_count_digit = len(str(max_dump_count)) | |
| 2494 for begin, end, attr in range_dict.iter_range(): | |
| 2495 out.write('%x-%x\n' % (begin, end)) | |
| 2496 if len(attr) < max_dump_count: | |
| 2497 attr[max_dump_count] = None | |
| 2498 for index, value in enumerate(attr[1:]): | |
| 2499 out.write(' #%0*d: ' % (max_dump_count_digit, index + 1)) | |
| 2500 if not value: | |
| 2501 out.write('None\n') | |
| 2502 elif value[0] == 'hooked': | |
| 2503 component_match, _ = policy.find_mmap(value, bucket_set) | |
| 2504 out.write('%s @ %d\n' % (component_match, value[1]['bucket_id'])) | |
| 2505 else: | |
| 2506 component_match = policy.find_unhooked(value) | |
| 2507 region_info = value[1] | |
| 2508 size = region_info['committed'] | |
| 2509 out.write('%s [%d bytes] %s%s%s%s %s\n' % ( | |
| 2510 component_match, size, value[1]['vma']['readable'], | |
| 2511 value[1]['vma']['writable'], value[1]['vma']['executable'], | |
| 2512 value[1]['vma']['private'], value[1]['vma']['name'])) | |
| 2513 | |
| 2514 | |
| 2515 class ExpandCommand(Command): | |
| 2516 def __init__(self): | |
| 2517 super(ExpandCommand, self).__init__( | |
| 2518 'Usage: %prog expand <dump> <policy> <component> <depth>') | |
| 2519 | |
| 2520 def do(self, sys_argv): | |
| 2521 _, args = self._parse_args(sys_argv, 4) | |
| 2522 dump_path = args[1] | |
| 2523 target_policy = args[2] | |
| 2524 component_name = args[3] | |
| 2525 depth = args[4] | |
| 2526 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
| 2527 policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) | |
| 2528 | |
| 2529 ExpandCommand._output(dump, policy_set[target_policy], bucket_set, | |
| 2530 component_name, int(depth), sys.stdout) | |
| 2531 return 0 | |
| 2532 | |
| 2533 @staticmethod | |
| 2534 def _output(dump, policy, bucket_set, component_name, depth, out): | |
| 2535 """Prints all stacktraces in a given component of given depth. | |
| 2536 | |
| 2537 Args: | |
| 2538 dump: A Dump object. | |
| 2539 policy: A Policy object. | |
| 2540 bucket_set: A BucketSet object. | |
| 2541 component_name: A name of component for filtering. | |
| 2542 depth: An integer representing depth to be printed. | |
| 2543 out: An IO object to output. | |
| 2544 """ | |
| 2545 sizes = {} | |
| 2546 | |
| 2547 ExpandCommand._accumulate( | |
| 2548 dump, policy, bucket_set, component_name, depth, sizes) | |
| 2549 | |
| 2550 sorted_sizes_list = sorted( | |
| 2551 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) | |
| 2552 total = 0 | |
| 2553 # TODO(dmikurube): Better formatting. | |
| 2554 for size_pair in sorted_sizes_list: | |
| 2555 out.write('%10d %s\n' % (size_pair[1], size_pair[0])) | |
| 2556 total += size_pair[1] | |
| 2557 LOGGER.info('total: %d\n' % total) | |
| 2558 | |
| 2559 @staticmethod | |
| 2560 def _add_size(precedence, bucket, depth, committed, sizes): | |
| 2561 stacktrace_sequence = precedence | |
| 2562 for function, sourcefile in zip( | |
| 2563 bucket.symbolized_stackfunction[ | |
| 2564 0 : min(len(bucket.symbolized_stackfunction), 1 + depth)], | |
| 2565 bucket.symbolized_stacksourcefile[ | |
| 2566 0 : min(len(bucket.symbolized_stacksourcefile), 1 + depth)]): | |
| 2567 stacktrace_sequence += '%s(@%s) ' % (function, sourcefile) | |
| 2568 if stacktrace_sequence not in sizes: | |
| 2569 sizes[stacktrace_sequence] = 0 | |
| 2570 sizes[stacktrace_sequence] += committed | |
| 2571 | |
| 2572 @staticmethod | |
| 2573 def _accumulate(dump, policy, bucket_set, component_name, depth, sizes): | |
| 2574 rule = policy.find_rule(component_name) | |
| 2575 if not rule: | |
| 2576 pass | |
| 2577 elif rule.allocator_type == 'malloc': | |
| 2578 for line in dump.iter_stacktrace: | |
| 2579 words = line.split() | |
| 2580 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 2581 if not bucket or bucket.allocator_type == 'malloc': | |
| 2582 component_match = policy.find_malloc(bucket) | |
| 2583 elif bucket.allocator_type == 'mmap': | |
| 2584 continue | |
| 2585 else: | |
| 2586 assert False | |
| 2587 if component_match == component_name: | |
| 2588 precedence = '' | |
| 2589 precedence += '(alloc=%d) ' % int(words[ALLOC_COUNT]) | |
| 2590 precedence += '(free=%d) ' % int(words[FREE_COUNT]) | |
| 2591 if bucket.typeinfo: | |
| 2592 precedence += '(type=%s) ' % bucket.symbolized_typeinfo | |
| 2593 precedence += '(type.name=%s) ' % bucket.typeinfo_name | |
| 2594 ExpandCommand._add_size(precedence, bucket, depth, | |
| 2595 int(words[COMMITTED]), sizes) | |
| 2596 elif rule.allocator_type == 'mmap': | |
| 2597 for _, region in dump.iter_map: | |
| 2598 if region[0] != 'hooked': | |
| 2599 continue | |
| 2600 component_match, bucket = policy.find_mmap(region, bucket_set) | |
| 2601 if component_match == component_name: | |
| 2602 ExpandCommand._add_size('', bucket, depth, | |
| 2603 region[1]['committed'], sizes) | |
| 2604 | |
| 2605 | |
| 2606 class PProfCommand(Command): | |
| 2607 def __init__(self): | |
| 2608 super(PProfCommand, self).__init__( | |
| 2609 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') | |
| 2610 self._parser.add_option('-c', '--component', type='string', | |
| 2611 dest='component', | |
| 2612 help='restrict to COMPONENT', metavar='COMPONENT') | |
| 2613 | |
| 2614 def do(self, sys_argv): | |
| 2615 options, args = self._parse_args(sys_argv, 2) | |
| 2616 | |
| 2617 dump_path = args[1] | |
| 2618 target_policy = args[2] | |
| 2619 component = options.component | |
| 2620 | |
| 2621 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
| 2622 policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) | |
| 2623 | |
| 2624 with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f: | |
| 2625 maps_lines = maps_f.readlines() | |
| 2626 PProfCommand._output( | |
| 2627 dump, policy_set[target_policy], bucket_set, maps_lines, component, | |
| 2628 sys.stdout) | |
| 2629 | |
| 2630 return 0 | |
| 2631 | |
| 2632 @staticmethod | |
| 2633 def _output(dump, policy, bucket_set, maps_lines, component_name, out): | |
| 2634 """Converts the heap profile dump so it can be processed by pprof. | |
| 2635 | |
| 2636 Args: | |
| 2637 dump: A Dump object. | |
| 2638 policy: A Policy object. | |
| 2639 bucket_set: A BucketSet object. | |
| 2640 maps_lines: A list of strings containing /proc/.../maps. | |
| 2641 component_name: A name of component for filtering. | |
| 2642 out: An IO object to output. | |
| 2643 """ | |
| 2644 out.write('heap profile: ') | |
| 2645 com_committed, com_allocs = PProfCommand._accumulate( | |
| 2646 dump, policy, bucket_set, component_name) | |
| 2647 | |
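| # The header below follows pprof's text heap-profile format, assumed to be | |
| # "<in-use count>: <in-use bytes> [<alloc count>: <alloc bytes>] @ heapprofile". | |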
| 2648 out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( | |
| 2649 com_allocs, com_committed, com_allocs, com_committed)) | |
| 2650 | |
| 2651 PProfCommand._output_stacktrace_lines( | |
| 2652 dump, policy, bucket_set, component_name, out) | |
| 2653 | |
| 2654 out.write('MAPPED_LIBRARIES:\n') | |
| 2655 for line in maps_lines: | |
| 2656 out.write(line) | |
| 2657 | |
| 2658 @staticmethod | |
| 2659 def _accumulate(dump, policy, bucket_set, component_name): | |
| 2660 """Accumulates size of committed chunks and the number of allocated chunks. | |
| 2661 | |
| 2662 Args: | |
| 2663 dump: A Dump object. | |
| 2664 policy: A Policy object. | |
| 2665 bucket_set: A BucketSet object. | |
| 2666 component_name: A name of component for filtering. | |
| 2667 | |
| 2668 Returns: | |
| 2669 Two integers which are the accumulated size of committed regions and the | |
| 2670 number of allocated chunks, respectively. | |
| 2671 """ | |
| 2672 com_committed = 0 | |
| 2673 com_allocs = 0 | |
| 2674 | |
| 2675 for _, region in dump.iter_map: | |
| 2676 if region[0] != 'hooked': | |
| 2677 continue | |
| 2678 component_match, bucket = policy.find_mmap(region, bucket_set) | |
| 2679 | |
| 2680 if (component_name and component_name != component_match) or ( | |
| 2681 region[1]['committed'] == 0): | |
| 2682 continue | |
| 2683 | |
| 2684 com_committed += region[1]['committed'] | |
| 2685 com_allocs += 1 | |
| 2686 | |
| 2687 for line in dump.iter_stacktrace: | |
| 2688 words = line.split() | |
| 2689 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 2690 if not bucket or bucket.allocator_type == 'malloc': | |
| 2691 component_match = policy.find_malloc(bucket) | |
| 2692 elif bucket.allocator_type == 'mmap': | |
| 2693 continue | |
| 2694 else: | |
| 2695 assert False | |
| 2696 if (not bucket or | |
| 2697 (component_name and component_name != component_match)): | |
| 2698 continue | |
| 2699 | |
| 2700 com_committed += int(words[COMMITTED]) | |
| 2701 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) | |
| 2702 | |
| 2703 return com_committed, com_allocs | |
| 2704 | |
| 2705 @staticmethod | |
| 2706 def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out): | |
| 2707 """Prints information of stacktrace lines for pprof. | |
| 2708 | |
| 2709 Args: | |
| 2710 dump: A Dump object. | |
| 2711 policy: A Policy object. | |
| 2712 bucket_set: A BucketSet object. | |
| 2713 component_name: A name of component for filtering. | |
| 2714 out: An IO object to output. | |
| 2715 """ | |
| 2716 for _, region in dump.iter_map: | |
| 2717 if region[0] != 'hooked': | |
| 2718 continue | |
| 2719 component_match, bucket = policy.find_mmap(region, bucket_set) | |
| 2720 | |
| 2721 if (component_name and component_name != component_match) or ( | |
| 2722 region[1]['committed'] == 0): | |
| 2723 continue | |
| 2724 | |
| 2725 out.write(' 1: %8s [ 1: %8s] @' % ( | |
| 2726 region[1]['committed'], region[1]['committed'])) | |
| 2727 for address in bucket.stacktrace: | |
| 2728 out.write(' 0x%016x' % address) | |
| 2729 out.write('\n') | |
| 2730 | |
| 2731 for line in dump.iter_stacktrace: | |
| 2732 words = line.split() | |
| 2733 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 2734 if not bucket or bucket.allocator_type == 'malloc': | |
| 2735 component_match = policy.find_malloc(bucket) | |
| 2736 elif bucket.allocator_type == 'mmap': | |
| 2737 continue | |
| 2738 else: | |
| 2739 assert False | |
| 2740 if (not bucket or | |
| 2741 (component_name and component_name != component_match)): | |
| 2742 continue | |
| 2743 | |
| 2744 out.write('%6d: %8s [%6d: %8s] @' % ( | |
| 2745 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | |
| 2746 words[COMMITTED], | |
| 2747 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | |
| 2748 words[COMMITTED])) | |
| 2749 for address in bucket.stacktrace: | |
| 2750 out.write(' 0x%016x' % address) | |
| 2751 out.write('\n') | |
| 2752 | |
| 2753 | |
| 2754 class UploadCommand(Command): | |
| 2755 def __init__(self): | |
| 2756 super(UploadCommand, self).__init__( | |
| 2757 'Usage: %prog upload [--gsutil path/to/gsutil] ' | |
| 2758 '<first-dump> <destination-gs-path>') | |
| 2759 self._parser.add_option('--gsutil', default='gsutil', | |
| 2760 help='path to GSUTIL', metavar='GSUTIL') | |
| 2761 | |
| 2762 def do(self, sys_argv): | |
| 2763 options, args = self._parse_args(sys_argv, 2) | |
| 2764 dump_path = args[1] | |
| 2765 gs_path = args[2] | |
| 2766 | |
| 2767 dump_files = Command._find_all_dumps(dump_path) | |
| 2768 bucket_files = Command._find_all_buckets(dump_path) | |
| 2769 prefix = Command._find_prefix(dump_path) | |
| 2770 symbol_data_sources = SymbolDataSources(prefix) | |
| 2771 symbol_data_sources.prepare() | |
| 2772 symbol_path = symbol_data_sources.path() | |
| 2773 | |
| 2774 handle_zip, filename_zip = tempfile.mkstemp('.zip', 'dmprof') | |
| 2775 os.close(handle_zip) | |
| 2776 | |
| 2777 try: | |
| 2778 file_zip = zipfile.ZipFile(filename_zip, 'w', zipfile.ZIP_DEFLATED) | |
| 2779 for filename in dump_files: | |
| 2780 file_zip.write(filename, os.path.basename(os.path.abspath(filename))) | |
| 2781 for filename in bucket_files: | |
| 2782 file_zip.write(filename, os.path.basename(os.path.abspath(filename))) | |
| 2783 | |
| 2784 symbol_basename = os.path.basename(os.path.abspath(symbol_path)) | |
| 2785 for filename in os.listdir(symbol_path): | |
| 2786 if not filename.startswith('.'): | |
| 2787 file_zip.write(os.path.join(symbol_path, filename), | |
| 2788 os.path.join(symbol_basename, os.path.basename( | |
| 2789 os.path.abspath(filename)))) | |
| 2790 file_zip.close() | |
| 2791 | |
| 2792 returncode = UploadCommand._run_gsutil( | |
| 2793 options.gsutil, 'cp', '-a', 'public-read', filename_zip, gs_path) | |
| 2794 finally: | |
| 2795 os.remove(filename_zip) | |
| 2796 | |
| 2797 return returncode | |
| 2798 | |
| 2799 @staticmethod | |
| 2800 def _run_gsutil(gsutil, *args): | |
| 2801 """Run gsutil as a subprocess. | |
| 2802 | |
| 2803 Args: | |
| gsutil: A path to the gsutil executable. | |
| 2804 *args: Arguments to pass to gsutil. The first argument should be an | |
| 2805 operation such as ls, cp or cat. | |
| 2806 Returns: | |
| 2807 The return code from the process. | |
| 2808 """ | |
| 2809 command = [gsutil] + list(args) | |
| 2810 LOGGER.info("Running: %s", command) | |
| 2811 | |
| 2812 try: | |
| 2813 return subprocess.call(command) | |
| 2814 except OSError, e: | |
| 2815 LOGGER.error('Failed to run gsutil: %s', e) | |
| return 1 | |
| 2816 | |
| 2817 | |
| 2818 class CatCommand(Command): | |
| 2819 def __init__(self): | |
| 2820 super(CatCommand, self).__init__('Usage: %prog cat <first-dump>') | |
| 2821 self._parser.add_option('--alternative-dirs', dest='alternative_dirs', | |
| 2822 metavar='/path/on/target@/path/on/host[:...]', | |
| 2823 help='Read files in /path/on/host/ instead of ' | |
| 2824 'files in /path/on/target/.') | |
| 2825 self._parser.add_option('--indent', dest='indent', action='store_true', | |
| 2826 help='Indent the output.') | |
| 2827 | |
| 2828 def do(self, sys_argv): | |
| 2829 options, args = self._parse_args(sys_argv, 1) | |
| 2830 dump_path = args[1] | |
| 2831 # TODO(dmikurube): Support shared memory. | |
| 2832 alternative_dirs_dict = {} | |
| 2833 if options.alternative_dirs: | |
| 2834 for alternative_dir_pair in options.alternative_dirs.split(':'): | |
| 2835 target_path, host_path = alternative_dir_pair.split('@', 1) | |
| 2836 alternative_dirs_dict[target_path] = host_path | |
| 2837 (bucket_set, dumps) = Command.load_basic_files( | |
| 2838 dump_path, True, alternative_dirs=alternative_dirs_dict) | |
| 2839 | |
| 2840 json_root = OrderedDict() | |
| 2841 json_root['version'] = 1 | |
| 2842 json_root['run_id'] = None | |
| 2843 for dump in dumps: | |
| 2844 if json_root['run_id'] and json_root['run_id'] != dump.run_id: | |
| 2845 LOGGER.error('Inconsistent heap profile dumps.') | |
| 2846 json_root['run_id'] = '' | |
| 2847 break | |
| 2848 json_root['run_id'] = dump.run_id | |
| 2849 json_root['snapshots'] = [] | |
| 2850 | |
| 2851 # Load all sorters. | |
| 2852 sorters = SorterSet() | |
| 2853 | |
| 2854 for dump in dumps: | |
| 2855 json_root['snapshots'].append( | |
| 2856 self._fill_snapshot(dump, bucket_set, sorters)) | |
| 2857 | |
| 2858 if options.indent: | |
| 2859 json.dump(json_root, sys.stdout, indent=2) | |
| 2860 else: | |
| 2861 json.dump(json_root, sys.stdout) | |
| 2862 print '' | |
| 2863 | |
| 2864 @staticmethod | |
| 2865 def _fill_snapshot(dump, bucket_set, sorters): | |
| 2866 root = OrderedDict() | |
| 2867 root['time'] = dump.time | |
| 2868 root['worlds'] = OrderedDict() | |
| 2869 root['worlds']['vm'] = CatCommand._fill_world( | |
| 2870 dump, bucket_set, sorters, 'vm') | |
| 2871 root['worlds']['malloc'] = CatCommand._fill_world( | |
| 2872 dump, bucket_set, sorters, 'malloc') | |
| 2873 return root | |
| 2874 | |
| 2875 @staticmethod | |
| 2876 def _fill_world(dump, bucket_set, sorters, world): | |
| 2877 root = OrderedDict() | |
| 2878 | |
| 2879 root['name'] = 'world' | |
| 2880 if world == 'vm': | |
| 2881 root['unit_fields'] = ['committed', 'reserved'] | |
| 2882 elif world == 'malloc': | |
| 2883 root['unit_fields'] = ['size', 'alloc_count', 'free_count'] | |
| 2884 | |
| 2885 # Make { vm | malloc } units with their sizes. | |
| 2886 root['units'] = OrderedDict() | |
| 2887 unit_set = UnitSet(world) | |
| 2888 if world == 'vm': | |
| 2889 for unit in CatCommand._iterate_vm_unit(dump, None, bucket_set): | |
| 2890 unit_set.append(unit) | |
| 2891 for unit in unit_set: | |
| 2892 root['units'][unit.unit_id] = [unit.committed, unit.reserved] | |
| 2893 elif world == 'malloc': | |
| 2894 for unit in CatCommand._iterate_malloc_unit(dump, bucket_set): | |
| 2895 unit_set.append(unit) | |
| 2896 for unit in unit_set: | |
| 2897 root['units'][unit.unit_id] = [ | |
| 2898 unit.size, unit.alloc_count, unit.free_count] | |
| 2899 | |
| 2900 # Iterate for { vm | malloc } sorters. | |
| 2901 root['breakdown'] = OrderedDict() | |
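| # The breakdown maps each sorter name to categories keyed by rule name, | |
| # where each category lists the unit ids that matched that rule. | |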
| 2902 for sorter in sorters.iter_world(world): | |
| 2903 breakdown = OrderedDict() | |
| 2904 for unit in unit_set: | |
| 2905 found = sorter.find(unit) | |
| 2906 if found.name not in breakdown: | |
| 2907 category = OrderedDict() | |
| 2908 category['name'] = found.name | |
| 2909 category['color'] = 'random' | |
| 2910 subworlds = {} | |
| 2911 for subworld in found.iter_subworld(): | |
| 2912 subworlds[subworld] = False | |
| 2913 if subworlds: | |
| 2914 category['subworlds'] = subworlds | |
| 2915 if found.hidden: | |
| 2916 category['hidden'] = True | |
| 2917 category['units'] = [] | |
| 2918 breakdown[found.name] = category | |
| 2919 breakdown[found.name]['units'].append(unit.unit_id) | |
| 2920 root['breakdown'][sorter.name] = breakdown | |
| 2921 | |
| 2922 return root | |
| 2923 | |
| 2924 @staticmethod | |
| 2925 def _iterate_vm_unit(dump, pfn_dict, bucket_set): | |
| 2926 unit_id = 0 | |
| 2927 for _, region in dump.iter_map: | |
| 2928 unit_id += 1 | |
| 2929 if region[0] == 'unhooked': | |
| 2930 if pfn_dict and dump.pageframe_length: | |
| 2931 for pageframe in region[1]['pageframe']: | |
| 2932 yield UnhookedUnit(unit_id, pageframe.size, pageframe.size, | |
| 2933 region, pageframe, pfn_dict) | |
| 2934 else: | |
| 2935 yield UnhookedUnit(unit_id, | |
| 2936 int(region[1]['committed']), | |
| 2937 int(region[1]['reserved']), | |
| 2938 region) | |
| 2939 elif region[0] == 'hooked': | |
| 2940 if pfn_dict and dump.pageframe_length: | |
| 2941 for pageframe in region[1]['pageframe']: | |
| 2942 yield MMapUnit(unit_id, | |
| 2943 pageframe.size, | |
| 2944 pageframe.size, | |
| 2945 region, bucket_set, pageframe, pfn_dict) | |
| 2946 else: | |
| 2947 yield MMapUnit(unit_id, | |
| 2948 int(region[1]['committed']), | |
| 2949 int(region[1]['reserved']), | |
| 2950 region, | |
| 2951 bucket_set) | |
| 2952 else: | |
| 2953 LOGGER.error('Unrecognized mapping status: %s' % region[0]) | |
| 2954 | |
| 2955 @staticmethod | |
| 2956 def _iterate_malloc_unit(dump, bucket_set): | |
| 2957 for line in dump.iter_stacktrace: | |
| 2958 words = line.split() | |
| 2959 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 2960 if bucket and bucket.allocator_type == 'malloc': | |
| 2961 yield MallocUnit(int(words[BUCKET_ID]), | |
| 2962 int(words[COMMITTED]), | |
| 2963 int(words[ALLOC_COUNT]), | |
| 2964 int(words[FREE_COUNT]), | |
| 2965 bucket) | |
| 2966 elif not bucket: | |
| 2967 # Buckets that are not found are all assumed to be malloc buckets. | |
| 2968 yield MallocUnit(int(words[BUCKET_ID]), | |
| 2969 int(words[COMMITTED]), | |
| 2970 int(words[ALLOC_COUNT]), | |
| 2971 int(words[FREE_COUNT]), | |
| 2972 None) | |
| 2973 | 18 |
| 2974 | 19 |
| 2975 def main(): | 20 def main(): |
| 2976 COMMANDS = { | 21 COMMANDS = { |
| 2977 'buckets': BucketsCommand, | 22 'buckets': subcommands.BucketsCommand, |
| 2978 'cat': CatCommand, | 23 'cat': subcommands.CatCommand, |
| 2979 'csv': CSVCommand, | 24 'csv': subcommands.CSVCommand, |
| 2980 'expand': ExpandCommand, | 25 'expand': subcommands.ExpandCommand, |
| 2981 'json': JSONCommand, | 26 'json': subcommands.JSONCommand, |
| 2982 'list': ListCommand, | 27 'list': subcommands.ListCommand, |
| 2983 'map': MapCommand, | 28 'map': subcommands.MapCommand, |
| 2984 'pprof': PProfCommand, | 29 'pprof': subcommands.PProfCommand, |
| 2985 'stacktrace': StacktraceCommand, | 30 'stacktrace': subcommands.StacktraceCommand, |
| 2986 'upload': UploadCommand, | 31 'upload': subcommands.UploadCommand, |
| 2987 } | 32 } |
| 2988 | 33 |
| 2989 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS): | 34 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS): |
| 2990 sys.stderr.write("""Usage: dmprof <command> [options] [<args>] | 35 sys.stderr.write("""Usage: dmprof <command> [options] [<args>] |
| 2991 | 36 |
| 2992 Commands: | 37 Commands: |
| 2993 buckets Dump a bucket list with resolving symbols | 38 buckets Dump a bucket list with resolving symbols |
| 2994 cat Categorize memory usage (under development) | 39 cat Categorize memory usage (under development) |
| 2995 csv Classify memory usage in CSV | 40 csv Classify memory usage in CSV |
| 2996 expand Show all stacktraces contained in the specified component | 41 expand Show all stacktraces contained in the specified component |
| (...skipping 30 matching lines...) | |
| 3027 errorcode = COMMANDS[action]().do(sys.argv) | 72 errorcode = COMMANDS[action]().do(sys.argv) |
| 3028 except ParsingException, e: | 73 except ParsingException, e: |
| 3029 errorcode = 1 | 74 errorcode = 1 |
| 3030 sys.stderr.write('Exit by parsing error: %s\n' % e) | 75 sys.stderr.write('Exit by parsing error: %s\n' % e) |
| 3031 | 76 |
| 3032 return errorcode | 77 return errorcode |
| 3033 | 78 |
| 3034 | 79 |
| 3035 if __name__ == '__main__': | 80 if __name__ == '__main__': |
| 3036 sys.exit(main()) | 81 sys.exit(main()) |