| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env bash |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """The deep heap profiler script for Chrome.""" | 6 # Re-direct the arguments to dmprof.py. |
| 7 | 7 |
| 8 from datetime import datetime | 8 BASEDIR=$(dirname "$0") |
| 9 import json | 9 ARGV="$@" |
| 10 import logging | |
| 11 import optparse | |
| 12 import os | |
| 13 import re | |
| 14 import shutil | |
| 15 import subprocess | |
| 16 import sys | |
| 17 import tempfile | |
| 18 | 10 |
| 19 BASE_PATH = os.path.dirname(os.path.abspath(__file__)) | 11 PYTHONPATH=$BASEDIR/../python/google python \ |
| 20 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( | 11 PYTHONPATH="$BASEDIR/../python/google" python \ |
| 21 BASE_PATH, os.pardir, 'find_runtime_symbols') | 12 "$BASEDIR/dmprof.py" "$@" |
| 22 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) | |
| 23 | |
| 24 from find_runtime_symbols import find_runtime_symbols_list | |
| 25 from find_runtime_symbols import find_runtime_typeinfo_symbols_list | |
| 26 from find_runtime_symbols import RuntimeSymbolsInProcess | |
| 27 from prepare_symbol_info import prepare_symbol_info | |
| 28 | |
| 29 BUCKET_ID = 5 | |
| 30 VIRTUAL = 0 | |
| 31 COMMITTED = 1 | |
| 32 ALLOC_COUNT = 2 | |
| 33 FREE_COUNT = 3 | |
| 34 NULL_REGEX = re.compile('') | |
| 35 | |
| 36 LOGGER = logging.getLogger('dmprof') | |
| 37 POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json') | |
| 38 FUNCTION_ADDRESS = 'function' | |
| 39 TYPEINFO_ADDRESS = 'typeinfo' | |
| 40 | |
| 41 | |
| 42 # Heap Profile Dump versions | |
| 43 | |
| 44 # DUMP_DEEP_[1-4] are obsolete. | |
| 45 # DUMP_DEEP_2+ distinguish mmap regions and malloc chunks. | |
| 46 # DUMP_DEEP_3+ don't include allocation functions in their stack dumps. | |
| 47 # DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*". | |
| 48 # DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1. | |
| 49 # DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3. | |
| 50 DUMP_DEEP_1 = 'DUMP_DEEP_1' | |
| 51 DUMP_DEEP_2 = 'DUMP_DEEP_2' | |
| 52 DUMP_DEEP_3 = 'DUMP_DEEP_3' | |
| 53 DUMP_DEEP_4 = 'DUMP_DEEP_4' | |
| 54 | |
| 55 DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4) | |
| 56 | |
| 57 # DUMP_DEEP_5 doesn't separate sections for malloc and mmap. | |
| 58 # malloc and mmap are identified in bucket files. | |
| 59 # DUMP_DEEP_5 should be processed by POLICY_DEEP_4. | |
| 60 DUMP_DEEP_5 = 'DUMP_DEEP_5' | |
| 61 | |
| 62 | |
| 63 # Heap Profile Policy versions | |
| 64 | |
| 65 # POLICY_DEEP_1 DOES NOT include allocation_type columns. | |
| 66 # mmap regions are distinguished with mmap frames in the pattern column. | |
| 67 POLICY_DEEP_1 = 'POLICY_DEEP_1' | |
| 68 | |
| 69 # POLICY_DEEP_2 DOES include allocation_type columns. | |
| 70 # mmap regions are distinguished with the allocation_type column. | |
| 71 POLICY_DEEP_2 = 'POLICY_DEEP_2' | |
| 72 | |
| 73 # POLICY_DEEP_3 is in JSON format. | |
| 74 POLICY_DEEP_3 = 'POLICY_DEEP_3' | |
| 75 | |
| 76 # POLICY_DEEP_4 contains typeinfo. | |
| 77 POLICY_DEEP_4 = 'POLICY_DEEP_4' | |
| 78 | |
| 79 | |
| 80 class EmptyDumpException(Exception): | |
| 81 def __init__(self, value): | |
| 82 self.value = value | |
| 83 def __str__(self): | |
| 84 return repr(self.value) | |
| 85 | |
| 86 | |
| 87 class ParsingException(Exception): | |
| 88 def __init__(self, value): | |
| 89 self.value = value | |
| 90 def __str__(self): | |
| 91 return repr(self.value) | |
| 92 | |
| 93 | |
| 94 class InvalidDumpException(ParsingException): | |
| 95 def __init__(self, value): | |
| 96 self.value = value | |
| 97 def __str__(self): | |
| 98 return "invalid heap profile dump: %s" % repr(self.value) | |
| 99 | |
| 100 | |
| 101 class ObsoleteDumpVersionException(ParsingException): | |
| 102 def __init__(self, value): | |
| 103 self.value = value | |
| 104 def __str__(self): | |
| 105 return "obsolete heap profile dump version: %s" % repr(self.value) | |
| 106 | |
| 107 | |
| 108 def skip_while(index, max_index, skipping_condition): | |
| 109 """Increments |index| until |skipping_condition|(|index|) is False. | |
| 110 | |
| 111 Returns: | |
| 112 A pair of an integer indicating the line number after skipping, and a | |
| 113 boolean which is True if a line was found for which skipping_condition | |
| 114 is False. | |
| 115 """ | |
| 116 while skipping_condition(index): | |
| 117 index += 1 | |
| 118 if index >= max_index: | |
| 119 return index, False | |
| 120 return index, True | |
| 121 | |
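
A minimal usage sketch of the helper above; the sample `lines` are invented, and the function body is repeated so the sketch runs standalone:

```python
# Minimal usage sketch for skip_while(); the sample lines are invented.
def skip_while(index, max_index, skipping_condition):
  # (Copy of the helper above, repeated so this sketch runs standalone.)
  while skipping_condition(index):
    index += 1
    if index >= max_index:
      return index, False
  return index, True

lines = ['# comment\n', 'GLOBAL_STATS:\n', 'total 100 50\n']

# Advance past lines until the GLOBAL_STATS header is reached.
index, found = skip_while(0, len(lines),
                          lambda n: lines[n] != 'GLOBAL_STATS:\n')
print(index, found)  # -> 1 True
```
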
| 122 | |
| 123 class SymbolMapping(object): | |
| 124 """Manages all symbol information on process memory mapping. | |
| 125 | |
| 126 The symbol information consists of all symbols in the binary files obtained | |
| 127 by find_runtime_symbols/prepare_symbol_info.py which uses /proc/<pid>/maps, | |
| 128 nm and so on. It is the minimum information required to run dmprof. | |
| 129 | |
| 130 The information is prepared in a directory "|prefix|.symmap" by prepare(). | |
| 131 The directory is more portable than the Chromium binaries themselves; users | |
| 132 can save it and re-run the analysis with it later. | |
| 133 | |
| 134 Note that loading the symbol information takes a long time since it is | |
| 135 generally very large -- it doesn't know which functions and types are | |
| 136 actually used. Used symbols can be cached in the "SymbolCache" class. | |
| 137 """ | |
| 138 def __init__(self, prefix): | |
| 139 self._prefix = prefix | |
| 140 self._prepared_symbol_mapping_path = None | |
| 141 self._loaded_symbol_mapping = None | |
| 142 | |
| 143 def prepare(self): | |
| 144 """Extracts symbol mapping from binaries and prepares it to use. | |
| 145 | |
| 146 The symbol mapping is stored in a directory whose name is stored in | |
| 147 |self._prepared_symbol_mapping_path|. | |
| 148 | |
| 149 Returns: | |
| 150 True if succeeded. | |
| 151 """ | |
| 152 LOGGER.info('Preparing symbol mapping...') | |
| 153 self._prepared_symbol_mapping_path, used_tempdir = prepare_symbol_info( | |
| 154 self._prefix + '.maps', self._prefix + '.symmap', True) | |
| 155 if self._prepared_symbol_mapping_path: | |
| 156 LOGGER.info(' Prepared symbol mapping.') | |
| 157 if used_tempdir: | |
| 158 LOGGER.warn(' Using a temporary directory for symbol mapping.') | |
| 159 LOGGER.warn(' Delete it by yourself.') | |
| 160 LOGGER.warn(' Or, move the directory by yourself to use it later.') | |
| 161 return True | |
| 162 else: | |
| 163 LOGGER.warn(' Failed to prepare symbol mapping.') | |
| 164 return False | |
| 165 | |
| 166 def get(self): | |
| 167 """Returns symbol mapping. | |
| 168 | |
| 169 Returns: | |
| 170 Loaded symbol mapping. None if failed. | |
| 171 """ | |
| 172 if not self._prepared_symbol_mapping_path and not self.prepare(): | |
| 173 return None | |
| 174 if not self._loaded_symbol_mapping: | |
| 175 LOGGER.info('Loading symbol mapping...') | |
| 176 self._loaded_symbol_mapping = RuntimeSymbolsInProcess.load( | |
| 177 self._prepared_symbol_mapping_path) | |
| 178 return self._loaded_symbol_mapping | |
| 179 | |
| 180 | |
| 181 class SymbolCache(object): | |
| 182 """Manages cache of used symbol mapping. | |
| 183 | |
| 184 The original symbol mapping is provided by "SymbolMapping" (from maps, nm, | |
| 185 readelf and the like), and "SymbolCache" just caches how dmprof interprets | |
| 186 each address, to speed up repeated analysis of the same binary and dumps. | |
| 187 Handling the full symbol mapping in "SymbolMapping" takes a long time, so | |
| 188 "SymbolCache" keeps the used symbol mapping in memory and in files. | |
| 189 """ | |
| 190 def __init__(self, prefix): | |
| 191 self._prefix = prefix | |
| 192 self._symbol_cache_paths = { | |
| 193 FUNCTION_ADDRESS: prefix + '.funcsym', | |
| 194 TYPEINFO_ADDRESS: prefix + '.typesym', | |
| 195 } | |
| 196 self._find_runtime_symbols_functions = { | |
| 197 FUNCTION_ADDRESS: find_runtime_symbols_list, | |
| 198 TYPEINFO_ADDRESS: find_runtime_typeinfo_symbols_list, | |
| 199 } | |
| 200 self._symbol_caches = { | |
| 201 FUNCTION_ADDRESS: {}, | |
| 202 TYPEINFO_ADDRESS: {}, | |
| 203 } | |
| 204 | |
| 205 def update(self, address_type, bucket_set, symbol_mapping): | |
| 206 """Updates symbol mapping on memory and in a ".*sym" cache file. | |
| 207 | |
| 208 It reads cached symbol mapping from a ".*sym" file if it exists. Then, | |
| 209 it looks up unresolved addresses from a given "SymbolMapping". Finally, | |
| 210 both symbol mappings in memory and in the ".*sym" cache file are updated. | |
| 211 | |
| 212 Symbol files are formatted as follows: | |
| 213 <Address> <Symbol> | |
| 214 <Address> <Symbol> | |
| 215 <Address> <Symbol> | |
| 216 ... | |
| 217 | |
| 218 Args: | |
| 219 address_type: A type of addresses to update. It should be one of | |
| 220 FUNCTION_ADDRESS or TYPEINFO_ADDRESS. | |
| 221 bucket_set: A BucketSet object. | |
| 222 symbol_mapping: A SymbolMapping object. | |
| 223 """ | |
| 224 self._load(address_type) | |
| 225 | |
| 226 unresolved_addresses = sorted( | |
| 227 address for address in bucket_set.iter_addresses(address_type) | |
| 228 if address not in self._symbol_caches[address_type]) | |
| 229 | |
| 230 if not unresolved_addresses: | |
| 231 LOGGER.info('No need to resolve any more addresses.') | |
| 232 return | |
| 233 | |
| 234 symbol_cache_path = self._symbol_cache_paths[address_type] | |
| 235 with open(symbol_cache_path, mode='a+') as symbol_f: | |
| 236 LOGGER.info('Loading %d unresolved addresses.' % | |
| 237 len(unresolved_addresses)) | |
| 238 symbol_list = self._find_runtime_symbols_functions[address_type]( | |
| 239 symbol_mapping.get(), unresolved_addresses) | |
| 240 | |
| 241 for address, symbol in zip(unresolved_addresses, symbol_list): | |
| 242 stripped_symbol = symbol.strip() or '??' | |
| 243 self._symbol_caches[address_type][address] = stripped_symbol | |
| 244 symbol_f.write('%x %s\n' % (address, stripped_symbol)) | |
| 245 | |
| 246 def lookup(self, address_type, address): | |
| 247 """Looks up a symbol for a given |address|. | |
| 248 | |
| 249 Args: | |
| 250 address_type: A type of addresses to look up. It should be one of | |
| 251 FUNCTION_ADDRESS or TYPEINFO_ADDRESS. | |
| 252 address: An integer that represents an address. | |
| 253 | |
| 254 Returns: | |
| 255 A string that represents a symbol. | |
| 256 """ | |
| 257 return self._symbol_caches[address_type].get(address) | |
| 258 | |
| 259 def _load(self, address_type): | |
| 260 symbol_cache_path = self._symbol_cache_paths[address_type] | |
| 261 try: | |
| 262 with open(symbol_cache_path, mode='r') as symbol_f: | |
| 263 for line in symbol_f: | |
| 264 items = line.rstrip().split(None, 1) | |
| 265 if len(items) == 1: | |
| 266 items.append('??') | |
| 267 self._symbol_caches[address_type][int(items[0], 16)] = items[1] | |
| 268 LOGGER.info('Loaded %d entries from symbol cache.' % | |
| 269 len(self._symbol_caches[address_type])) | |
| 270 except IOError as e: | |
| 271 LOGGER.info('No valid symbol cache file is found.') | |
| 272 | |
| 273 | |
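
The ".funcsym" and ".typesym" cache files written by the class above are plain two-column text, one "<Address> <Symbol>" pair per line. A hedged round-trip sketch of that format (the addresses and symbol names are invented):

```python
# Illustrative round-trip for the "<Address> <Symbol>" cache format;
# the addresses and symbol names below are invented.
entries = {0x7f3a12c0: '_ZN4base8CallbackD2Ev', 0xdeadbeef: '??'}

# Writing: one "%x %s" line per resolved address, as in SymbolCache.update().
lines = ['%x %s\n' % (addr, sym) for addr, sym in sorted(entries.items())]

# Reading back: split on the first whitespace and parse the address as hex,
# as in SymbolCache._load().
cache = {}
for line in lines:
  items = line.rstrip().split(None, 1)
  if len(items) == 1:
    items.append('??')  # tolerate a missing symbol column
  cache[int(items[0], 16)] = items[1]

assert cache == entries
```
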
| 274 class Rule(object): | |
| 275 """Represents one matching rule in a policy file.""" | |
| 276 | |
| 277 def __init__(self, name, mmap, stacktrace_pattern, typeinfo_pattern=None): | |
| 278 self._name = name | |
| 279 self._mmap = mmap | |
| 280 self._stacktrace_pattern = re.compile(stacktrace_pattern + r'\Z') | |
| 281 if typeinfo_pattern: | |
| 282 self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z') | |
| 283 else: | |
| 284 self._typeinfo_pattern = None | |
| 285 | |
| 286 @property | |
| 287 def name(self): | |
| 288 return self._name | |
| 289 | |
| 290 @property | |
| 291 def mmap(self): | |
| 292 return self._mmap | |
| 293 | |
| 294 @property | |
| 295 def stacktrace_pattern(self): | |
| 296 return self._stacktrace_pattern | |
| 297 | |
| 298 @property | |
| 299 def typeinfo_pattern(self): | |
| 300 return self._typeinfo_pattern | |
| 301 | |
| 302 | |
| 303 class Policy(object): | |
| 304 """Represents a policy, a content of a policy file.""" | |
| 305 | |
| 306 def __init__(self, rules, version, components): | |
| 307 self._rules = rules | |
| 308 self._version = version | |
| 309 self._components = components | |
| 310 | |
| 311 @property | |
| 312 def rules(self): | |
| 313 return self._rules | |
| 314 | |
| 315 @property | |
| 316 def version(self): | |
| 317 return self._version | |
| 318 | |
| 319 @property | |
| 320 def components(self): | |
| 321 return self._components | |
| 322 | |
| 323 def find(self, bucket): | |
| 324 """Finds a matching component name which a given |bucket| belongs to. | |
| 325 | |
| 326 Args: | |
| 327 bucket: A Bucket object to be searched for. | |
| 328 | |
| 329 Returns: | |
| 330 A string representing a component name. | |
| 331 """ | |
| 332 if not bucket: | |
| 333 return 'no-bucket' | |
| 334 if bucket.component_cache: | |
| 335 return bucket.component_cache | |
| 336 | |
| 337 stacktrace = bucket.symbolized_joined_stacktrace | |
| 338 typeinfo = bucket.symbolized_typeinfo | |
| 339 if typeinfo.startswith('0x'): | |
| 340 typeinfo = bucket.typeinfo_name | |
| 341 | |
| 342 for rule in self._rules: | |
| 343 if (bucket.mmap == rule.mmap and | |
| 344 rule.stacktrace_pattern.match(stacktrace) and | |
| 345 (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))): | |
| 346 bucket.component_cache = rule.name | |
| 347 return rule.name | |
| 348 | |
| 349 assert False | |
| 350 | |
| 351 @staticmethod | |
| 352 def load(filename, format): | |
| 353 """Loads a policy file of |filename| in a |format|. | |
| 354 | |
| 355 Args: | |
| 356 filename: A filename to be loaded. | |
| 357 format: A string to specify a format of the file. Only 'json' is | |
| 358 supported for now. | |
| 359 | |
| 360 Returns: | |
| 361 A loaded Policy object. | |
| 362 """ | |
| 363 with open(os.path.join(BASE_PATH, filename)) as policy_f: | |
| 364 return Policy.parse(policy_f, format) | |
| 365 | |
| 366 @staticmethod | |
| 367 def parse(policy_f, format): | |
| 368 """Parses a policy file content in a |format|. | |
| 369 | |
| 370 Args: | |
| 371 policy_f: An IO object to be loaded. | |
| 372 format: A string to specify a format of the file. Only 'json' is | |
| 373 supported for now. | |
| 374 | |
| 375 Returns: | |
| 376 A loaded Policy object. | |
| 377 """ | |
| 378 if format == 'json': | |
| 379 return Policy._parse_json(policy_f) | |
| 380 else: | |
| 381 return None | |
| 382 | |
| 383 @staticmethod | |
| 384 def _parse_json(policy_f): | |
| 385 """Parses policy file in json format. | |
| 386 | |
| 387 A policy file contains component names and their stacktrace patterns | |
| 388 written as regular expressions. The patterns are matched against the | |
| 389 symbols of each stacktrace in the order written in the policy file. | |
| 390 | |
| 391 Args: | |
| 392 policy_f: A File/IO object to read. | |
| 393 | |
| 394 Returns: | |
| 395 A loaded policy object. | |
| 396 """ | |
| 397 policy = json.load(policy_f) | |
| 398 | |
| 399 rules = [] | |
| 400 for rule in policy['rules']: | |
| 401 rules.append(Rule( | |
| 402 rule['name'], | |
| 403 rule['allocator'] == 'mmap', | |
| 404 rule['stacktrace'], | |
| 405 rule['typeinfo'] if 'typeinfo' in rule else None)) | |
| 406 return Policy(rules, policy['version'], policy['components']) | |
| 407 | |
| 408 | |
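
To make `_parse_json` concrete, here is a hypothetical policy body (not copied from Chromium's shipped policy.*.json files); rule order matters, since `Policy.find` returns the first matching rule:

```python
import json

# A hypothetical policy file body; component names and patterns are
# illustrative only.
POLICY_JSON = '''
{
  "components": ["mmap-profiler", "tc-webkit", "tc-unknown"],
  "rules": [
    {"name": "mmap-profiler", "allocator": "mmap",
     "stacktrace": ".*(ProfilerMalloc|MemoryRegionMap::).*"},
    {"name": "tc-webkit", "allocator": "malloc",
     "stacktrace": ".*WebKit.*", "typeinfo": ".*WebCore::.*"},
    {"name": "tc-unknown", "allocator": "malloc", "stacktrace": ".*"}
  ],
  "version": "POLICY_DEEP_4"
}
'''

policy = json.loads(POLICY_JSON)
print(policy['version'], len(policy['rules']))  # -> POLICY_DEEP_4 3
```
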
| 409 class PolicySet(object): | |
| 410 """Represents a set of policies.""" | |
| 411 | |
| 412 def __init__(self, policy_directory): | |
| 413 self._policy_directory = policy_directory | |
| 414 | |
| 415 @staticmethod | |
| 416 def load(labels=None): | |
| 417 """Loads a set of policies via the "default policy directory". | |
| 418 | |
| 419 The "default policy directory" contains pairs of policies and their labels. | |
| 420 For example, a policy "policy.l0.json" is labeled "l0" in the default | |
| 421 policy directory "policies.json". | |
| 422 | |
| 423 All policies in the directory are loaded by default. Policies can be | |
| 424 limited by |labels|. | |
| 425 | |
| 426 Args: | |
| 427 labels: An array that contains policy labels to be loaded. | |
| 428 | |
| 429 Returns: | |
| 430 A PolicySet object. | |
| 431 """ | |
| 432 default_policy_directory = PolicySet._load_default_policy_directory() | |
| 433 if labels: | |
| 434 specified_policy_directory = {} | |
| 435 for label in labels: | |
| 436 if label in default_policy_directory: | |
| 437 specified_policy_directory[label] = default_policy_directory[label] | |
| 438 # TODO(dmikurube): Load an un-labeled policy file. | |
| 439 return PolicySet._load_policies(specified_policy_directory) | |
| 440 else: | |
| 441 return PolicySet._load_policies(default_policy_directory) | |
| 442 | |
| 443 def __len__(self): | |
| 444 return len(self._policy_directory) | |
| 445 | |
| 446 def __iter__(self): | |
| 447 for label in self._policy_directory: | |
| 448 yield label | |
| 449 | |
| 450 def __getitem__(self, label): | |
| 451 return self._policy_directory[label] | |
| 452 | |
| 453 @staticmethod | |
| 454 def _load_default_policy_directory(): | |
| 455 with open(POLICIES_JSON_PATH, mode='r') as policies_f: | |
| 456 default_policy_directory = json.load(policies_f) | |
| 457 return default_policy_directory | |
| 458 | |
| 459 @staticmethod | |
| 460 def _load_policies(directory): | |
| 461 LOGGER.info('Loading policy files.') | |
| 462 policies = {} | |
| 463 for label in directory: | |
| 464 LOGGER.info(' %s: %s' % (label, directory[label]['file'])) | |
| 465 loaded = Policy.load(directory[label]['file'], directory[label]['format']) | |
| 466 if loaded: | |
| 467 policies[label] = loaded | |
| 468 return PolicySet(policies) | |
| 469 | |
| 470 | |
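
The "default policy directory" file, policies.json, maps each label to a policy file and its format. An illustrative sketch of the shape `_load_policies` expects (the file names are examples):

```python
# Illustrative shape of policies.json; the file names are examples.
POLICIES_INDEX = {
    'l0': {'file': 'policy.l0.json', 'format': 'json'},
    'l1': {'file': 'policy.l1.json', 'format': 'json'},
}

# _load_policies() iterates the labels and loads
# directory[label]['file'] in directory[label]['format'].
for label in sorted(POLICIES_INDEX):
  print(label, POLICIES_INDEX[label]['file'])
```
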
| 471 class Bucket(object): | |
| 472 """Represents a bucket, which is a unit of memory block classification.""" | |
| 473 | |
| 474 def __init__(self, stacktrace, mmap, typeinfo, typeinfo_name): | |
| 475 self._stacktrace = stacktrace | |
| 476 self._mmap = mmap | |
| 477 self._typeinfo = typeinfo | |
| 478 self._typeinfo_name = typeinfo_name | |
| 479 | |
| 480 self._symbolized_stacktrace = stacktrace | |
| 481 self._symbolized_joined_stacktrace = '' | |
| 482 self._symbolized_typeinfo = typeinfo_name | |
| 483 | |
| 484 self.component_cache = '' | |
| 485 | |
| 486 def symbolize(self, symbol_cache): | |
| 487 """Makes a symbolized stacktrace and typeinfo with |symbol_cache|. | |
| 488 | |
| 489 Args: | |
| 490 symbol_cache: A SymbolCache object. | |
| 491 """ | |
| 492 # TODO(dmikurube): Fill explicitly with numbers if symbol not found. | |
| 493 self._symbolized_stacktrace = [ | |
| 494 symbol_cache.lookup(FUNCTION_ADDRESS, address) | |
| 495 for address in self._stacktrace] | |
| 496 self._symbolized_joined_stacktrace = ' '.join(self._symbolized_stacktrace) | |
| 497 if not self._typeinfo: | |
| 498 self._symbolized_typeinfo = 'no typeinfo' | |
| 499 else: | |
| 500 self._symbolized_typeinfo = symbol_cache.lookup( | |
| 501 TYPEINFO_ADDRESS, self._typeinfo) | |
| 502 if not self._symbolized_typeinfo: | |
| 503 self._symbolized_typeinfo = 'no typeinfo' | |
| 504 | |
| 505 def clear_component_cache(self): | |
| 506 self.component_cache = '' | |
| 507 | |
| 508 @property | |
| 509 def stacktrace(self): | |
| 510 return self._stacktrace | |
| 511 | |
| 512 @property | |
| 513 def mmap(self): | |
| 514 return self._mmap | |
| 515 | |
| 516 @property | |
| 517 def typeinfo(self): | |
| 518 return self._typeinfo | |
| 519 | |
| 520 @property | |
| 521 def typeinfo_name(self): | |
| 522 return self._typeinfo_name | |
| 523 | |
| 524 @property | |
| 525 def symbolized_stacktrace(self): | |
| 526 return self._symbolized_stacktrace | |
| 527 | |
| 528 @property | |
| 529 def symbolized_joined_stacktrace(self): | |
| 530 return self._symbolized_joined_stacktrace | |
| 531 | |
| 532 @property | |
| 533 def symbolized_typeinfo(self): | |
| 534 return self._symbolized_typeinfo | |
| 535 | |
| 536 | |
| 537 class BucketSet(object): | |
| 538 """Represents a set of bucket.""" | |
| 539 def __init__(self): | |
| 540 self._buckets = {} | |
| 541 self._addresses = { | |
| 542 FUNCTION_ADDRESS: set(), | |
| 543 TYPEINFO_ADDRESS: set(), | |
| 544 } | |
| 545 | |
| 546 @staticmethod | |
| 547 def load(prefix): | |
| 548 """Loads all related bucket files. | |
| 549 | |
| 550 Args: | |
| 551 prefix: A prefix string for bucket file names. | |
| 552 | |
| 553 Returns: | |
| 554 A loaded BucketSet object. | |
| 555 """ | |
| 556 LOGGER.info('Loading bucket files.') | |
| 557 bucket_set = BucketSet() | |
| 558 | |
| 559 n = 0 | |
| 560 while True: | |
| 561 path = '%s.%04d.buckets' % (prefix, n) | |
| 562 if not os.path.exists(path): | |
| 563 if n > 10: | |
| 564 break | |
| 565 n += 1 | |
| 566 continue | |
| 567 LOGGER.info(' %s' % path) | |
| 568 with open(path, 'r') as f: | |
| 569 bucket_set._load_file(f) | |
| 570 n += 1 | |
| 571 | |
| 572 return bucket_set | |
| 573 | |
| 574 def _load_file(self, bucket_f): | |
| 575 for line in bucket_f: | |
| 576 words = line.split() | |
| 577 typeinfo = None | |
| 578 typeinfo_name = '' | |
| 579 stacktrace_begin = 2 | |
| 580 for index, word in enumerate(words): | |
| 581 if index < 2: | |
| 582 continue | |
| 583 if word[0] == 't': | |
| 584 typeinfo = int(word[1:], 16) | |
| 585 self._addresses[TYPEINFO_ADDRESS].add(typeinfo) | |
| 586 elif word[0] == 'n': | |
| 587 typeinfo_name = word[1:] | |
| 588 else: | |
| 589 stacktrace_begin = index | |
| 590 break | |
| 591 stacktrace = [int(address, 16) for address in words[stacktrace_begin:]] | |
| 592 for frame in stacktrace: | |
| 593 self._addresses[FUNCTION_ADDRESS].add(frame) | |
| 594 self._buckets[int(words[0])] = Bucket( | |
| 595 stacktrace, words[1] == 'mmap', typeinfo, typeinfo_name) | |
| 596 | |
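
For reference, a hedged example of one ".buckets" line as `_load_file` decodes it (all field values are invented): the first word is the bucket id, the second the allocator, then optional "t<hex>" (typeinfo address) and "n<name>" words, and the rest are stack frame addresses in hex:

```python
# Decode one invented bucket line the way BucketSet._load_file() does.
line = '123 malloc t7f3a0 nstd::string 4005e0 400720 400890\n'
words = line.split()

bucket_id = int(words[0])            # 123
is_mmap = (words[1] == 'mmap')       # False: a malloc bucket
typeinfo = int(words[2][1:], 16)     # 0x7f3a0, from the 't' word
typeinfo_name = words[3][1:]         # 'std::string', from the 'n' word
stacktrace = [int(w, 16) for w in words[4:]]

print(bucket_id, is_mmap, hex(typeinfo), typeinfo_name, len(stacktrace))
```
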
| 597 def __iter__(self): | |
| 598 for bucket_id, bucket_content in self._buckets.iteritems(): | |
| 599 yield bucket_id, bucket_content | |
| 600 | |
| 601 def __getitem__(self, bucket_id): | |
| 602 return self._buckets[bucket_id] | |
| 603 | |
| 604 def get(self, bucket_id): | |
| 605 return self._buckets.get(bucket_id) | |
| 606 | |
| 607 def symbolize(self, symbol_cache): | |
| 608 for bucket_content in self._buckets.itervalues(): | |
| 609 bucket_content.symbolize(symbol_cache) | |
| 610 | |
| 611 def clear_component_cache(self): | |
| 612 for bucket_content in self._buckets.itervalues(): | |
| 613 bucket_content.clear_component_cache() | |
| 614 | |
| 615 def iter_addresses(self, address_type): | |
| 616 for function in self._addresses[address_type]: | |
| 617 yield function | |
| 618 | |
| 619 | |
| 620 class Dump(object): | |
| 621 """Represents a heap profile dump.""" | |
| 622 | |
| 623 def __init__(self): | |
| 624 self._path = '' | |
| 625 self._time = None | |
| 626 self._stacktrace_lines = [] | |
| 627 self._global_stats = {} # used only in apply_policy | |
| 628 | |
| 629 self._version = '' | |
| 630 self._lines = [] | |
| 631 | |
| 632 @property | |
| 633 def path(self): | |
| 634 return self._path | |
| 635 | |
| 636 @property | |
| 637 def time(self): | |
| 638 return self._time | |
| 639 | |
| 640 @property | |
| 641 def iter_stacktrace(self): | |
| 642 for line in self._stacktrace_lines: | |
| 643 yield line | |
| 644 | |
| 645 def global_stat(self, name): | |
| 646 return self._global_stats[name] | |
| 647 | |
| 648 @staticmethod | |
| 649 def load(path, log_header='Loading a heap profile dump: '): | |
| 650 """Loads a heap profile dump. | |
| 651 | |
| 652 Args: | |
| 653 path: A file path string to load. | |
| 654 log_header: A preceding string for log messages. | |
| 655 | |
| 656 Returns: | |
| 657 A loaded Dump object. | |
| 658 | |
| 659 Raises: | |
| 660 ParsingException for invalid heap profile dumps. | |
| 661 """ | |
| 662 dump = Dump() | |
| 663 dump._path = path | |
| 664 dump._time = os.stat(dump._path).st_mtime | |
| 665 dump._version = '' | |
| 666 | |
| 667 dump._lines = [line for line in open(dump._path, 'r') | |
| 668 if line and not line.startswith('#')] | |
| 669 | |
| 670 try: | |
| 671 dump._version, ln = dump._parse_version() | |
| 672 dump._parse_global_stats() | |
| 673 dump._extract_stacktrace_lines(ln) | |
| 674 except EmptyDumpException: | |
| 675 LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, path)) | |
| 676 except ParsingException, e: | |
| 677 LOGGER.error('%s%s ...error %s' % (log_header, path, e)) | |
| 678 raise | |
| 679 else: | |
| 680 LOGGER.info('%s%s (version: %s)' % (log_header, path, dump._version)) | |
| 681 | |
| 682 return dump | |
| 683 | |
| 684 def _parse_version(self): | |
| 685 """Parses a version string in self._lines. | |
| 686 | |
| 687 Returns: | |
| 688 A pair of (a string representing the version of the stacktrace dump, | |
| 689 and an integer indicating the line number after the version string). | |
| 690 | |
| 691 Raises: | |
| 692 ParsingException for invalid dump versions. | |
| 693 """ | |
| 694 version = '' | |
| 695 | |
| 696 # Skip until an identifiable line. | |
| 697 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') | |
| 698 if not self._lines: | |
| 699 raise EmptyDumpException('Empty heap dump file.') | |
| 700 (ln, found) = skip_while( | |
| 701 0, len(self._lines), | |
| 702 lambda n: not self._lines[n].startswith(headers)) | |
| 703 if not found: | |
| 704 raise InvalidDumpException('No version header.') | |
| 705 | |
| 706 # Identify a version. | |
| 707 if self._lines[ln].startswith('heap profile: '): | |
| 708 version = self._lines[ln][13:].strip() | |
| 709 if version == DUMP_DEEP_5: | |
| 710 (ln, _) = skip_while( | |
| 711 ln, len(self._lines), | |
| 712 lambda n: self._lines[n] != 'STACKTRACES:\n') | |
| 713 elif version in DUMP_DEEP_OBSOLETE: | |
| 714 raise ObsoleteDumpVersionException(version) | |
| 715 else: | |
| 716 raise InvalidDumpException('Invalid version: %s' % version) | |
| 717 elif self._lines[ln] == 'STACKTRACES:\n': | |
| 718 raise ObsoleteDumpVersionException(DUMP_DEEP_1) | |
| 719 elif self._lines[ln] == 'MMAP_STACKTRACES:\n': | |
| 720 raise ObsoleteDumpVersionException(DUMP_DEEP_2) | |
| 721 | |
| 722 return (version, ln) | |
| 723 | |
| 724 def _parse_global_stats(self): | |
| 725 """Parses lines in self._lines as global stats.""" | |
| 726 (ln, _) = skip_while( | |
| 727 0, len(self._lines), | |
| 728 lambda n: self._lines[n] != 'GLOBAL_STATS:\n') | |
| 729 | |
| 730 global_stat_names = [ | |
| 731 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', | |
| 732 'nonprofiled-absent', 'nonprofiled-anonymous', | |
| 733 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', | |
| 734 'nonprofiled-stack', 'nonprofiled-other', | |
| 735 'profiled-mmap', 'profiled-malloc'] | |
| 736 | |
| 737 for prefix in global_stat_names: | |
| 738 (ln, _) = skip_while( | |
| 739 ln, len(self._lines), | |
| 740 lambda n: self._lines[n].split()[0] != prefix) | |
| 741 words = self._lines[ln].split() | |
| 742 self._global_stats[prefix + '_virtual'] = int(words[-2]) | |
| 743 self._global_stats[prefix + '_committed'] = int(words[-1]) | |
| 744 | |
| 745 def _extract_stacktrace_lines(self, line_number): | |
| 746 """Extracts the position of stacktrace lines. | |
| 747 | |
| 748 Valid stacktrace lines are stored into self._stacktrace_lines. | |
| 749 | |
| 750 Args: | |
| 751 line_number: A line number to start parsing in lines. | |
| 752 | |
| 753 Raises: | |
| 754 ParsingException for invalid dump versions. | |
| 755 """ | |
| 756 if self._version == DUMP_DEEP_5: | |
| 757 (line_number, _) = skip_while( | |
| 758 line_number, len(self._lines), | |
| 759 lambda n: not self._lines[n].split()[0].isdigit()) | |
| 760 stacktrace_start = line_number | |
| 761 (line_number, _) = skip_while( | |
| 762 line_number, len(self._lines), | |
| 763 lambda n: self._check_stacktrace_line(self._lines[n])) | |
| 764 self._stacktrace_lines = self._lines[stacktrace_start:line_number] | |
| 765 | |
| 766 elif self._version in DUMP_DEEP_OBSOLETE: | |
| 767 raise ObsoleteDumpVersionException(self._version) | |
| 768 | |
| 769 else: | |
| 770 raise InvalidDumpException('Invalid version: %s' % self._version) | |
| 771 | |
| 772 @staticmethod | |
| 773 def _check_stacktrace_line(stacktrace_line): | |
| 774 """Checks if a given stacktrace_line is valid as stacktrace. | |
| 775 | |
| 776 Args: | |
| 777 stacktrace_line: A string to be checked. | |
| 778 | |
| 779 Returns: | |
| 780 True if the given stacktrace_line is valid. | |
| 781 """ | |
| 782 words = stacktrace_line.split() | |
| 783 if len(words) < BUCKET_ID + 1: | |
| 784 return False | |
| 785 if words[BUCKET_ID - 1] != '@': | |
| 786 return False | |
| 787 return True | |
| 788 | |
| 789 | |
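
For orientation, the column constants at the top of the file (VIRTUAL=0, COMMITTED=1, ALLOC_COUNT=2, FREE_COUNT=3, BUCKET_ID=5) index into one stacktrace line; the layout below is inferred from those constants and the check above, and the numbers are invented:

```python
# An invented DUMP_DEEP_5 stacktrace line; the columns are
# <virtual> <committed> <allocs> <frees> @ <bucket id>.
line = '81920 40960 12 4 @ 123\n'
words = line.split()

VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, BUCKET_ID = 0, 1, 2, 3, 5
assert words[BUCKET_ID - 1] == '@'  # the same check as above
print(int(words[COMMITTED]),                             # 40960 bytes
      int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),  # 8 live chunks
      int(words[BUCKET_ID]))                             # bucket 123
```
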
| 790 class DumpList(object): | |
| 791 """Represents a sequence of heap profile dumps.""" | |
| 792 | |
| 793 def __init__(self, dump_list): | |
| 794 self._dump_list = dump_list | |
| 795 | |
| 796 @staticmethod | |
| 797 def load(path_list): | |
| 798 LOGGER.info('Loading heap dump profiles.') | |
| 799 dump_list = [] | |
| 800 for path in path_list: | |
| 801 dump_list.append(Dump.load(path, ' ')) | |
| 802 return DumpList(dump_list) | |
| 803 | |
| 804 def __len__(self): | |
| 805 return len(self._dump_list) | |
| 806 | |
| 807 def __iter__(self): | |
| 808 for dump in self._dump_list: | |
| 809 yield dump | |
| 810 | |
| 811 def __getitem__(self, index): | |
| 812 return self._dump_list[index] | |
| 813 | |
| 814 | |
| 815 class Command(object): | |
| 816 """Subclasses are a subcommand for this executable. | |
| 817 | |
| 818 See COMMANDS in main(). | |
| 819 """ | |
| 820 def __init__(self, usage): | |
| 821 self._parser = optparse.OptionParser(usage) | |
| 822 | |
| 823 @staticmethod | |
| 824 def load_basic_files(dump_path, multiple): | |
| 825 prefix = Command._find_prefix(dump_path) | |
| 826 symbol_mapping = SymbolMapping(prefix) | |
| 827 symbol_mapping.prepare() | |
| 828 bucket_set = BucketSet.load(prefix) | |
| 829 if multiple: | |
| 830 dump_list = DumpList.load(Command._find_all_dumps(dump_path)) | |
| 831 else: | |
| 832 dump = Dump.load(dump_path) | |
| 833 symbol_cache = SymbolCache(prefix) | |
| 834 symbol_cache.update(FUNCTION_ADDRESS, bucket_set, symbol_mapping) | |
| 835 symbol_cache.update(TYPEINFO_ADDRESS, bucket_set, symbol_mapping) | |
| 836 bucket_set.symbolize(symbol_cache) | |
| 837 if multiple: | |
| 838 return (bucket_set, dump_list) | |
| 839 else: | |
| 840 return (bucket_set, dump) | |
| 841 | |
| 842 @staticmethod | |
| 843 def _find_prefix(path): | |
| 844 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path) | |
| 845 | |
| 846 @staticmethod | |
| 847 def _find_all_dumps(dump_path): | |
| 848 prefix = Command._find_prefix(dump_path) | |
| 849 dump_path_list = [dump_path] | |
| 850 | |
| 851 n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5]) | |
| 852 n += 1 | |
| 853 while True: | |
| 854 p = '%s.%04d.heap' % (prefix, n) | |
| 855 if os.path.exists(p): | |
| 856 dump_path_list.append(p) | |
| 857 else: | |
| 858 break | |
| 859 n += 1 | |
| 860 | |
| 861 return dump_path_list | |
| 862 | |
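
A quick sketch of the two path helpers above, with a hypothetical dump path: `_find_prefix` strips the ".NNNN.heap" suffix, and `_find_all_dumps` slices the 4-digit sequence number out of the path before probing for the following dumps:

```python
import re

dump_path = '/tmp/chrome.3456.0012.heap'  # a hypothetical dump file

# The same substitution _find_prefix() performs.
prefix = re.sub(r'\.[0-9][0-9][0-9][0-9]\.heap', '', dump_path)
print(prefix)  # -> /tmp/chrome.3456

# The same slice _find_all_dumps() uses for the sequence number.
n = int(dump_path[len(dump_path) - 9:len(dump_path) - 5])
print(n)  # -> 12
```
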
| 863 def _parse_args(self, sys_argv, required): | |
| 864 options, args = self._parser.parse_args(sys_argv) | |
| 865 if len(args) != required + 1: | |
| 866 self._parser.error('needs %d argument(s).\n' % required) | |
| 867 return None | |
| 868 return (options, args) | |
| 869 | |
| 870 def _parse_policy_list(self, options_policy): | |
| 871 if options_policy: | |
| 872 return options_policy.split(',') | |
| 873 else: | |
| 874 return None | |
| 875 | |
| 876 | |
| 877 class StacktraceCommand(Command): | |
| 878 def __init__(self): | |
| 879 super(StacktraceCommand, self).__init__( | |
| 880 'Usage: %prog stacktrace <dump>') | |
| 881 | |
| 882 def do(self, sys_argv): | |
| 883 options, args = self._parse_args(sys_argv, 1) | |
| 884 dump_path = args[1] | |
| 885 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
| 886 | |
| 887 StacktraceCommand._output(dump, bucket_set, sys.stdout) | |
| 888 return 0 | |
| 889 | |
| 890 @staticmethod | |
| 891 def _output(dump, bucket_set, out): | |
| 892 """Outputs a given stacktrace. | |
| 893 | |
| 894 Args: | |
| 895 bucket_set: A BucketSet object. | |
| 896 out: A file object to output. | |
| 897 """ | |
| 898 for line in dump.iter_stacktrace: | |
| 899 words = line.split() | |
| 900 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 901 if not bucket: | |
| 902 continue | |
| 903 for i in range(0, BUCKET_ID - 1): | |
| 904 out.write(words[i] + ' ') | |
| 905 for frame in bucket.symbolized_stacktrace: | |
| 906 out.write(frame + ' ') | |
| 907 out.write('\n') | |
| 908 | |
| 909 | |
| 910 class PolicyCommands(Command): | |
| 911 def __init__(self, command): | |
| 912 super(PolicyCommands, self).__init__( | |
| 913 'Usage: %%prog %s [-p POLICY] <first-dump>' % command) | |
| 914 self._parser.add_option('-p', '--policy', type='string', dest='policy', | |
| 915 help='profile with POLICY', metavar='POLICY') | |
| 916 | |
| 917 def _set_up(self, sys_argv): | |
| 918 options, args = self._parse_args(sys_argv, 1) | |
| 919 dump_path = args[1] | |
| 920 (bucket_set, dumps) = Command.load_basic_files(dump_path, True) | |
| 921 | |
| 922 policy_set = PolicySet.load(self._parse_policy_list(options.policy)) | |
| 923 return policy_set, dumps, bucket_set | |
| 924 | |
| 925 def _apply_policy(self, dump, policy, bucket_set, first_dump_time): | |
| 926 """Aggregates the total memory size of each component. | |
| 927 | |
| 928 Iterates through all stacktraces and attributes them to one of the components | |
| 929 based on the policy. It is important to apply the rules in the right order. | |
| 930 | |
| 931 Args: | |
| 932 dump: A Dump object. | |
| 933 policy: A Policy object. | |
| 934 bucket_set: A BucketSet object. | |
| 935 first_dump_time: An integer representing time when the first dump is | |
| 936 dumped. | |
| 937 | |
| 938 Returns: | |
| 939 A dict mapping components and their corresponding sizes. | |
| 940 """ | |
| 941 LOGGER.info(' %s' % dump.path) | |
| 942 sizes = dict((c, 0) for c in policy.components) | |
| 943 | |
| 944 PolicyCommands._accumulate(dump, policy, bucket_set, sizes) | |
| 945 | |
| 946 sizes['mmap-no-log'] = ( | |
| 947 dump.global_stat('profiled-mmap_committed') - | |
| 948 sizes['mmap-total-log']) | |
| 949 sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed') | |
| 950 sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual') | |
| 951 | |
| 952 sizes['tc-no-log'] = ( | |
| 953 dump.global_stat('profiled-malloc_committed') - | |
| 954 sizes['tc-total-log']) | |
| 955 sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed') | |
| 956 sizes['tc-unused'] = ( | |
| 957 sizes['mmap-tcmalloc'] - | |
| 958 dump.global_stat('profiled-malloc_committed')) | |
| 959 sizes['tc-total'] = sizes['mmap-tcmalloc'] | |
| 960 | |
| 961 for key, value in { | |
| 962 'total': 'total_committed', | |
| 963 'filemapped': 'file_committed', | |
| 964 'file-exec': 'file-exec_committed', | |
| 965 'file-nonexec': 'file-nonexec_committed', | |
| 966 'anonymous': 'anonymous_committed', | |
| 967 'stack': 'stack_committed', | |
| 968 'other': 'other_committed', | |
| 969 'unhooked-absent': 'nonprofiled-absent_committed', | |
| 970 'unhooked-anonymous': 'nonprofiled-anonymous_committed', | |
| 971 'unhooked-file-exec': 'nonprofiled-file-exec_committed', | |
| 972 'unhooked-file-nonexec': 'nonprofiled-file-nonexec_committed', | |
| 973 'unhooked-stack': 'nonprofiled-stack_committed', | |
| 974 'unhooked-other': 'nonprofiled-other_committed', | |
| 975 'total-vm': 'total_virtual', | |
| 976 'filemapped-vm': 'file_virtual', | |
| 977 'anonymous-vm': 'anonymous_virtual', | |
| 978 'other-vm': 'other_virtual' }.iteritems(): | |
| 979 if key in sizes: | |
| 980 sizes[key] = dump.global_stat(value) | |
| 981 | |
| 982 if 'mustbezero' in sizes: | |
| 983 removed_list = ( | |
| 984 'profiled-mmap_committed', | |
| 985 'nonprofiled-absent_committed', | |
| 986 'nonprofiled-anonymous_committed', | |
| 987 'nonprofiled-file-exec_committed', | |
| 988 'nonprofiled-file-nonexec_committed', | |
| 989 'nonprofiled-stack_committed', | |
| 990 'nonprofiled-other_committed') | |
| 991 sizes['mustbezero'] = ( | |
| 992 dump.global_stat('total_committed') - | |
| 993 sum(dump.global_stat(removed) for removed in removed_list)) | |
| 994 if 'total-exclude-profiler' in sizes: | |
| 995 sizes['total-exclude-profiler'] = ( | |
| 996 dump.global_stat('total_committed') - | |
| 997 (sizes['mmap-profiler'] + sizes['mmap-type-profiler'])) | |
| 998 if 'hour' in sizes: | |
| 999 sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0 | |
| 1000 if 'minute' in sizes: | |
| 1001 sizes['minute'] = (dump.time - first_dump_time) / 60.0 | |
| 1002 if 'second' in sizes: | |
| 1003 sizes['second'] = dump.time - first_dump_time | |
| 1004 | |
| 1005 return sizes | |
| 1006 | |
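
The derived entries above are plain differences against the dump's global stats; a worked example with invented numbers:

```python
# Invented numbers showing the derived 'mmap-no-log' entry:
# committed mmap bytes the profiler saw, minus the bytes already
# attributed to 'mmap-*' components by the rules.
profiled_mmap_committed = 300000  # dump.global_stat('profiled-mmap_committed')
mmap_total_log = 280000           # accumulated 'mmap-*' component sizes
print(profiled_mmap_committed - mmap_total_log)  # mmap-no-log -> 20000
```
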
| 1007 @staticmethod | |
| 1008 def _accumulate(dump, policy, bucket_set, sizes): | |
| 1009 for line in dump.iter_stacktrace: | |
| 1010 words = line.split() | |
| 1011 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 1012 component_match = policy.find(bucket) | |
| 1013 sizes[component_match] += int(words[COMMITTED]) | |
| 1014 | |
| 1015 if component_match.startswith('tc-'): | |
| 1016 sizes['tc-total-log'] += int(words[COMMITTED]) | |
| 1017 elif component_match.startswith('mmap-'): | |
| 1018 sizes['mmap-total-log'] += int(words[COMMITTED]) | |
| 1019 else: | |
| 1020 sizes['other-total-log'] += int(words[COMMITTED]) | |
| 1021 | |
| 1022 | |
| 1023 class CSVCommand(PolicyCommands): | |
| 1024 def __init__(self): | |
| 1025 super(CSVCommand, self).__init__('csv') | |
| 1026 | |
| 1027 def do(self, sys_argv): | |
| 1028 policy_set, dumps, bucket_set = self._set_up(sys_argv) | |
| 1029 return self._output(policy_set, dumps, bucket_set, sys.stdout) | |
| 1030 | |
| 1031 def _output(self, policy_set, dumps, bucket_set, out): | |
| 1032 max_components = 0 | |
| 1033 for label in policy_set: | |
| 1034 max_components = max(max_components, len(policy_set[label].components)) | |
| 1035 | |
| 1036 for label in sorted(policy_set): | |
| 1037 components = policy_set[label].components | |
| 1038 if len(policy_set) > 1: | |
| 1039 out.write('%s%s\n' % (label, ',' * (max_components - 1))) | |
| 1040 out.write('%s%s\n' % ( | |
| 1041 ','.join(components), ',' * (max_components - len(components)))) | |
| 1042 | |
| 1043 LOGGER.info('Applying a policy %s to...' % label) | |
| 1044 for dump in dumps: | |
| 1045 component_sizes = self._apply_policy( | |
| 1046 dump, policy_set[label], bucket_set, dumps[0].time) | |
| 1047 s = [] | |
| 1048 for c in components: | |
| 1049 if c in ('hour', 'minute', 'second'): | |
| 1050 s.append('%05.5f' % (component_sizes[c])) | |
| 1051 else: | |
| 1052 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) | |
| 1053 out.write('%s%s\n' % ( | |
| 1054 ','.join(s), ',' * (max_components - len(components)))) | |
| 1055 | |
| 1056 bucket_set.clear_component_cache() | |
| 1057 | |
| 1058 return 0 | |
| 1059 | |
| 1060 | |
| 1061 class JSONCommand(PolicyCommands): | |
| 1062 def __init__(self): | |
| 1063 super(JSONCommand, self).__init__('json') | |
| 1064 | |
| 1065 def do(self, sys_argv): | |
| 1066 policy_set, dumps, bucket_set = self._set_up(sys_argv) | |
| 1067 return self._output(policy_set, dumps, bucket_set, sys.stdout) | |
| 1068 | |
| 1069 def _output(self, policy_set, dumps, bucket_set, out): | |
| 1070 json_base = { | |
| 1071 'version': 'JSON_DEEP_2', | |
| 1072 'policies': {}, | |
| 1073 } | |
| 1074 | |
| 1075 for label in sorted(policy_set): | |
| 1076 json_base['policies'][label] = { | |
| 1077 'legends': policy_set[label].components, | |
| 1078 'snapshots': [], | |
| 1079 } | |
| 1080 | |
| 1081 LOGGER.info('Applying a policy %s to...' % label) | |
| 1082 for dump in dumps: | |
| 1083 component_sizes = self._apply_policy( | |
| 1084 dump, policy_set[label], bucket_set, dumps[0].time) | |
| 1085 component_sizes['dump_path'] = dump.path | |
| 1086 component_sizes['dump_time'] = datetime.fromtimestamp( | |
| 1087 dump.time).strftime('%Y-%m-%d %H:%M:%S') | |
| 1088 json_base['policies'][label]['snapshots'].append(component_sizes) | |
| 1089 | |
| 1090 bucket_set.clear_component_cache() | |
| 1091 | |
| 1092 json.dump(json_base, out, indent=2, sort_keys=True) | |
| 1093 | |
| 1094 return 0 | |
| 1095 | |
| 1096 | |
| 1097 class ListCommand(PolicyCommands): | |
| 1098 def __init__(self): | |
| 1099 super(ListCommand, self).__init__('list') | |
| 1100 | |
| 1101 def do(self, sys_argv): | |
| 1102 policy_set, dumps, bucket_set = self._set_up(sys_argv) | |
| 1103 return self._output(policy_set, dumps, bucket_set, sys.stdout) | |
| 1104 | |
| 1105 def _output(self, policy_set, dumps, bucket_set, out): | |
| 1106 for label in sorted(policy_set): | |
| 1107 LOGGER.info('Applying a policy %s to...' % label) | |
| 1108 for dump in dumps: | |
| 1109 component_sizes = self._apply_policy( | |
| 1110 dump, policy_set[label], bucket_set, dump.time) | |
| 1111 out.write('%s for %s:\n' % (label, dump.path)) | |
| 1112 for c in policy_set[label].components: | |
| 1113 if c in ['hour', 'minute', 'second']: | |
| 1114 out.write('%40s %12.3f\n' % (c, component_sizes[c])) | |
| 1115 else: | |
| 1116 out.write('%40s %12d\n' % (c, component_sizes[c])) | |
| 1117 | |
| 1118 bucket_set.clear_component_cache() | |
| 1119 | |
| 1120 return 0 | |
| 1121 | |
| 1122 | |
| 1123 class ExpandCommand(Command): | |
| 1124 def __init__(self): | |
| 1125 super(ExpandCommand, self).__init__( | |
| 1126 'Usage: %prog expand <dump> <policy> <component> <depth>') | |
| 1127 | |
| 1128 def do(self, sys_argv): | |
| 1129 options, args = self._parse_args(sys_argv, 4) | |
| 1130 dump_path = args[1] | |
| 1131 target_policy = args[2] | |
| 1132 component_name = args[3] | |
| 1133 depth = args[4] | |
| 1134 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
| 1135 policy_set = PolicySet.load(self._parse_policy_list(target_policy)) | |
| 1136 | |
| 1137 self._output(dump, policy_set[target_policy], bucket_set, | |
| 1138 component_name, int(depth), sys.stdout) | |
| 1139 return 0 | |
| 1140 | |
| 1141 def _output(self, dump, policy, bucket_set, component_name, depth, out): | |
| 1142 """Prints all stacktraces in a given component of given depth. | |
| 1143 | |
| 1144 Args: | |
| 1145 dump: A Dump object. | |
| 1146 policy: A Policy object. | |
| 1147 bucket_set: A BucketSet object. | |
| 1148 component_name: A name of component for filtering. | |
| 1149 depth: An integer representing depth to be printed. | |
| 1150 out: An IO object to output. | |
| 1151 """ | |
| 1152 sizes = {} | |
| 1153 | |
| 1154 ExpandCommand._accumulate( | |
| 1155 dump, policy, bucket_set, component_name, depth, sizes) | |
| 1156 | |
| 1157 sorted_sizes_list = sorted( | |
| 1158 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) | |
| 1159 total = 0 | |
| 1160 for size_pair in sorted_sizes_list: | |
| 1161 out.write('%10d %s\n' % (size_pair[1], size_pair[0])) | |
| 1162 total += size_pair[1] | |
| 1163 LOGGER.info('total: %d\n' % total) | |
| 1164 | |
| 1165 @staticmethod | |
| 1166 def _accumulate(dump, policy, bucket_set, component_name, depth, sizes): | |
| 1167 for line in dump.iter_stacktrace: | |
| 1168 words = line.split() | |
| 1169 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 1170 component_match = policy.find(bucket) | |
| 1171 if component_match == component_name: | |
| 1172 stacktrace_sequence = '' | |
| 1173 if bucket.typeinfo: | |
| 1174 stacktrace_sequence += '(type=%s)' % bucket.symbolized_typeinfo | |
| 1175 stacktrace_sequence += ' (type.name=%s) ' % bucket.typeinfo_name | |
| 1176 for stack in bucket.symbolized_stacktrace[ | |
| 1177 0 : min(len(bucket.symbolized_stacktrace), 1 + depth)]: | |
| 1178 stacktrace_sequence += stack + ' ' | |
| 1179 if not stacktrace_sequence in sizes: | |
| 1180 sizes[stacktrace_sequence] = 0 | |
| 1181 sizes[stacktrace_sequence] += int(words[COMMITTED]) | |
| 1182 | |
| 1183 | |
| 1184 class PProfCommand(Command): | |
| 1185 def __init__(self): | |
| 1186 super(PProfCommand, self).__init__( | |
| 1187 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') | |
| 1188 self._parser.add_option('-c', '--component', type='string', | |
| 1189 dest='component', | |
| 1190 help='restrict to COMPONENT', metavar='COMPONENT') | |
| 1191 | |
| 1192 def do(self, sys_argv): | |
| 1193 options, args = self._parse_args(sys_argv, 2) | |
| 1194 | |
| 1195 dump_path = args[1] | |
| 1196 target_policy = args[2] | |
| 1197 component = options.component | |
| 1198 | |
| 1199 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
| 1200 policy_set = PolicySet.load(self._parse_policy_list(target_policy)) | |
| 1201 | |
| 1202 with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f: | |
| 1203 maps_lines = maps_f.readlines() | |
| 1204 PProfCommand._output( | |
| 1205 dump, policy_set[target_policy], bucket_set, maps_lines, component, | |
| 1206 sys.stdout) | |
| 1207 | |
| 1208 return 0 | |
| 1209 | |
| 1210 @staticmethod | |
| 1211 def _output(dump, policy, bucket_set, maps_lines, component_name, out): | |
| 1212 """Converts the heap profile dump so it can be processed by pprof. | |
| 1213 | |
| 1214 Args: | |
| 1215 dump: A Dump object. | |
| 1216 policy: A Policy object. | |
| 1217 bucket_set: A BucketSet object. | |
| 1218 maps_lines: A list of strings containing /proc/.../maps. | |
| 1219 component_name: A name of component for filtering. | |
| 1220 out: An IO object to output. | |
| 1221 """ | |
| 1222 out.write('heap profile: ') | |
| 1223 com_committed, com_allocs = PProfCommand._accumulate( | |
| 1224 dump, policy, bucket_set, component_name) | |
| 1225 | |
| 1226 out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( | |
| 1227 com_allocs, com_committed, com_allocs, com_committed)) | |
| 1228 | |
| 1229 PProfCommand._output_stacktrace_lines( | |
| 1230 dump, policy, bucket_set, component_name, out) | |
| 1231 | |
| 1232 out.write('MAPPED_LIBRARIES:\n') | |
| 1233 for line in maps_lines: | |
| 1234 out.write(line) | |
| 1235 | |
| 1236 @staticmethod | |
| 1237 def _accumulate(dump, policy, bucket_set, component_name): | |
| 1238 """Accumulates size of committed chunks and the number of allocated chunks. | |
| 1239 | |
| 1240 Args: | |
| 1241 dump: A Dump object. | |
| 1242 policy: A Policy object. | |
| 1243 bucket_set: A BucketSet object. | |
| 1244 component_name: A name of component for filtering. | |
| 1245 | |
| 1246 Returns: | |
| 1247 Two integers which are the accumulated size of committed regions and the | |
| 1248 number of allocated chunks, respectively. | |
| 1249 """ | |
| 1250 com_committed = 0 | |
| 1251 com_allocs = 0 | |
| 1252 for line in dump.iter_stacktrace: | |
| 1253 words = line.split() | |
| 1254 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 1255 if (not bucket or | |
| 1256 (component_name and component_name != policy.find(bucket))): | |
| 1257 continue | |
| 1258 | |
| 1259 com_committed += int(words[COMMITTED]) | |
| 1260 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) | |
| 1261 | |
| 1262 return com_committed, com_allocs | |
| 1263 | |
| 1264 @staticmethod | |
| 1265 def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out): | |
| 1266 """Prints information of stacktrace lines for pprof. | |
| 1267 | |
| 1268 Args: | |
| 1269 dump: A Dump object. | |
| 1270 policy: A Policy object. | |
| 1271 bucket_set: A BucketSet object. | |
| 1272 component_name: A name of component for filtering. | |
| 1273 out: An IO object to output. | |
| 1274 """ | |
| 1275 for line in dump.iter_stacktrace: | |
| 1276 words = line.split() | |
| 1277 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
| 1278 if (not bucket or | |
| 1279 (component_name and component_name != policy.find(bucket))): | |
| 1280 continue | |
| 1281 | |
| 1282 out.write('%6d: %8s [%6d: %8s] @' % ( | |
| 1283 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | |
| 1284 words[COMMITTED], | |
| 1285 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | |
| 1286 words[COMMITTED])) | |
| 1287 for address in bucket.stacktrace: | |
| 1288 out.write(' 0x%016x' % address) | |
| 1289 out.write('\n') | |
| 1290 | |
| 1291 | |
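
The pprof text format written above begins with a "heap profile:" header line, followed by one line per bucket and a MAPPED_LIBRARIES section. A sketch reproducing just the header line, with invented totals:

```python
# Reproduce the header line PProfCommand._output() writes, with invented
# totals (com_allocs live chunks, com_committed committed bytes).
com_committed, com_allocs = 40960, 12
header = '%6d: %8s [%6d: %8s] @ heapprofile\n' % (
    com_allocs, com_committed, com_allocs, com_committed)
print(header)  # -> '    12:    40960 [    12:    40960] @ heapprofile'
```
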
| 1292 def main(): | |
| 1293 COMMANDS = { | |
| 1294 'csv': CSVCommand, | |
| 1295 'expand': ExpandCommand, | |
| 1296 'json': JSONCommand, | |
| 1297 'list': ListCommand, | |
| 1298 'pprof': PProfCommand, | |
| 1299 'stacktrace': StacktraceCommand, | |
| 1300 } | |
| 1301 | |
| 1302 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS): | |
| 1303 sys.stderr.write("""Usage: %s <command> [options] [<args>] | |
| 1304 | |
| 1305 Commands: | |
| 1306 csv Classify memory usage in CSV | |
| 1307 expand Show all stacktraces contained in the specified component | |
| 1308 json Classify memory usage in JSON | |
| 1309 list Classify memory usage in simple listing format | |
| 1310 pprof Format the profile dump so that it can be processed by pprof | |
| 1311 stacktrace Convert runtime addresses to symbol names | |
| 1312 | |
| 1313 Quick Reference: | |
| 1314 dmprof csv [-p POLICY] <first-dump> | |
| 1315 dmprof expand <dump> <policy> <component> <depth> | |
| 1316 dmprof json [-p POLICY] <first-dump> | |
| 1317 dmprof list [-p POLICY] <first-dump> | |
| 1318 dmprof pprof [-c COMPONENT] <dump> <policy> | |
| 1319 dmprof stacktrace <dump> | |
| 1320 """ % (sys.argv[0])) | |
| 1321 sys.exit(1) | |
| 1322 action = sys.argv.pop(1) | |
| 1323 | |
| 1324 LOGGER.setLevel(logging.DEBUG) | |
| 1325 handler = logging.StreamHandler() | |
| 1326 handler.setLevel(logging.INFO) | |
| 1327 formatter = logging.Formatter('%(message)s') | |
| 1328 handler.setFormatter(formatter) | |
| 1329 LOGGER.addHandler(handler) | |
| 1330 | |
| 1331 try: | |
| 1332 errorcode = COMMANDS[action]().do(sys.argv) | |
| 1333 except ParsingException, e: | |
| 1334 errorcode = 1 | |
| 1335 sys.stderr.write('Exit by parsing error: %s\n' % e) | |
| 1336 | |
| 1337 return errorcode | |
| 1338 | |
| 1339 | |
| 1340 if __name__ == '__main__': | |
| 1341 sys.exit(main()) | |