| Index: tools/deep_memory_profiler/lib/dump.py
| diff --git a/tools/deep_memory_profiler/lib/dump.py b/tools/deep_memory_profiler/lib/dump.py
| index 1fa4fc3b698860092dd7f36d432ecd0b03bbeefb..798763a1f529691bf86a91a6a9cd3aac4ffbd1b2 100644
| --- a/tools/deep_memory_profiler/lib/dump.py
| +++ b/tools/deep_memory_profiler/lib/dump.py
| @@ -2,138 +2,60 @@
| # Use of this source code is governed by a BSD-style license that can be
| # found in the LICENSE file.
|
| -import copy
| -import datetime
| import logging
| import os
| -import re
| -import time
| -
| -from lib.exceptions import EmptyDumpException, InvalidDumpException
| -from lib.exceptions import ObsoleteDumpVersionException, ParsingException
| -from lib.pageframe import PageFrame
| -from lib.range_dict import ExclusiveRangeDict
| -from lib.symbol import procfs
|
|
| LOGGER = logging.getLogger('dmprof')
| -VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _AT, BUCKET_ID = range(6)
| -
| -
| -# Heap Profile Dump versions
| -
| -# DUMP_DEEP_[1-4] are obsolete.
| -# DUMP_DEEP_2+ distinguish mmap regions from malloc chunks.
| -# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
| -# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
| -# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
| -# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
| -DUMP_DEEP_1 = 'DUMP_DEEP_1'
| -DUMP_DEEP_2 = 'DUMP_DEEP_2'
| -DUMP_DEEP_3 = 'DUMP_DEEP_3'
| -DUMP_DEEP_4 = 'DUMP_DEEP_4'
| -
| -DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
| -
| -# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
| -# malloc and mmap are identified in bucket files.
| -# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
| -DUMP_DEEP_5 = 'DUMP_DEEP_5'
| -
| -# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
| -DUMP_DEEP_6 = 'DUMP_DEEP_6'
|
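Note on the deleted version table: DUMP_DEEP_1 through DUMP_DEEP_4 are rejected as obsolete by the parser, and the comments pair each dump generation with the policy generation that understands it. Purely as a reading aid (this mapping is implied by the comments above; the dict itself is not code in dump.py or in this patch), it amounts to:

    # Illustrative summary of the deleted comments; not part of the patch.
    VERSION_TO_POLICY = {
        'DUMP_DEEP_1': 'POLICY_DEEP_1',                  # obsolete
        'DUMP_DEEP_2': 'POLICY_DEEP_1',                  # obsolete
        'DUMP_DEEP_3': 'POLICY_DEEP_2 / POLICY_DEEP_3',  # obsolete
        'DUMP_DEEP_4': 'POLICY_DEEP_2 / POLICY_DEEP_3',  # obsolete
        'DUMP_DEEP_5': 'POLICY_DEEP_4',
        'DUMP_DEEP_6': 'POLICY_DEEP_4',  # DUMP_DEEP_5 plus an MMAP_LIST section
    }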
|
|
| class Dump(object):
|   """Represents a heap profile dump."""
| -
| -  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
| -
| -  _HOOK_PATTERN = re.compile(
| -      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
| -      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)
| -
| -  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
| -                               '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
| -  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
| -                                 '(?P<RESERVED>[0-9]+)')
| -
| -  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
| -  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
| -
| -  _TIME_PATTERN_FORMAT = re.compile(
| -      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
| -  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
| -
| -  def __init__(self, path, modified_time):
| -    self._path = path
| -    matched = self._PATH_PATTERN.match(path)
| -    self._pid = int(matched.group(2))
| -    self._count = int(matched.group(3))
| -    self._time = modified_time
| -    self._map = {}
| -    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
| -    self._stacktrace_lines = []
| -    self._global_stats = {} # used only in apply_policy
| -
| -    self._run_id = ''
| -    self._pagesize = 4096
| -    self._pageframe_length = 0
| -    self._pageframe_encoding = ''
| -    self._has_pagecount = False
| -
| -    self._version = ''
| -    self._lines = []
| +  def __init__(self):
| +    pass
|
|   @property
|   def path(self):
| -    return self._path
| +    raise NotImplementedError
|
|   @property
|   def count(self):
| -    return self._count
| +    raise NotImplementedError
|
|   @property
|   def time(self):
| -    return self._time
| +    raise NotImplementedError
|
|   @property
|   def iter_map(self):
| -    for region in sorted(self._map.iteritems()):
| -      yield region[0], region[1]
| +    raise NotImplementedError
|
|   @property
|   def iter_stacktrace(self):
| -    for line in self._stacktrace_lines:
| -      words = line.split()
| -      yield (int(words[BUCKET_ID]),
| -             int(words[VIRTUAL]),
| -             int(words[COMMITTED]),
| -             int(words[ALLOC_COUNT]),
| -             int(words[FREE_COUNT]))
| +    raise NotImplementedError
|
|   def global_stat(self, name):
| -    return self._global_stats[name]
| +    raise NotImplementedError
|
|   @property
|   def run_id(self):
| -    return self._run_id
| +    raise NotImplementedError
|
|   @property
|   def pagesize(self):
| -    return self._pagesize
| +    raise NotImplementedError
|
|   @property
|   def pageframe_length(self):
| -    return self._pageframe_length
| +    raise NotImplementedError
|
|   @property
|   def pageframe_encoding(self):
| -    return self._pageframe_encoding
| +    raise NotImplementedError
|
|   @property
|   def has_pagecount(self):
| -    return self._has_pagecount
| +    raise NotImplementedError
|
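With every accessor reduced to raise NotImplementedError, Dump is now a pure interface; concrete readers (such as the DeepDump that load() below imports) are expected to override all of it. A minimal sketch of a conforming subclass — the class is hypothetical and only two of the properties are shown:

    class InMemoryDump(Dump):
      """Hypothetical Dump backed by in-memory values, e.g. for tests."""

      def __init__(self, path, dump_time):
        super(InMemoryDump, self).__init__()
        self._path = path
        self._time = dump_time

      @property
      def path(self):
        return self._path

      @property
      def time(self):
        return self._time

An abc.ABCMeta metaclass with abc.abstractproperty would fail at instantiation time instead of at first access, but the patch keeps the lighter NotImplementedError convention.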
|
|   @staticmethod
|   def load(path, log_header='Loading a heap profile dump: '):
| @@ -149,263 +71,12 @@ class Dump(object):
|     Raises:
|       ParsingException for invalid heap profile dumps.
|     """
| -    dump = Dump(path, os.stat(path).st_mtime)
| +    from lib.deep_dump import DeepDump
| +    dump = DeepDump(path, os.stat(path).st_mtime)
|     with open(path, 'r') as f:
|       dump.load_file(f, log_header)
|     return dump
|
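load() now only chooses and drives a concrete reader. The function-level import of DeepDump is presumably there to dodge a circular import: lib/deep_dump.py has to import Dump from this module at load time, so importing it here defers the dependency until the first dump is actually loaded. The assumed layout (lib/deep_dump.py is not shown in this patch):

    # lib/deep_dump.py (assumed, not part of this diff)
    from lib.dump import Dump

    class DeepDump(Dump):
      def __init__(self, path, modified_time):
        super(DeepDump, self).__init__()
        # ...the parsing state formerly set up in Dump.__init__...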
|
| -  def load_file(self, f, log_header):
| -    self._lines = [line for line in f
| -                   if line and not line.startswith('#')]
| -
| -    try:
| -      self._version, ln = self._parse_version()
| -      self._parse_meta_information()
| -      if self._version == DUMP_DEEP_6:
| -        self._parse_mmap_list()
| -      self._parse_global_stats()
| -      self._extract_stacktrace_lines(ln)
| -    except EmptyDumpException:
| -      LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
| -    except ParsingException, e:
| -      LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
| -      raise
| -    else:
| -      LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))
| -
| -  def _parse_version(self):
| -    """Parses a version string in self._lines.
| -
| -    Returns:
| -      A pair of (a string representing a version of the stacktrace dump,
| -      and an integer indicating a line number next to the version string).
| -
| -    Raises:
| -      ParsingException for invalid dump versions.
| -    """
| -    version = ''
| -
| -    # Skip until an identifiable line.
| -    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
| -    if not self._lines:
| -      raise EmptyDumpException('Empty heap dump file.')
| -    (ln, found) = skip_while(
| -        0, len(self._lines),
| -        lambda n: not self._lines[n].startswith(headers))
| -    if not found:
| -      raise InvalidDumpException('No version header.')
| -
| -    # Identify a version.
| -    if self._lines[ln].startswith('heap profile: '):
| -      version = self._lines[ln][13:].strip()
| -      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
| -        (ln, _) = skip_while(
| -            ln, len(self._lines),
| -            lambda n: self._lines[n] != 'STACKTRACES:\n')
| -      elif version in DUMP_DEEP_OBSOLETE:
| -        raise ObsoleteDumpVersionException(version)
| -      else:
| -        raise InvalidDumpException('Invalid version: %s' % version)
| -    elif self._lines[ln] == 'STACKTRACES:\n':
| -      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
| -    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
| -      raise ObsoleteDumpVersionException(DUMP_DEEP_2)
| -
| -    return (version, ln)
|
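For reference, the sniffing above keys off the first identifiable header line. A self-contained illustration with a fabricated DUMP_DEEP_6 header (sample data, not a real dump):

    lines = [
        'heap profile: DUMP_DEEP_6\n',
        'META:\n',
        'GLOBAL_STATS:\n',
        'STACKTRACES:\n',
    ]
    # lines[0][13:] == ' DUMP_DEEP_6\n'; strip() trims both ends.
    version = lines[0][13:].strip()
    assert version == 'DUMP_DEEP_6'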
| -
| -  def _parse_global_stats(self):
| -    """Parses lines in self._lines as global stats."""
| -    (ln, _) = skip_while(
| -        0, len(self._lines),
| -        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
| -
| -    global_stat_names = [
| -        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
| -        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
| -        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
| -        'nonprofiled-stack', 'nonprofiled-other',
| -        'profiled-mmap', 'profiled-malloc']
| -
| -    for prefix in global_stat_names:
| -      (ln, _) = skip_while(
| -          ln, len(self._lines),
| -          lambda n: self._lines[n].split()[0] != prefix)
| -      words = self._lines[ln].split()
| -      self._global_stats[prefix + '_virtual'] = int(words[-2])
| -      self._global_stats[prefix + '_committed'] = int(words[-1])
|
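Each global stat is a 'name virtual committed' row, and the parser deliberately reads the last two whitespace-separated words, which keeps it robust to extra padding. A fabricated row:

    line = 'profiled-malloc     1048576      524288\n'
    words = line.split()
    virtual, committed = int(words[-2]), int(words[-1])
    assert (virtual, committed) == (1048576, 524288)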
| -
| -  def _parse_meta_information(self):
| -    """Parses lines in self._lines for meta information."""
| -    (ln, found) = skip_while(
| -        0, len(self._lines),
| -        lambda n: self._lines[n] != 'META:\n')
| -    if not found:
| -      return
| -    ln += 1
| -
| -    while True:
| -      if self._lines[ln].startswith('Time:'):
| -        matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
| -        matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
| -        if matched_format:
| -          self._time = time.mktime(datetime.datetime.strptime(
| -              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
| -          if matched_format.group(2):
| -            self._time += float(matched_format.group(2)[1:]) / 1000.0
| -        elif matched_seconds:
| -          self._time = float(matched_seconds.group(1))
| -      elif self._lines[ln].startswith('Reason:'):
| -        pass # Nothing to do for 'Reason:'
| -      elif self._lines[ln].startswith('PageSize: '):
| -        self._pagesize = int(self._lines[ln][10:])
| -      elif self._lines[ln].startswith('CommandLine:'):
| -        pass
| -      elif (self._lines[ln].startswith('PageFrame: ') or
| -            self._lines[ln].startswith('PFN: ')):
| -        if self._lines[ln].startswith('PageFrame: '):
| -          words = self._lines[ln][11:].split(',')
| -        else:
| -          words = self._lines[ln][5:].split(',')
| -        for word in words:
| -          if word == '24':
| -            self._pageframe_length = 24
| -          elif word == 'Base64':
| -            self._pageframe_encoding = 'base64'
| -          elif word == 'PageCount':
| -            self._has_pagecount = True
| -      elif self._lines[ln].startswith('RunID: '):
| -        self._run_id = self._lines[ln][7:].strip()
| -      elif (self._lines[ln].startswith('MMAP_LIST:') or
| -            self._lines[ln].startswith('GLOBAL_STATS:')):
| -        # The next section header ("MMAP_LIST:" or "GLOBAL_STATS:") ends META.
| -        break
| -      else:
| -        pass
| -      ln += 1
|
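The META block accepts Time: either as raw epoch seconds or as a formatted timestamp with optional milliseconds, normalized back to epoch seconds. The same conversion as a standalone snippet (fabricated timestamp; assumes the '%Y/%m/%d %H:%M:%S' shape enforced by _TIME_PATTERN_FORMAT):

    import datetime
    import time

    stamp = 'Time: 2013/05/01 12:34:56.789'
    text, millis = stamp[6:].split('.')
    epoch = time.mktime(
        datetime.datetime.strptime(text, '%Y/%m/%d %H:%M:%S').timetuple())
    epoch += float(millis) / 1000.0  # the '.789' suffix adds 0.789 seconds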
| -
| -  def _parse_mmap_list(self):
| -    """Parses lines in self._lines as a mmap list."""
| -    (ln, found) = skip_while(
| -        0, len(self._lines),
| -        lambda n: self._lines[n] != 'MMAP_LIST:\n')
| -    if not found:
| -      return {}
| -
| -    ln += 1
| -    self._map = {}
| -    current_vma = {}
| -    pageframe_list = []
| -    while True:
| -      entry = procfs.ProcMaps.parse_line(self._lines[ln])
| -      if entry:
| -        current_vma = {}
| -        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
| -          for key, value in entry.as_dict().iteritems():
| -            attr[key] = value
| -            current_vma[key] = value
| -        ln += 1
| -        continue
| -
| -      if self._lines[ln].startswith('  PF: '):
| -        for pageframe in self._lines[ln][5:].split():
| -          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
| -        ln += 1
| -        continue
| -
| -      matched = self._HOOK_PATTERN.match(self._lines[ln])
| -      if not matched:
| -        break
| -      # 2: starting address
| -      # 5: end address
| -      # 7: hooked or unhooked
| -      # 8: additional information
| -      if matched.group(7) == 'hooked':
| -        submatched = self._HOOKED_PATTERN.match(matched.group(8))
| -        if not submatched:
| -          submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
| -      elif matched.group(7) == 'unhooked':
| -        submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
| -        if not submatched:
| -          submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
| -      else:
| -        assert matched.group(7) in ['hooked', 'unhooked']
| -
| -      submatched_dict = submatched.groupdict()
| -      region_info = { 'vma': current_vma }
| -      if submatched_dict.get('TYPE'):
| -        region_info['type'] = submatched_dict['TYPE'].strip()
| -      if submatched_dict.get('COMMITTED'):
| -        region_info['committed'] = int(submatched_dict['COMMITTED'])
| -      if submatched_dict.get('RESERVED'):
| -        region_info['reserved'] = int(submatched_dict['RESERVED'])
| -      if submatched_dict.get('BUCKETID'):
| -        region_info['bucket_id'] = int(submatched_dict['BUCKETID'])
| -
| -      if matched.group(1) == '(':
| -        start = current_vma['begin']
| -      else:
| -        start = int(matched.group(2), 16)
| -      if matched.group(4) == '(':
| -        end = current_vma['end']
| -      else:
| -        end = int(matched.group(5), 16)
| -
| -      if pageframe_list and pageframe_list[0].start_truncated:
| -        pageframe_list[0].set_size(
| -            pageframe_list[0].size - start % self._pagesize)
| -      if pageframe_list and pageframe_list[-1].end_truncated:
| -        pageframe_list[-1].set_size(
| -            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
| -      region_info['pageframe'] = pageframe_list
| -      pageframe_list = []
| -
| -      self._map[(start, end)] = (matched.group(7), region_info)
| -      ln += 1
|
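To make the two-stage matching concrete: _HOOK_PATTERN splits a region line into an address range plus a 'hooked'/'unhooked' tail, and _HOOKED_PATTERN then pulls 'committed / reserved @ bucket_id' out of that tail (parentheses around an address mean it is inherited from the enclosing VMA). A fabricated line accepted by both patterns:

    import re

    _HOOK_PATTERN = re.compile(
        r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
        r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)
    _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                                 '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')

    line = '  7f3600000000 - 7f3600021000   hooked 4096 / 135168 @ 42'
    matched = _HOOK_PATTERN.match(line)
    assert matched.group(7) == 'hooked'
    sub = _HOOKED_PATTERN.match(matched.group(8))
    assert sub.group('COMMITTED') == '4096'
    assert sub.group('RESERVED') == '135168'
    assert sub.group('BUCKETID') == '42'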
| -
| -  def _extract_stacktrace_lines(self, line_number):
| -    """Extracts the position of stacktrace lines.
| -
| -    Valid stacktrace lines are stored into self._stacktrace_lines.
| -
| -    Args:
| -      line_number: A line number to start parsing in lines.
| -
| -    Raises:
| -      ParsingException for invalid dump versions.
| -    """
| -    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
| -      (line_number, _) = skip_while(
| -          line_number, len(self._lines),
| -          lambda n: not self._lines[n].split()[0].isdigit())
| -      stacktrace_start = line_number
| -      (line_number, _) = skip_while(
| -          line_number, len(self._lines),
| -          lambda n: self._check_stacktrace_line(self._lines[n]))
| -      self._stacktrace_lines = self._lines[stacktrace_start:line_number]
| -
| -    elif self._version in DUMP_DEEP_OBSOLETE:
| -      raise ObsoleteDumpVersionException(self._version)
| -
| -    else:
| -      raise InvalidDumpException('Invalid version: %s' % self._version)
| -
| -  @staticmethod
| -  def _check_stacktrace_line(stacktrace_line):
| -    """Checks if a given stacktrace_line is valid as stacktrace.
| -
| -    Args:
| -      stacktrace_line: A string to be checked.
| -
| -    Returns:
| -      True if the given stacktrace_line is valid.
| -    """
| -    words = stacktrace_line.split()
| -    if len(words) < BUCKET_ID + 1:
| -      return False
| -    if words[BUCKET_ID - 1] != '@':
| -      return False
| -    return True
|
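So a valid stacktrace line is 'virtual committed allocs frees @ bucket_id', with the column indices pinned by the VIRTUAL..BUCKET_ID constants deleted at the top of this file. A fabricated line pushed through the same check and the iter_stacktrace unpacking order:

    VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _AT, BUCKET_ID = range(6)

    line = '1024 512 10 2 @ 37'
    words = line.split()
    assert len(words) >= BUCKET_ID + 1 and words[BUCKET_ID - 1] == '@'
    record = (int(words[BUCKET_ID]), int(words[VIRTUAL]),
              int(words[COMMITTED]), int(words[ALLOC_COUNT]),
              int(words[FREE_COUNT]))
    assert record == (37, 1024, 512, 10, 2)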
| -
|
| class DumpList(object):
|   """Represents a sequence of heap profile dumps.
| @@ -432,59 +103,3 @@ class DumpList(object):
|
|   def __getitem__(self, index):
|     return Dump.load(self._dump_path_list[index])
| -
| -
| -class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
| -  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
| -  _DUMMY_ENTRY = procfs.ProcMapsEntry(
| -      0, # begin
| -      0, # end
| -      '-', # readable
| -      '-', # writable
| -      '-', # executable
| -      '-', # private
| -      0, # offset
| -      '00', # major
| -      '00', # minor
| -      0, # inode
| -      '' # name
| -  )
| -
| -  def __init__(self):
| -    super(ProcMapsEntryAttribute, self).__init__()
| -    self._entry = self._DUMMY_ENTRY.as_dict()
| -
| -  def __str__(self):
| -    return str(self._entry)
| -
| -  def __repr__(self):
| -    return 'ProcMapsEntryAttribute' + str(self._entry)
| -
| -  def __getitem__(self, key):
| -    return self._entry[key]
| -
| -  def __setitem__(self, key, value):
| -    if key not in self._entry:
| -      raise KeyError(key)
| -    self._entry[key] = value
| -
| -  def copy(self):
| -    new_entry = ProcMapsEntryAttribute()
| -    for key, value in self._entry.iteritems():
| -      new_entry[key] = copy.deepcopy(value)
| -    return new_entry
|
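ProcMapsEntryAttribute is the per-range attribute that ExclusiveRangeDict clones as ranges split and merge, which is why __setitem__ refuses keys outside the /proc/maps field set and why copy() is deep. A hedged usage sketch, inferred only from the iter_range() call in _parse_mmap_list above:

    # Assumes ExclusiveRangeDict behaves as used in _parse_mmap_list;
    # the addresses and library name are made up.
    procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
    for begin, end, attr in procmaps.iter_range(0x7f3600000000,
                                                0x7f3600021000):
      attr['name'] = '/lib/libfoo.so'  # must be an existing /proc/maps key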
| -
| -
| -def skip_while(index, max_index, skipping_condition):
| -  """Increments |index| until |skipping_condition|(|index|) is False.
| -
| -  Returns:
| -    A pair of (the integer index at which skipping stopped, and a boolean
| -    which is True if a line for which |skipping_condition| is False was
| -    found before reaching |max_index|).
| -  """
| -  while skipping_condition(index):
| -    index += 1
| -    if index >= max_index:
| -      return index, False
| -  return index, True
|
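skip_while was the scanning workhorse of the removed parser: advance an index while a predicate holds, and report whether the scan ran off the end. A standalone example against a fabricated line list:

    lines = ['# comment\n', '# comment\n', 'GLOBAL_STATS:\n', 'total 10 8\n']
    (ln, found) = skip_while(
        0, len(lines), lambda n: lines[n] != 'GLOBAL_STATS:\n')
    assert found and ln == 2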