| Index: tools/deep_memory_profiler/lib/dump.py
|
| diff --git a/tools/deep_memory_profiler/lib/dump.py b/tools/deep_memory_profiler/lib/dump.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..6c058a4b7059048e2723d5130c95a78ba208b1e9
|
| --- /dev/null
|
| +++ b/tools/deep_memory_profiler/lib/dump.py
|
| @@ -0,0 +1,487 @@
|
| +# Copyright 2013 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +import copy
|
| +import datetime
|
| +import logging
|
| +import os
|
| +import re
|
| +import time
|
| +
|
| +from lib.bucket import BUCKET_ID
|
| +from lib.exception import EmptyDumpException, InvalidDumpException
|
| +from lib.exception import ObsoleteDumpVersionException, ParsingException
|
| +from lib.pageframe import PageFrame
|
| +from lib.range_dict import ExclusiveRangeDict
|
| +from lib.symbol import proc_maps
|
| +
|
| +
|
| +LOGGER = logging.getLogger('dmprof')
|
| +
|
| +
|
| +# Heap Profile Dump versions
|
| +
|
| +# DUMP_DEEP_[1-4] are obsolete.
|
| +# DUMP_DEEP_2+ distinguish mmap regions and malloc chunks.
|
| +# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
|
| +# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
|
| +# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
|
| +# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
|
| +DUMP_DEEP_1 = 'DUMP_DEEP_1'
|
| +DUMP_DEEP_2 = 'DUMP_DEEP_2'
|
| +DUMP_DEEP_3 = 'DUMP_DEEP_3'
|
| +DUMP_DEEP_4 = 'DUMP_DEEP_4'
|
| +
|
| +DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
|
| +
|
| +# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
|
| +# malloc and mmap are identified in bucket files.
|
| +# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
|
| +DUMP_DEEP_5 = 'DUMP_DEEP_5'
|
| +
|
| +# DUMP_DEEP_6 adds an mmap list to DUMP_DEEP_5.
|
| +DUMP_DEEP_6 = 'DUMP_DEEP_6'
|
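| +# For reference, a hypothetical DUMP_DEEP_6 dump begins with a header line:
|
| +#   heap profile: DUMP_DEEP_6
|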
| +
|
| +
|
| +class Dump(object):
|
| + """Represents a heap profile dump."""
|
| +
|
| + _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
|
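| + # For example, a hypothetical path 'chrome.12345.0002.heap' yields
|
| + # pid 12345 (group 2) and dump count 2 (group 3).
|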
| +
|
| + _HOOK_PATTERN = re.compile(
|
| + r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
|
| + r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)
|
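| + # A hypothetical MMAP_LIST region line matched by this pattern:
|
| + #    7f531c1b4000 - 7f531c1d5000  hooked 135168 / 135168 @ 24
|
| + # Group 8 ('135168 / 135168 @ 24') is parsed by the patterns below.
|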
| +
|
| + _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
|
| + '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
|
| + _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
|
| + '(?P<RESERVED>[0-9]+)')
|
| +
|
| + _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
|
| + _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
|
| +
|
| + _TIME_PATTERN_FORMAT = re.compile(
|
| + r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
|
| + _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
|
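| + # Hypothetical 'Time:' lines matched by the two patterns above:
|
| + #   Time: 2013/09/13 12:34:56.789
|
| + #   Time: 1379072096
|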
| +
|
| + def __init__(self, path, modified_time):
|
| + self._path = path
|
| + matched = self._PATH_PATTERN.match(path)
|
| + self._pid = int(matched.group(2))
|
| + self._count = int(matched.group(3))
|
| + self._time = modified_time
|
| + self._map = {}
|
| + self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
|
| + self._stacktrace_lines = []
|
| + self._global_stats = {} # used only in apply_policy
|
| +
|
| + self._run_id = ''
|
| + self._pagesize = 4096
|
| + self._pageframe_length = 0
|
| + self._pageframe_encoding = ''
|
| + self._has_pagecount = False
|
| +
|
| + self._version = ''
|
| + self._lines = []
|
| +
|
| + @property
|
| + def path(self):
|
| + return self._path
|
| +
|
| + @property
|
| + def count(self):
|
| + return self._count
|
| +
|
| + @property
|
| + def time(self):
|
| + return self._time
|
| +
|
| + @property
|
| + def iter_map(self):
|
| + for region in sorted(self._map.iteritems()):
|
| + yield region[0], region[1]
|
| +
|
| + def iter_procmaps(self):
|
| + for begin, end, attr in self._procmaps.iter_range():
|
| + yield begin, end, attr
|
| +
|
| + @property
|
| + def iter_stacktrace(self):
|
| + for line in self._stacktrace_lines:
|
| + yield line
|
| +
|
| + def global_stat(self, name):
|
| + return self._global_stats[name]
|
| +
|
| + @property
|
| + def run_id(self):
|
| + return self._run_id
|
| +
|
| + @property
|
| + def pagesize(self):
|
| + return self._pagesize
|
| +
|
| + @property
|
| + def pageframe_length(self):
|
| + return self._pageframe_length
|
| +
|
| + @property
|
| + def pageframe_encoding(self):
|
| + return self._pageframe_encoding
|
| +
|
| + @property
|
| + def has_pagecount(self):
|
| + return self._has_pagecount
|
| +
|
| + @staticmethod
|
| + def load(path, log_header='Loading a heap profile dump: '):
|
| + """Loads a heap profile dump.
|
| +
|
| + Args:
|
| + path: A file path string to load.
|
| + log_header: A preceding string for log messages.
|
| +
|
| + Returns:
|
| + A loaded Dump object.
|
| +
|
| + Raises:
|
| + ParsingException for invalid heap profile dumps.
|
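| +
|
| + Example (with a hypothetical path):
|
| + dump = Dump.load('chrome.12345.0002.heap')
|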
| + """
|
| + dump = Dump(path, os.stat(path).st_mtime)
|
| + with open(path, 'r') as f:
|
| + dump.load_file(f, log_header)
|
| + return dump
|
| +
|
| + def load_file(self, f, log_header):
|
| + self._lines = [line for line in f
|
| + if line and not line.startswith('#')]
|
| +
|
| + try:
|
| + self._version, ln = self._parse_version()
|
| + self._parse_meta_information()
|
| + if self._version == DUMP_DEEP_6:
|
| + self._parse_mmap_list()
|
| + self._parse_global_stats()
|
| + self._extract_stacktrace_lines(ln)
|
| + except EmptyDumpException:
|
| + LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
|
| + except ParsingException, e:
|
| + LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
|
| + raise
|
| + else:
|
| + LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))
|
| +
|
| + def _parse_version(self):
|
| + """Parses a version string in self._lines.
|
| +
|
| + Returns:
|
| + A pair of (a string representing the version of the stacktrace dump,
|
| + and an integer indicating the line number at which parsing should
|
| + continue after the version header).
|
| +
|
| + Raises:
|
| + ParsingException for invalid dump versions.
|
| + """
|
| + version = ''
|
| +
|
| + # Skip until an identifiable line.
|
| + headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
|
| + if not self._lines:
|
| + raise EmptyDumpException('Empty heap dump file.')
|
| + (ln, found) = skip_while(
|
| + 0, len(self._lines),
|
| + lambda n: not self._lines[n].startswith(headers))
|
| + if not found:
|
| + raise InvalidDumpException('No version header.')
|
| +
|
| + # Identify a version.
|
| + if self._lines[ln].startswith('heap profile: '):
|
| + version = self._lines[ln][13:].strip()
|
| + if version in (DUMP_DEEP_5, DUMP_DEEP_6):
|
| + (ln, _) = skip_while(
|
| + ln, len(self._lines),
|
| + lambda n: self._lines[n] != 'STACKTRACES:\n')
|
| + elif version in DUMP_DEEP_OBSOLETE:
|
| + raise ObsoleteDumpVersionException(version)
|
| + else:
|
| + raise InvalidDumpException('Invalid version: %s' % version)
|
| + elif self._lines[ln] == 'STACKTRACES:\n':
|
| + raise ObsoleteDumpVersionException(DUMP_DEEP_1)
|
| + elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
|
| + raise ObsoleteDumpVersionException(DUMP_DEEP_2)
|
| +
|
| + return (version, ln)
|
| +
|
| + def _parse_global_stats(self):
|
| + """Parses lines in self._lines as global stats."""
|
| + (ln, _) = skip_while(
|
| + 0, len(self._lines),
|
| + lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
|
| +
|
| + global_stat_names = [
|
| + 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
|
| + 'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
|
| + 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
|
| + 'nonprofiled-stack', 'nonprofiled-other',
|
| + 'profiled-mmap', 'profiled-malloc']
|
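| + # The last two words of each stat line are the virtual and committed
|
| + # sizes, e.g. a hypothetical line: 'total 4294967296 1048576'.
|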
| +
|
| + for prefix in global_stat_names:
|
| + (ln, _) = skip_while(
|
| + ln, len(self._lines),
|
| + lambda n: self._lines[n].split()[0] != prefix)
|
| + words = self._lines[ln].split()
|
| + self._global_stats[prefix + '_virtual'] = int(words[-2])
|
| + self._global_stats[prefix + '_committed'] = int(words[-1])
|
| +
|
| + def _parse_meta_information(self):
|
| + """Parses lines in self._lines for meta information."""
|
| + (ln, found) = skip_while(
|
| + 0, len(self._lines),
|
| + lambda n: self._lines[n] != 'META:\n')
|
| + if not found:
|
| + return
|
| + ln += 1
|
| +
|
| + while True:
|
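| + # Hypothetical META lines handled by the branches below:
|
| + #   Time: 1379072096
|
| + #   PageSize: 4096
|
| + #   PageFrame: 24,Base64,PageCount
|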
| + if self._lines[ln].startswith('Time:'):
|
| + matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
|
| + matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
|
| + if matched_format:
|
| + self._time = time.mktime(datetime.datetime.strptime(
|
| + matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
|
| + if matched_format.group(2):
|
| + self._time += float(matched_format.group(2)[1:]) / 1000.0
|
| + elif matched_seconds:
|
| + self._time = float(matched_seconds.group(1))
|
| + elif self._lines[ln].startswith('Reason:'):
|
| + pass # Nothing to do for 'Reason:'
|
| + elif self._lines[ln].startswith('PageSize: '):
|
| + self._pagesize = int(self._lines[ln][10:])
|
| + elif self._lines[ln].startswith('CommandLine:'):
|
| + pass
|
| + elif (self._lines[ln].startswith('PageFrame: ') or
|
| + self._lines[ln].startswith('PFN: ')):
|
| + if self._lines[ln].startswith('PageFrame: '):
|
| + words = self._lines[ln][11:].split(',')
|
| + else:
|
| + words = self._lines[ln][5:].split(',')
|
| + for word in words:
|
| + if word == '24':
|
| + self._pageframe_length = 24
|
| + elif word == 'Base64':
|
| + self._pageframe_encoding = 'base64'
|
| + elif word == 'PageCount':
|
| + self._has_pagecount = True
|
| + elif self._lines[ln].startswith('RunID: '):
|
| + self._run_id = self._lines[ln][7:].strip()
|
| + elif (self._lines[ln].startswith('MMAP_LIST:') or
|
| + self._lines[ln].startswith('GLOBAL_STATS:')):
|
| + # Stop parsing META when "MMAP_LIST:" or "GLOBAL_STATS:" is reached.
|
| + break
|
| + else:
|
| + pass
|
| + ln += 1
|
| +
|
| + def _parse_mmap_list(self):
|
| + """Parses lines in self._lines as a mmap list."""
|
| + (ln, found) = skip_while(
|
| + 0, len(self._lines),
|
| + lambda n: self._lines[n] != 'MMAP_LIST:\n')
|
| + if not found:
|
| + return {}
|
| +
|
| + ln += 1
|
| + self._map = {}
|
| + current_vma = {}
|
| + pageframe_list = []
|
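| + # A MMAP_LIST section interleaves /proc/maps-style lines, optional
|
| + # ' PF: ' pageframe lines, and hooked/unhooked region lines, e.g. a
|
| + # hypothetical region line covering the whole current VMA:
|
| + #   (7f531c1b4000)-(7f531c1d5000) hooked 135168 / 135168 @ 24
|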
| + while True:
|
| + entry = proc_maps.ProcMaps.parse_line(self._lines[ln])
|
| + if entry:
|
| + current_vma = {}
|
| + for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
|
| + for key, value in entry.as_dict().iteritems():
|
| + attr[key] = value
|
| + current_vma[key] = value
|
| + ln += 1
|
| + continue
|
| +
|
| + if self._lines[ln].startswith(' PF: '):
|
| + for pageframe in self._lines[ln][5:].split():
|
| + pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
|
| + ln += 1
|
| + continue
|
| +
|
| + matched = self._HOOK_PATTERN.match(self._lines[ln])
|
| + if not matched:
|
| + break
|
| + # 2: starting address
|
| + # 5: end address
|
| + # 7: hooked or unhooked
|
| + # 8: additional information
|
| + if matched.group(7) == 'hooked':
|
| + submatched = self._HOOKED_PATTERN.match(matched.group(8))
|
| + if not submatched:
|
| + submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
|
| + elif matched.group(7) == 'unhooked':
|
| + submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
|
| + if not submatched:
|
| + submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
|
| + else:
|
| + assert matched.group(7) in ['hooked', 'unhooked']
|
| +
|
| + submatched_dict = submatched.groupdict()
|
| + region_info = { 'vma': current_vma }
|
| + if submatched_dict.get('TYPE'):
|
| + region_info['type'] = submatched_dict['TYPE'].strip()
|
| + if submatched_dict.get('COMMITTED'):
|
| + region_info['committed'] = int(submatched_dict['COMMITTED'])
|
| + if submatched_dict.get('RESERVED'):
|
| + region_info['reserved'] = int(submatched_dict['RESERVED'])
|
| + if submatched_dict.get('BUCKETID'):
|
| + region_info['bucket_id'] = int(submatched_dict['BUCKETID'])
|
| +
|
| + if matched.group(1) == '(':
|
| + start = current_vma['begin']
|
| + else:
|
| + start = int(matched.group(2), 16)
|
| + if matched.group(4) == '(':
|
| + end = current_vma['end']
|
| + else:
|
| + end = int(matched.group(5), 16)
|
| +
|
| + if pageframe_list and pageframe_list[0].start_truncated:
|
| + pageframe_list[0].set_size(
|
| + pageframe_list[0].size - start % self._pagesize)
|
| + if pageframe_list and pageframe_list[-1].end_truncated:
|
| + pageframe_list[-1].set_size(
|
| + pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
|
| + region_info['pageframe'] = pageframe_list
|
| + pageframe_list = []
|
| +
|
| + self._map[(start, end)] = (matched.group(7), region_info)
|
| + ln += 1
|
| +
|
| + def _extract_stacktrace_lines(self, line_number):
|
| + """Extracts the position of stacktrace lines.
|
| +
|
| + Valid stacktrace lines are stored into self._stacktrace_lines.
|
| +
|
| + Args:
|
| + line_number: A line number to start parsing in lines.
|
| +
|
| + Raises:
|
| + ParsingException for invalid dump versions.
|
| + """
|
| + if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
|
| + (line_number, _) = skip_while(
|
| + line_number, len(self._lines),
|
| + lambda n: not self._lines[n].split()[0].isdigit())
|
| + stacktrace_start = line_number
|
| + (line_number, _) = skip_while(
|
| + line_number, len(self._lines),
|
| + lambda n: self._check_stacktrace_line(self._lines[n]))
|
| + self._stacktrace_lines = self._lines[stacktrace_start:line_number]
|
| +
|
| + elif self._version in DUMP_DEEP_OBSOLETE:
|
| + raise ObsoleteDumpVersionException(self._version)
|
| +
|
| + else:
|
| + raise InvalidDumpException('Invalid version: %s' % self._version)
|
| +
|
| + @staticmethod
|
| + def _check_stacktrace_line(stacktrace_line):
|
| + """Checks if a given stacktrace_line is valid as stacktrace.
|
| +
|
| + Args:
|
| + stacktrace_line: A string to be checked.
|
| +
|
| + Returns:
|
| + True if the given stacktrace_line is valid.
|
| + """
|
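| + # With a hypothetical BUCKET_ID of 4, a valid line looks like:
|
| + #   1048576 524288 42 @ 123
|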
| + words = stacktrace_line.split()
|
| + if len(words) < BUCKET_ID + 1:
|
| + return False
|
| + if words[BUCKET_ID - 1] != '@':
|
| + return False
|
| + return True
|
| +
|
| +
|
| +class DumpList(object):
|
| + """Represents a sequence of heap profile dumps."""
|
| +
|
| + def __init__(self, dump_list):
|
| + self._dump_list = dump_list
|
| +
|
| + @staticmethod
|
| + def load(path_list):
|
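| + """Loads a sequence of heap profile dumps from |path_list|, e.g. a
|
| + hypothetical ['chrome.12345.0001.heap', 'chrome.12345.0002.heap']."""
|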
| + LOGGER.info('Loading heap dump profiles.')
|
| + dump_list = []
|
| + for path in path_list:
|
| + dump_list.append(Dump.load(path, ' '))
|
| + return DumpList(dump_list)
|
| +
|
| + def __len__(self):
|
| + return len(self._dump_list)
|
| +
|
| + def __iter__(self):
|
| + for dump in self._dump_list:
|
| + yield dump
|
| +
|
| + def __getitem__(self, index):
|
| + return self._dump_list[index]
|
| +
|
| +
|
| +class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
|
| + """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
|
| + _DUMMY_ENTRY = proc_maps.ProcMapsEntry(
|
| + 0, # begin
|
| + 0, # end
|
| + '-', # readable
|
| + '-', # writable
|
| + '-', # executable
|
| + '-', # private
|
| + 0, # offset
|
| + '00', # major
|
| + '00', # minor
|
| + 0, # inode
|
| + '' # name
|
| + )
|
| +
|
| + def __init__(self):
|
| + super(ProcMapsEntryAttribute, self).__init__()
|
| + self._entry = self._DUMMY_ENTRY.as_dict()
|
| +
|
| + def __str__(self):
|
| + return str(self._entry)
|
| +
|
| + def __repr__(self):
|
| + return 'ProcMapsEntryAttribute' + str(self._entry)
|
| +
|
| + def __getitem__(self, key):
|
| + return self._entry[key]
|
| +
|
| + def __setitem__(self, key, value):
|
| + if key not in self._entry:
|
| + raise KeyError(key)
|
| + self._entry[key] = value
|
| +
|
| + def copy(self):
|
| + new_entry = ProcMapsEntryAttribute()
|
| + for key, value in self._entry.iteritems():
|
| + new_entry[key] = copy.deepcopy(value)
|
| + return new_entry
|
| +
|
| +
|
| +def skip_while(index, max_index, skipping_condition):
|
| + """Increments |index| until |skipping_condition|(|index|) is False.
|
| +
|
| + Returns:
|
| + A pair of (the first index at which skipping_condition is False, and
|
| + a boolean which is True if such an index was found before max_index).
|
| + """
|
| + while skipping_condition(index):
|
| + index += 1
|
| + if index >= max_index:
|
| + return index, False
|
| + return index, True
|
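| +
|
| +# Examples, following the loop above:
|
| +#   skip_while(0, 5, lambda n: n < 3) == (3, True)
|
| +#   skip_while(0, 3, lambda n: True) == (3, False)
|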
|
|