Index: tools/deep_memory_profiler/lib/dump.py
diff --git a/tools/deep_memory_profiler/lib/dump.py b/tools/deep_memory_profiler/lib/dump.py
index 1fa4fc3b698860092dd7f36d432ecd0b03bbeefb..798763a1f529691bf86a91a6a9cd3aac4ffbd1b2 100644
--- a/tools/deep_memory_profiler/lib/dump.py
+++ b/tools/deep_memory_profiler/lib/dump.py
@@ -2,138 +2,60 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
-import copy
-import datetime
 import logging
 import os
-import re
-import time
-
-from lib.exceptions import EmptyDumpException, InvalidDumpException
-from lib.exceptions import ObsoleteDumpVersionException, ParsingException
-from lib.pageframe import PageFrame
-from lib.range_dict import ExclusiveRangeDict
-from lib.symbol import procfs
 LOGGER = logging.getLogger('dmprof')
-VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _AT, BUCKET_ID = range(6)
-
-
-# Heap Profile Dump versions
-
-# DUMP_DEEP_[1-4] are obsolete.
-# DUMP_DEEP_2+ distinct mmap regions and malloc chunks.
-# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
-# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
-# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
-# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
-DUMP_DEEP_1 = 'DUMP_DEEP_1'
-DUMP_DEEP_2 = 'DUMP_DEEP_2'
-DUMP_DEEP_3 = 'DUMP_DEEP_3'
-DUMP_DEEP_4 = 'DUMP_DEEP_4'
-
-DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
-
-# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
-# malloc and mmap are identified in bucket files.
-# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
-DUMP_DEEP_5 = 'DUMP_DEEP_5'
-
-# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
-DUMP_DEEP_6 = 'DUMP_DEEP_6'
 class Dump(object):
   """Represents a heap profile dump."""
-
-  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
-
-  _HOOK_PATTERN = re.compile(
-      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
-      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)
-
-  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
-                               '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
-  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
-                                 '(?P<RESERVED>[0-9]+)')
-
-  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
-  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
-
-  _TIME_PATTERN_FORMAT = re.compile(
-      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
-  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
-
-  def __init__(self, path, modified_time):
-    self._path = path
-    matched = self._PATH_PATTERN.match(path)
-    self._pid = int(matched.group(2))
-    self._count = int(matched.group(3))
-    self._time = modified_time
-    self._map = {}
-    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
-    self._stacktrace_lines = []
-    self._global_stats = {}  # used only in apply_policy
-
-    self._run_id = ''
-    self._pagesize = 4096
-    self._pageframe_length = 0
-    self._pageframe_encoding = ''
-    self._has_pagecount = False
-
-    self._version = ''
-    self._lines = []
+  def __init__(self):
+    pass
   @property
   def path(self):
-    return self._path
+    raise NotImplementedError
   @property
   def count(self):
-    return self._count
+    raise NotImplementedError
   @property
   def time(self):
-    return self._time
+    raise NotImplementedError
   @property
   def iter_map(self):
-    for region in sorted(self._map.iteritems()):
-      yield region[0], region[1]
+    raise NotImplementedError
   @property
   def iter_stacktrace(self):
-    for line in self._stacktrace_lines:
-      words = line.split()
-      yield (int(words[BUCKET_ID]),
-             int(words[VIRTUAL]),
-             int(words[COMMITTED]),
-             int(words[ALLOC_COUNT]),
-             int(words[FREE_COUNT]))
+    raise NotImplementedError
   def global_stat(self, name):
-    return self._global_stats[name]
+    raise NotImplementedError
   @property
   def run_id(self):
-    return self._run_id
+    raise NotImplementedError
   @property
   def pagesize(self):
-    return self._pagesize
+    raise NotImplementedError
   @property
   def pageframe_length(self):
-    return self._pageframe_length
+    raise NotImplementedError
   @property
   def pageframe_encoding(self):
-    return self._pageframe_encoding
+    raise NotImplementedError
   @property
   def has_pagecount(self):
-    return self._has_pagecount
+    raise NotImplementedError
   @staticmethod
   def load(path, log_header='Loading a heap profile dump: '):
@@ -149,263 +71,12 @@ class Dump(object):
     Raises:
       ParsingException for invalid heap profile dumps.
     """
-    dump = Dump(path, os.stat(path).st_mtime)
+    from lib.deep_dump import DeepDump
+    dump = DeepDump(path, os.stat(path).st_mtime)
     with open(path, 'r') as f:
       dump.load_file(f, log_header)
     return dump
-  def load_file(self, f, log_header):
-    self._lines = [line for line in f
-                   if line and not line.startswith('#')]
-
-    try:
-      self._version, ln = self._parse_version()
-      self._parse_meta_information()
-      if self._version == DUMP_DEEP_6:
-        self._parse_mmap_list()
-      self._parse_global_stats()
-      self._extract_stacktrace_lines(ln)
-    except EmptyDumpException:
-      LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
-    except ParsingException, e:
-      LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
-      raise
-    else:
-      LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))
-
-  def _parse_version(self):
-    """Parses a version string in self._lines.
-
-    Returns:
-      A pair of (a string representing a version of the stacktrace dump,
-      and an integer indicating a line number next to the version string).
-
-    Raises:
-      ParsingException for invalid dump versions.
-    """
-    version = ''
-
-    # Skip until an identifiable line.
-    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
-    if not self._lines:
-      raise EmptyDumpException('Empty heap dump file.')
-    (ln, found) = skip_while(
-        0, len(self._lines),
-        lambda n: not self._lines[n].startswith(headers))
-    if not found:
-      raise InvalidDumpException('No version header.')
-
-    # Identify a version.
-    if self._lines[ln].startswith('heap profile: '):
-      version = self._lines[ln][13:].strip()
-      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
-        (ln, _) = skip_while(
-            ln, len(self._lines),
-            lambda n: self._lines[n] != 'STACKTRACES:\n')
-      elif version in DUMP_DEEP_OBSOLETE:
-        raise ObsoleteDumpVersionException(version)
-      else:
-        raise InvalidDumpException('Invalid version: %s' % version)
-    elif self._lines[ln] == 'STACKTRACES:\n':
-      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
-    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
-      raise ObsoleteDumpVersionException(DUMP_DEEP_2)
-
-    return (version, ln)
-
-  def _parse_global_stats(self):
-    """Parses lines in self._lines as global stats."""
-    (ln, _) = skip_while(
-        0, len(self._lines),
-        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
-
-    global_stat_names = [
-        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
-        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
-        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
-        'nonprofiled-stack', 'nonprofiled-other',
-        'profiled-mmap', 'profiled-malloc']
-
-    for prefix in global_stat_names:
-      (ln, _) = skip_while(
-          ln, len(self._lines),
-          lambda n: self._lines[n].split()[0] != prefix)
-      words = self._lines[ln].split()
-      self._global_stats[prefix + '_virtual'] = int(words[-2])
-      self._global_stats[prefix + '_committed'] = int(words[-1])
-
-  def _parse_meta_information(self):
-    """Parses lines in self._lines for meta information."""
-    (ln, found) = skip_while(
-        0, len(self._lines),
-        lambda n: self._lines[n] != 'META:\n')
-    if not found:
-      return
-    ln += 1
-
-    while True:
-      if self._lines[ln].startswith('Time:'):
-        matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
-        matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
-        if matched_format:
-          self._time = time.mktime(datetime.datetime.strptime(
-              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
-          if matched_format.group(2):
-            self._time += float(matched_format.group(2)[1:]) / 1000.0
-        elif matched_seconds:
-          self._time = float(matched_seconds.group(1))
-      elif self._lines[ln].startswith('Reason:'):
-        pass  # Nothing to do for 'Reason:'
-      elif self._lines[ln].startswith('PageSize: '):
-        self._pagesize = int(self._lines[ln][10:])
-      elif self._lines[ln].startswith('CommandLine:'):
-        pass
-      elif (self._lines[ln].startswith('PageFrame: ') or
-            self._lines[ln].startswith('PFN: ')):
-        if self._lines[ln].startswith('PageFrame: '):
-          words = self._lines[ln][11:].split(',')
-        else:
-          words = self._lines[ln][5:].split(',')
-        for word in words:
-          if word == '24':
-            self._pageframe_length = 24
-          elif word == 'Base64':
-            self._pageframe_encoding = 'base64'
-          elif word == 'PageCount':
-            self._has_pagecount = True
-      elif self._lines[ln].startswith('RunID: '):
-        self._run_id = self._lines[ln][7:].strip()
-      elif (self._lines[ln].startswith('MMAP_LIST:') or
-            self._lines[ln].startswith('GLOBAL_STATS:')):
-        # Skip until "MMAP_LIST:" or "GLOBAL_STATS" is found.
-        break
-      else:
-        pass
-      ln += 1
-
-  def _parse_mmap_list(self):
-    """Parses lines in self._lines as a mmap list."""
-    (ln, found) = skip_while(
-        0, len(self._lines),
-        lambda n: self._lines[n] != 'MMAP_LIST:\n')
-    if not found:
-      return {}
-
-    ln += 1
-    self._map = {}
-    current_vma = {}
-    pageframe_list = []
-    while True:
-      entry = procfs.ProcMaps.parse_line(self._lines[ln])
-      if entry:
-        current_vma = {}
-        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
-          for key, value in entry.as_dict().iteritems():
-            attr[key] = value
-            current_vma[key] = value
-        ln += 1
-        continue
-
-      if self._lines[ln].startswith(' PF: '):
-        for pageframe in self._lines[ln][5:].split():
-          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
-        ln += 1
-        continue
-
-      matched = self._HOOK_PATTERN.match(self._lines[ln])
-      if not matched:
-        break
-      # 2: starting address
-      # 5: end address
-      # 7: hooked or unhooked
-      # 8: additional information
-      if matched.group(7) == 'hooked':
-        submatched = self._HOOKED_PATTERN.match(matched.group(8))
-        if not submatched:
-          submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
-      elif matched.group(7) == 'unhooked':
-        submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
-        if not submatched:
-          submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
-      else:
-        assert matched.group(7) in ['hooked', 'unhooked']
-
-      submatched_dict = submatched.groupdict()
-      region_info = { 'vma': current_vma }
-      if submatched_dict.get('TYPE'):
-        region_info['type'] = submatched_dict['TYPE'].strip()
-      if submatched_dict.get('COMMITTED'):
-        region_info['committed'] = int(submatched_dict['COMMITTED'])
-      if submatched_dict.get('RESERVED'):
-        region_info['reserved'] = int(submatched_dict['RESERVED'])
-      if submatched_dict.get('BUCKETID'):
-        region_info['bucket_id'] = int(submatched_dict['BUCKETID'])
-
-      if matched.group(1) == '(':
-        start = current_vma['begin']
-      else:
-        start = int(matched.group(2), 16)
-      if matched.group(4) == '(':
-        end = current_vma['end']
-      else:
-        end = int(matched.group(5), 16)
-
-      if pageframe_list and pageframe_list[0].start_truncated:
-        pageframe_list[0].set_size(
-            pageframe_list[0].size - start % self._pagesize)
-      if pageframe_list and pageframe_list[-1].end_truncated:
-        pageframe_list[-1].set_size(
-            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
-      region_info['pageframe'] = pageframe_list
-      pageframe_list = []
-
-      self._map[(start, end)] = (matched.group(7), region_info)
-      ln += 1
-
-  def _extract_stacktrace_lines(self, line_number):
-    """Extracts the position of stacktrace lines.
-
-    Valid stacktrace lines are stored into self._stacktrace_lines.
-
-    Args:
-      line_number: A line number to start parsing in lines.
-
-    Raises:
-      ParsingException for invalid dump versions.
-    """
-    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
-      (line_number, _) = skip_while(
-          line_number, len(self._lines),
-          lambda n: not self._lines[n].split()[0].isdigit())
-      stacktrace_start = line_number
-      (line_number, _) = skip_while(
-          line_number, len(self._lines),
-          lambda n: self._check_stacktrace_line(self._lines[n]))
-      self._stacktrace_lines = self._lines[stacktrace_start:line_number]
-
-    elif self._version in DUMP_DEEP_OBSOLETE:
-      raise ObsoleteDumpVersionException(self._version)
-
-    else:
-      raise InvalidDumpException('Invalid version: %s' % self._version)
-
-  @staticmethod
-  def _check_stacktrace_line(stacktrace_line):
-    """Checks if a given stacktrace_line is valid as stacktrace.
-
-    Args:
-      stacktrace_line: A string to be checked.
-
-    Returns:
-      True if the given stacktrace_line is valid.
-    """
-    words = stacktrace_line.split()
-    if len(words) < BUCKET_ID + 1:
-      return False
-    if words[BUCKET_ID - 1] != '@':
-      return False
-    return True
-
 class DumpList(object):
   """Represents a sequence of heap profile dumps.
@@ -432,59 +103,3 @@ class DumpList(object):
   def __getitem__(self, index):
     return Dump.load(self._dump_path_list[index])
-
-
-class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
-  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
-  _DUMMY_ENTRY = procfs.ProcMapsEntry(
-      0,     # begin
-      0,     # end
-      '-',   # readable
-      '-',   # writable
-      '-',   # executable
-      '-',   # private
-      0,     # offset
-      '00',  # major
-      '00',  # minor
-      0,     # inode
-      ''     # name
-  )
-
-  def __init__(self):
-    super(ProcMapsEntryAttribute, self).__init__()
-    self._entry = self._DUMMY_ENTRY.as_dict()
-
-  def __str__(self):
-    return str(self._entry)
-
-  def __repr__(self):
-    return 'ProcMapsEntryAttribute' + str(self._entry)
-
-  def __getitem__(self, key):
-    return self._entry[key]
-
-  def __setitem__(self, key, value):
-    if key not in self._entry:
-      raise KeyError(key)
-    self._entry[key] = value
-
-  def copy(self):
-    new_entry = ProcMapsEntryAttribute()
-    for key, value in self._entry.iteritems():
-      new_entry[key] = copy.deepcopy(value)
-    return new_entry
-
-
-def skip_while(index, max_index, skipping_condition):
-  """Increments |index| until |skipping_condition|(|index|) is False.
-
-  Returns:
-    A pair of an integer indicating a line number after skipped, and a
-    boolean value which is True if found a line which skipping_condition
-    is False for.
-  """
-  while skipping_condition(index):
-    index += 1
-    if index >= max_index:
-      return index, False
-  return index, True