Chromium Code Reviews

Unified Diff: tools/deep_memory_profiler/lib/dump.py

Issue 371303002: Refactor dmprof: split lib.Dump into lib.Dump and lib.DeepDump. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 5 months ago
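
In outline, the CL turns lib.Dump into a thin interface (every accessor now
raises NotImplementedError) plus the load() factory, and moves the parsing
code into a DeepDump subclass in lib/deep_dump.py. The following is a
minimal sketch of that shape, not the patch itself: it collapses both
modules into one snippet, trims load() down to construction, and invents a
trivial path accessor purely for illustration.

class Dump(object):
  """Interface; concrete accessors live in a subclass, as in this patch."""

  @property
  def path(self):
    raise NotImplementedError

  @staticmethod
  def load(path):
    # The patch imports DeepDump inside the method body. A plausible
    # reason, not stated in the CL, is to avoid a circular import:
    # lib/deep_dump.py must import lib.dump for its base class. Here
    # both classes share one snippet, so no import is needed.
    return DeepDump(path)


class DeepDump(Dump):
  """Stand-in for the real parser class in lib/deep_dump.py."""

  def __init__(self, path):
    self._path = path

  @property
  def path(self):
    return self._path


print Dump.load('chrome.3573.0001.heap').path  # chrome.3573.0001.heap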
--- tools/deep_memory_profiler/lib/dump.py (old)
+++ tools/deep_memory_profiler/lib/dump.py (new)
 # Copyright 2013 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
-import copy
-import datetime
 import logging
 import os
-import re
-import time
-
-from lib.exceptions import EmptyDumpException, InvalidDumpException
-from lib.exceptions import ObsoleteDumpVersionException, ParsingException
-from lib.pageframe import PageFrame
-from lib.range_dict import ExclusiveRangeDict
-from lib.symbol import procfs
 
 
 LOGGER = logging.getLogger('dmprof')
-VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _AT, BUCKET_ID = range(6)
-
-
-# Heap Profile Dump versions
-
-# DUMP_DEEP_[1-4] are obsolete.
-# DUMP_DEEP_2+ distinct mmap regions and malloc chunks.
-# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
-# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
-# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
-# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
-DUMP_DEEP_1 = 'DUMP_DEEP_1'
-DUMP_DEEP_2 = 'DUMP_DEEP_2'
-DUMP_DEEP_3 = 'DUMP_DEEP_3'
-DUMP_DEEP_4 = 'DUMP_DEEP_4'
-
-DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
-
-# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
-# malloc and mmap are identified in bucket files.
-# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
-DUMP_DEEP_5 = 'DUMP_DEEP_5'
-
-# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
-DUMP_DEEP_6 = 'DUMP_DEEP_6'
 
 
 class Dump(object):
   """Represents a heap profile dump."""
-
-  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
-
-  _HOOK_PATTERN = re.compile(
-      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
-      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)
-
-  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
-                               '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
-  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
-                                 '(?P<RESERVED>[0-9]+)')
-
-  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
-  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
-
-  _TIME_PATTERN_FORMAT = re.compile(
-      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
-  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
-
-  def __init__(self, path, modified_time):
-    self._path = path
-    matched = self._PATH_PATTERN.match(path)
-    self._pid = int(matched.group(2))
-    self._count = int(matched.group(3))
-    self._time = modified_time
-    self._map = {}
-    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
-    self._stacktrace_lines = []
-    self._global_stats = {}  # used only in apply_policy
-
-    self._run_id = ''
-    self._pagesize = 4096
-    self._pageframe_length = 0
-    self._pageframe_encoding = ''
-    self._has_pagecount = False
-
-    self._version = ''
-    self._lines = []
+  def __init__(self):
+    pass
 
   @property
   def path(self):
-    return self._path
+    raise NotImplementedError
 
   @property
   def count(self):
-    return self._count
+    raise NotImplementedError
 
   @property
   def time(self):
-    return self._time
+    raise NotImplementedError
 
   @property
   def iter_map(self):
-    for region in sorted(self._map.iteritems()):
-      yield region[0], region[1]
+    raise NotImplementedError
 
   @property
   def iter_stacktrace(self):
-    for line in self._stacktrace_lines:
-      words = line.split()
-      yield (int(words[BUCKET_ID]),
-             int(words[VIRTUAL]),
-             int(words[COMMITTED]),
-             int(words[ALLOC_COUNT]),
-             int(words[FREE_COUNT]))
+    raise NotImplementedError
 
   def global_stat(self, name):
-    return self._global_stats[name]
+    raise NotImplementedError
 
   @property
   def run_id(self):
-    return self._run_id
+    raise NotImplementedError
 
   @property
   def pagesize(self):
-    return self._pagesize
+    raise NotImplementedError
 
   @property
   def pageframe_length(self):
-    return self._pageframe_length
+    raise NotImplementedError
 
   @property
   def pageframe_encoding(self):
-    return self._pageframe_encoding
+    raise NotImplementedError
 
   @property
   def has_pagecount(self):
-    return self._has_pagecount
+    raise NotImplementedError
 
   @staticmethod
   def load(path, log_header='Loading a heap profile dump: '):
     """Loads a heap profile dump.
 
     Args:
         path: A file path string to load.
         log_header: A preceding string for log messages.
 
     Returns:
         A loaded Dump object.
 
     Raises:
         ParsingException for invalid heap profile dumps.
     """
-    dump = Dump(path, os.stat(path).st_mtime)
+    from lib.deep_dump import DeepDump
+    dump = DeepDump(path, os.stat(path).st_mtime)
     with open(path, 'r') as f:
       dump.load_file(f, log_header)
     return dump
 
-  def load_file(self, f, log_header):
-    self._lines = [line for line in f
-                   if line and not line.startswith('#')]
-
-    try:
-      self._version, ln = self._parse_version()
-      self._parse_meta_information()
-      if self._version == DUMP_DEEP_6:
-        self._parse_mmap_list()
-      self._parse_global_stats()
-      self._extract_stacktrace_lines(ln)
-    except EmptyDumpException:
-      LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
-    except ParsingException, e:
-      LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
-      raise
-    else:
-      LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))
-
-  def _parse_version(self):
-    """Parses a version string in self._lines.
-
-    Returns:
-        A pair of (a string representing a version of the stacktrace dump,
-        and an integer indicating a line number next to the version string).
-
-    Raises:
-        ParsingException for invalid dump versions.
-    """
-    version = ''
-
-    # Skip until an identifiable line.
-    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
-    if not self._lines:
-      raise EmptyDumpException('Empty heap dump file.')
-    (ln, found) = skip_while(
-        0, len(self._lines),
-        lambda n: not self._lines[n].startswith(headers))
-    if not found:
-      raise InvalidDumpException('No version header.')
-
-    # Identify a version.
-    if self._lines[ln].startswith('heap profile: '):
-      version = self._lines[ln][13:].strip()
-      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
-        (ln, _) = skip_while(
-            ln, len(self._lines),
-            lambda n: self._lines[n] != 'STACKTRACES:\n')
-      elif version in DUMP_DEEP_OBSOLETE:
-        raise ObsoleteDumpVersionException(version)
-      else:
-        raise InvalidDumpException('Invalid version: %s' % version)
-    elif self._lines[ln] == 'STACKTRACES:\n':
-      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
-    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
-      raise ObsoleteDumpVersionException(DUMP_DEEP_2)
-
-    return (version, ln)
-
-  def _parse_global_stats(self):
-    """Parses lines in self._lines as global stats."""
-    (ln, _) = skip_while(
-        0, len(self._lines),
-        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
-
-    global_stat_names = [
-        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
-        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
-        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
-        'nonprofiled-stack', 'nonprofiled-other',
-        'profiled-mmap', 'profiled-malloc']
-
-    for prefix in global_stat_names:
-      (ln, _) = skip_while(
-          ln, len(self._lines),
-          lambda n: self._lines[n].split()[0] != prefix)
-      words = self._lines[ln].split()
-      self._global_stats[prefix + '_virtual'] = int(words[-2])
-      self._global_stats[prefix + '_committed'] = int(words[-1])
-
-  def _parse_meta_information(self):
-    """Parses lines in self._lines for meta information."""
-    (ln, found) = skip_while(
-        0, len(self._lines),
-        lambda n: self._lines[n] != 'META:\n')
-    if not found:
-      return
-    ln += 1
-
-    while True:
-      if self._lines[ln].startswith('Time:'):
-        matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
-        matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
-        if matched_format:
-          self._time = time.mktime(datetime.datetime.strptime(
-              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
-          if matched_format.group(2):
-            self._time += float(matched_format.group(2)[1:]) / 1000.0
-        elif matched_seconds:
-          self._time = float(matched_seconds.group(1))
-      elif self._lines[ln].startswith('Reason:'):
-        pass  # Nothing to do for 'Reason:'
-      elif self._lines[ln].startswith('PageSize: '):
-        self._pagesize = int(self._lines[ln][10:])
-      elif self._lines[ln].startswith('CommandLine:'):
-        pass
-      elif (self._lines[ln].startswith('PageFrame: ') or
-            self._lines[ln].startswith('PFN: ')):
-        if self._lines[ln].startswith('PageFrame: '):
-          words = self._lines[ln][11:].split(',')
-        else:
-          words = self._lines[ln][5:].split(',')
-        for word in words:
-          if word == '24':
-            self._pageframe_length = 24
-          elif word == 'Base64':
-            self._pageframe_encoding = 'base64'
-          elif word == 'PageCount':
-            self._has_pagecount = True
-      elif self._lines[ln].startswith('RunID: '):
-        self._run_id = self._lines[ln][7:].strip()
-      elif (self._lines[ln].startswith('MMAP_LIST:') or
-            self._lines[ln].startswith('GLOBAL_STATS:')):
-        # Skip until "MMAP_LIST:" or "GLOBAL_STATS" is found.
-        break
-      else:
-        pass
-      ln += 1
-
-  def _parse_mmap_list(self):
-    """Parses lines in self._lines as a mmap list."""
-    (ln, found) = skip_while(
-        0, len(self._lines),
-        lambda n: self._lines[n] != 'MMAP_LIST:\n')
-    if not found:
-      return {}
-
-    ln += 1
-    self._map = {}
-    current_vma = {}
-    pageframe_list = []
-    while True:
-      entry = procfs.ProcMaps.parse_line(self._lines[ln])
-      if entry:
-        current_vma = {}
-        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
-          for key, value in entry.as_dict().iteritems():
-            attr[key] = value
-            current_vma[key] = value
-        ln += 1
-        continue
-
-      if self._lines[ln].startswith(' PF: '):
-        for pageframe in self._lines[ln][5:].split():
-          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
-        ln += 1
-        continue
-
-      matched = self._HOOK_PATTERN.match(self._lines[ln])
-      if not matched:
-        break
-      # 2: starting address
-      # 5: end address
-      # 7: hooked or unhooked
-      # 8: additional information
-      if matched.group(7) == 'hooked':
-        submatched = self._HOOKED_PATTERN.match(matched.group(8))
-        if not submatched:
-          submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
-      elif matched.group(7) == 'unhooked':
-        submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
-        if not submatched:
-          submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
-      else:
-        assert matched.group(7) in ['hooked', 'unhooked']
-
-      submatched_dict = submatched.groupdict()
-      region_info = { 'vma': current_vma }
-      if submatched_dict.get('TYPE'):
-        region_info['type'] = submatched_dict['TYPE'].strip()
-      if submatched_dict.get('COMMITTED'):
-        region_info['committed'] = int(submatched_dict['COMMITTED'])
-      if submatched_dict.get('RESERVED'):
-        region_info['reserved'] = int(submatched_dict['RESERVED'])
-      if submatched_dict.get('BUCKETID'):
-        region_info['bucket_id'] = int(submatched_dict['BUCKETID'])
-
-      if matched.group(1) == '(':
-        start = current_vma['begin']
-      else:
-        start = int(matched.group(2), 16)
-      if matched.group(4) == '(':
-        end = current_vma['end']
-      else:
-        end = int(matched.group(5), 16)
-
-      if pageframe_list and pageframe_list[0].start_truncated:
-        pageframe_list[0].set_size(
-            pageframe_list[0].size - start % self._pagesize)
-      if pageframe_list and pageframe_list[-1].end_truncated:
-        pageframe_list[-1].set_size(
-            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
-      region_info['pageframe'] = pageframe_list
-      pageframe_list = []
-
-      self._map[(start, end)] = (matched.group(7), region_info)
-      ln += 1
-
-  def _extract_stacktrace_lines(self, line_number):
-    """Extracts the position of stacktrace lines.
-
-    Valid stacktrace lines are stored into self._stacktrace_lines.
-
-    Args:
-        line_number: A line number to start parsing in lines.
-
-    Raises:
-        ParsingException for invalid dump versions.
-    """
-    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
-      (line_number, _) = skip_while(
-          line_number, len(self._lines),
-          lambda n: not self._lines[n].split()[0].isdigit())
-      stacktrace_start = line_number
-      (line_number, _) = skip_while(
-          line_number, len(self._lines),
-          lambda n: self._check_stacktrace_line(self._lines[n]))
-      self._stacktrace_lines = self._lines[stacktrace_start:line_number]
-
-    elif self._version in DUMP_DEEP_OBSOLETE:
-      raise ObsoleteDumpVersionException(self._version)
-
-    else:
-      raise InvalidDumpException('Invalid version: %s' % self._version)
-
-  @staticmethod
-  def _check_stacktrace_line(stacktrace_line):
-    """Checks if a given stacktrace_line is valid as stacktrace.
-
-    Args:
-        stacktrace_line: A string to be checked.
-
-    Returns:
-        True if the given stacktrace_line is valid.
-    """
-    words = stacktrace_line.split()
-    if len(words) < BUCKET_ID + 1:
-      return False
-    if words[BUCKET_ID - 1] != '@':
-      return False
-    return True
-
 
 class DumpList(object):
   """Represents a sequence of heap profile dumps.
 
   Individual dumps are loaded into memory lazily as the sequence is accessed,
   either while being iterated through or randomly accessed. Loaded dumps are
   not cached, meaning a newly loaded Dump object is returned every time an
   element in the list is accessed.
   """
 
   def __init__(self, dump_path_list):
     self._dump_path_list = dump_path_list
 
   @staticmethod
   def load(path_list):
     return DumpList(path_list)
 
   def __len__(self):
     return len(self._dump_path_list)
 
   def __iter__(self):
     for dump in self._dump_path_list:
       yield Dump.load(dump)
 
   def __getitem__(self, index):
     return Dump.load(self._dump_path_list[index])
-
-
-class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
-  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
-  _DUMMY_ENTRY = procfs.ProcMapsEntry(
-      0,     # begin
-      0,     # end
-      '-',   # readable
-      '-',   # writable
-      '-',   # executable
-      '-',   # private
-      0,     # offset
-      '00',  # major
-      '00',  # minor
-      0,     # inode
-      ''     # name
-  )
-
-  def __init__(self):
-    super(ProcMapsEntryAttribute, self).__init__()
-    self._entry = self._DUMMY_ENTRY.as_dict()
-
-  def __str__(self):
-    return str(self._entry)
-
-  def __repr__(self):
-    return 'ProcMapsEntryAttribute' + str(self._entry)
-
-  def __getitem__(self, key):
-    return self._entry[key]
-
-  def __setitem__(self, key, value):
-    if key not in self._entry:
-      raise KeyError(key)
-    self._entry[key] = value
-
-  def copy(self):
-    new_entry = ProcMapsEntryAttribute()
-    for key, value in self._entry.iteritems():
-      new_entry[key] = copy.deepcopy(value)
-    return new_entry
-
-
-def skip_while(index, max_index, skipping_condition):
-  """Increments |index| until |skipping_condition|(|index|) is False.
-
-  Returns:
-      A pair of an integer indicating a line number after skipped, and a
-      boolean value which is True if found a line which skipping_condition
-      is False for.
-  """
-  while skipping_condition(index):
-    index += 1
-    if index >= max_index:
-      return index, False
-  return index, True
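
A note on DumpList, which this patch leaves unchanged: its docstring
promises lazy, uncached loading, and the sketch below spells out the
observable consequence. The .heap paths are made up, and the snippet
assumes the files exist and parse cleanly.

dumps = DumpList.load(['chrome.3573.0001.heap', 'chrome.3573.0002.heap'])

for dump in dumps:           # each iteration step opens and parses one file
  print dump.path, dump.count

first = dumps[0]             # parses chrome.3573.0001.heap ...
again = dumps[0]             # ... and parses it again from scratch
assert first is not again    # no caching: two distinct Dump objects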

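Finally, skip_while is the scanning primitive the removed parsers lean on
(after this CL it presumably moves to lib/deep_dump.py along with its
callers; that file is not shown here). A standalone check of its contract,
with made-up input lines:

lines = ['# comment\n', '# comment\n', 'GLOBAL_STATS:\n', 'total 10 8\n']

# Advance from index 0 until the predicate turns False.
(ln, found) = skip_while(0, len(lines),
                         lambda n: lines[n] != 'GLOBAL_STATS:\n')
assert (ln, found) == (2, True)

# If the predicate never turns False, the index runs off the end and the
# second element of the result is False.
(ln, found) = skip_while(0, len(lines), lambda n: True)
assert (ln, found) == (4, False)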