OLD | NEW |
1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import copy | |
6 import datetime | |
7 import logging | 5 import logging |
8 import os | 6 import os |
9 import re | |
10 import time | |
11 | |
12 from lib.exceptions import EmptyDumpException, InvalidDumpException | |
13 from lib.exceptions import ObsoleteDumpVersionException, ParsingException | |
14 from lib.pageframe import PageFrame | |
15 from lib.range_dict import ExclusiveRangeDict | |
16 from lib.symbol import procfs | |
17 | 7 |
18 | 8 |
19 LOGGER = logging.getLogger('dmprof') | 9 LOGGER = logging.getLogger('dmprof') |
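# Indices into a whitespace-split stacktrace line; such a line looks | |
# like "65536 4096 10 2 @ 123" (values illustrative), i.e. | |
# "<virtual> <committed> <allocs> <frees> @ <bucket_id>". | |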
20 VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _AT, BUCKET_ID = range(6) | |
21 | |
22 | |
23 # Heap Profile Dump versions | |
24 | |
25 # DUMP_DEEP_[1-4] are obsolete. | |
26 # DUMP_DEEP_2+ distinguish mmap regions and malloc chunks. | |
27 # DUMP_DEEP_3+ don't include allocation functions in their stack dumps. | |
28 # DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*". | |
29 # DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1. | |
30 # DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3. | |
31 DUMP_DEEP_1 = 'DUMP_DEEP_1' | |
32 DUMP_DEEP_2 = 'DUMP_DEEP_2' | |
33 DUMP_DEEP_3 = 'DUMP_DEEP_3' | |
34 DUMP_DEEP_4 = 'DUMP_DEEP_4' | |
35 | |
36 DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4) | |
37 | |
38 # DUMP_DEEP_5 doesn't separate sections for malloc and mmap. | |
39 # malloc and mmap are identified in bucket files. | |
40 # DUMP_DEEP_5 should be processed by POLICY_DEEP_4. | |
41 DUMP_DEEP_5 = 'DUMP_DEEP_5' | |
42 | |
43 # DUMP_DEEP_6 adds an mmap list to DUMP_DEEP_5. | |
44 DUMP_DEEP_6 = 'DUMP_DEEP_6' | |
45 | 10 |
46 | 11 |
47 class Dump(object): | 12 class Dump(object): |
48 """Represents a heap profile dump.""" | 13 """Represents a heap profile dump.""" |
49 | 14 def __init__(self): |
50 _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') | 15 pass |
51 | |
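# Matches one address-range line of an MMAP_LIST section, e.g. | |
# " (7f123000)-(7f130000)   hooked <details>" (addresses illustrative). | |
# A parenthesized address is clipped to the enclosing VMA, and is | |
# replaced by the VMA's begin/end when the map is built below. | |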
52 _HOOK_PATTERN = re.compile( | |
53 r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+' | |
54 r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE) | |
55 | |
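# <details> of hooked/unhooked lines, e.g. (values illustrative): | |
#   hooked:   "mmap-profiler 4096 / 8192 @ 123" | |
#   unhooked: "unknown 4096 / 8192" | |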
56 _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' | |
57 '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)') | |
58 _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' | |
59 '(?P<RESERVED>[0-9]+)') | |
60 | |
61 _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)') | |
62 _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)') | |
63 | |
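# META "Time:" lines come in two forms (values illustrative): a local | |
# timestamp "Time: 2013/01/02 03:04:05.678" or plain epoch seconds | |
# "Time: 1357527845". | |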
64 _TIME_PATTERN_FORMAT = re.compile( | |
65 r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?') | |
66 _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$') | |
67 | |
68 def __init__(self, path, modified_time): | |
69 self._path = path | |
70 matched = self._PATH_PATTERN.match(path) | |
71 self._pid = int(matched.group(2)) | |
72 self._count = int(matched.group(3)) | |
73 self._time = modified_time | |
74 self._map = {} | |
75 self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute) | |
76 self._stacktrace_lines = [] | |
77 self._global_stats = {} # used only in apply_policy | |
78 | |
79 self._run_id = '' | |
80 self._pagesize = 4096 | |
81 self._pageframe_length = 0 | |
82 self._pageframe_encoding = '' | |
83 self._has_pagecount = False | |
84 | |
85 self._version = '' | |
86 self._lines = [] | |
87 | 16 |
88 @property | 17 @property |
89 def path(self): | 18 def path(self): |
90 return self._path | 19 raise NotImplementedError |
91 | 20 |
92 @property | 21 @property |
93 def count(self): | 22 def count(self): |
94 return self._count | 23 raise NotImplementedError |
95 | 24 |
96 @property | 25 @property |
97 def time(self): | 26 def time(self): |
98 return self._time | 27 raise NotImplementedError |
99 | 28 |
100 @property | 29 @property |
101 def iter_map(self): | 30 def iter_map(self): |
102 for region in sorted(self._map.iteritems()): | 31 raise NotImplementedError |
103 yield region[0], region[1] | |
104 | 32 |
105 @property | 33 @property |
106 def iter_stacktrace(self): | 34 def iter_stacktrace(self): |
107 for line in self._stacktrace_lines: | 35 raise NotImplementedError |
108 words = line.split() | |
109 yield (int(words[BUCKET_ID]), | |
110 int(words[VIRTUAL]), | |
111 int(words[COMMITTED]), | |
112 int(words[ALLOC_COUNT]), | |
113 int(words[FREE_COUNT])) | |
114 | 36 |
115 def global_stat(self, name): | 37 def global_stat(self, name): |
116 return self._global_stats[name] | 38 raise NotImplementedError |
117 | 39 |
118 @property | 40 @property |
119 def run_id(self): | 41 def run_id(self): |
120 return self._run_id | 42 raise NotImplementedError |
121 | 43 |
122 @property | 44 @property |
123 def pagesize(self): | 45 def pagesize(self): |
124 return self._pagesize | 46 raise NotImplementedError |
125 | 47 |
126 @property | 48 @property |
127 def pageframe_length(self): | 49 def pageframe_length(self): |
128 return self._pageframe_length | 50 raise NotImplementedError |
129 | 51 |
130 @property | 52 @property |
131 def pageframe_encoding(self): | 53 def pageframe_encoding(self): |
132 return self._pageframe_encoding | 54 raise NotImplementedError |
133 | 55 |
134 @property | 56 @property |
135 def has_pagecount(self): | 57 def has_pagecount(self): |
136 return self._has_pagecount | 58 raise NotImplementedError |
137 | 59 |
138 @staticmethod | 60 @staticmethod |
139 def load(path, log_header='Loading a heap profile dump: '): | 61 def load(path, log_header='Loading a heap profile dump: '): |
140 """Loads a heap profile dump. | 62 """Loads a heap profile dump. |
141 | 63 |
142 Args: | 64 Args: |
143 path: A file path string to load. | 65 path: A file path string to load. |
144 log_header: A preceding string for log messages. | 66 log_header: A preceding string for log messages. |
145 | 67 |
146 Returns: | 68 Returns: |
147 A loaded Dump object. | 69 A loaded Dump object. |
148 | 70 |
149 Raises: | 71 Raises: |
150 ParsingException for invalid heap profile dumps. | 72 ParsingException for invalid heap profile dumps. |
151 """ | 73 """ |
152 dump = Dump(path, os.stat(path).st_mtime) | 74 from lib.deep_dump import DeepDump |
| 75 dump = DeepDump(path, os.stat(path).st_mtime) |
153 with open(path, 'r') as f: | 76 with open(path, 'r') as f: |
154 dump.load_file(f, log_header) | 77 dump.load_file(f, log_header) |
155 return dump | 78 return dump |
156 | 79 |
157 def load_file(self, f, log_header): | |
158 self._lines = [line for line in f | |
159 if line and not line.startswith('#')] | |
160 | |
161 try: | |
162 self._version, ln = self._parse_version() | |
163 self._parse_meta_information() | |
164 if self._version == DUMP_DEEP_6: | |
165 self._parse_mmap_list() | |
166 self._parse_global_stats() | |
167 self._extract_stacktrace_lines(ln) | |
168 except EmptyDumpException: | |
169 LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path)) | |
170 except ParsingException, e: | |
171 LOGGER.error('%s%s ...error %s' % (log_header, self._path, e)) | |
172 raise | |
173 else: | |
174 LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version)) | |
175 | |
176 def _parse_version(self): | |
177 """Parses a version string in self._lines. | |
178 | |
179 Returns: | |
180 A pair of (a string representing the version of the stacktrace dump, | |
181 and an integer giving the line number at which parsing should resume). | |
182 | |
183 Raises: | |
184 ParsingException for invalid dump versions. | |
185 """ | |
186 version = '' | |
187 | |
188 # Skip until an identifiable line. | |
189 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') | |
190 if not self._lines: | |
191 raise EmptyDumpException('Empty heap dump file.') | |
192 (ln, found) = skip_while( | |
193 0, len(self._lines), | |
194 lambda n: not self._lines[n].startswith(headers)) | |
195 if not found: | |
196 raise InvalidDumpException('No version header.') | |
197 | |
198 # Identify a version. | |
199 if self._lines[ln].startswith('heap profile: '): | |
200 version = self._lines[ln][13:].strip() | |
201 if version in (DUMP_DEEP_5, DUMP_DEEP_6): | |
202 (ln, _) = skip_while( | |
203 ln, len(self._lines), | |
204 lambda n: self._lines[n] != 'STACKTRACES:\n') | |
205 elif version in DUMP_DEEP_OBSOLETE: | |
206 raise ObsoleteDumpVersionException(version) | |
207 else: | |
208 raise InvalidDumpException('Invalid version: %s' % version) | |
209 elif self._lines[ln] == 'STACKTRACES:\n': | |
210 raise ObsoleteDumpVersionException(DUMP_DEEP_1) | |
211 elif self._lines[ln] == 'MMAP_STACKTRACES:\n': | |
212 raise ObsoleteDumpVersionException(DUMP_DEEP_2) | |
213 | |
214 return (version, ln) | |
215 | |
216 def _parse_global_stats(self): | |
217 """Parses lines in self._lines as global stats.""" | |
218 (ln, _) = skip_while( | |
219 0, len(self._lines), | |
220 lambda n: self._lines[n] != 'GLOBAL_STATS:\n') | |
221 | |
222 global_stat_names = [ | |
223 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack', | |
224 'other', 'nonprofiled-absent', 'nonprofiled-anonymous', | |
225 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', | |
226 'nonprofiled-stack', 'nonprofiled-other', | |
227 'profiled-mmap', 'profiled-malloc'] | |
228 | |
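# Each stats line ends with two integers, the virtual and committed | |
# sizes, read below as its last two whitespace-separated words. | |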
229 for prefix in global_stat_names: | |
230 (ln, _) = skip_while( | |
231 ln, len(self._lines), | |
232 lambda n: self._lines[n].split()[0] != prefix) | |
233 words = self._lines[ln].split() | |
234 self._global_stats[prefix + '_virtual'] = int(words[-2]) | |
235 self._global_stats[prefix + '_committed'] = int(words[-1]) | |
236 | |
237 def _parse_meta_information(self): | |
238 """Parses lines in self._lines for meta information.""" | |
239 (ln, found) = skip_while( | |
240 0, len(self._lines), | |
241 lambda n: self._lines[n] != 'META:\n') | |
242 if not found: | |
243 return | |
244 ln += 1 | |
245 | |
246 while True: | |
247 if self._lines[ln].startswith('Time:'): | |
248 matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln]) | |
249 matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln]) | |
250 if matched_format: | |
251 self._time = time.mktime(datetime.datetime.strptime( | |
252 matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple()) | |
253 if matched_format.group(2): | |
254 self._time += float(matched_format.group(2)[1:]) / 1000.0 | |
255 elif matched_seconds: | |
256 self._time = float(matched_seconds.group(1)) | |
257 elif self._lines[ln].startswith('Reason:'): | |
258 pass # Nothing to do for 'Reason:' | |
259 elif self._lines[ln].startswith('PageSize: '): | |
260 self._pagesize = int(self._lines[ln][10:]) | |
261 elif self._lines[ln].startswith('CommandLine:'): | |
262 pass | |
263 elif (self._lines[ln].startswith('PageFrame: ') or | |
264 self._lines[ln].startswith('PFN: ')): | |
265 if self._lines[ln].startswith('PageFrame: '): | |
266 words = self._lines[ln][11:].split(',') | |
267 else: | |
268 words = self._lines[ln][5:].split(',') | |
269 for word in words: | |
270 if word == '24': | |
271 self._pageframe_length = 24 | |
272 elif word == 'Base64': | |
273 self._pageframe_encoding = 'base64' | |
274 elif word == 'PageCount': | |
275 self._has_pagecount = True | |
276 elif self._lines[ln].startswith('RunID: '): | |
277 self._run_id = self._lines[ln][7:].strip() | |
278 elif (self._lines[ln].startswith('MMAP_LIST:') or | |
279 self._lines[ln].startswith('GLOBAL_STATS:')): | |
280 # Stop at the "MMAP_LIST:" or "GLOBAL_STATS:" section header. | |
281 break | |
282 else: | |
283 pass | |
284 ln += 1 | |
285 | |
286 def _parse_mmap_list(self): | |
287 """Parses lines in self._lines as a mmap list.""" | |
288 (ln, found) = skip_while( | |
289 0, len(self._lines), | |
290 lambda n: self._lines[n] != 'MMAP_LIST:\n') | |
291 if not found: | |
292 return {} | |
293 | |
294 ln += 1 | |
295 self._map = {} | |
296 current_vma = {} | |
297 pageframe_list = [] | |
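# Each MMAP_LIST entry is one of: a /proc/maps-style line opening a | |
# new VMA, an optional " PF: " page-frame list, or an address-range | |
# ("hooked"/"unhooked") line handled by the patterns above. | |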
298 while True: | |
299 entry = procfs.ProcMaps.parse_line(self._lines[ln]) | |
300 if entry: | |
301 current_vma = {} | |
302 for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end): | |
303 for key, value in entry.as_dict().iteritems(): | |
304 attr[key] = value | |
305 current_vma[key] = value | |
306 ln += 1 | |
307 continue | |
308 | |
309 if self._lines[ln].startswith(' PF: '): | |
310 for pageframe in self._lines[ln][5:].split(): | |
311 pageframe_list.append(PageFrame.parse(pageframe, self._pagesize)) | |
312 ln += 1 | |
313 continue | |
314 | |
315 matched = self._HOOK_PATTERN.match(self._lines[ln]) | |
316 if not matched: | |
317 break | |
318 # 2: starting address | |
319 # 5: end address | |
320 # 7: hooked or unhooked | |
321 # 8: additional information | |
322 if matched.group(7) == 'hooked': | |
323 submatched = self._HOOKED_PATTERN.match(matched.group(8)) | |
324 if not submatched: | |
325 submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8)) | |
326 elif matched.group(7) == 'unhooked': | |
327 submatched = self._UNHOOKED_PATTERN.match(matched.group(8)) | |
328 if not submatched: | |
329 submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8)) | |
330 else: | |
331 assert matched.group(7) in ['hooked', 'unhooked'] | |
332 | |
333 submatched_dict = submatched.groupdict() | |
334 region_info = { 'vma': current_vma } | |
335 if submatched_dict.get('TYPE'): | |
336 region_info['type'] = submatched_dict['TYPE'].strip() | |
337 if submatched_dict.get('COMMITTED'): | |
338 region_info['committed'] = int(submatched_dict['COMMITTED']) | |
339 if submatched_dict.get('RESERVED'): | |
340 region_info['reserved'] = int(submatched_dict['RESERVED']) | |
341 if submatched_dict.get('BUCKETID'): | |
342 region_info['bucket_id'] = int(submatched_dict['BUCKETID']) | |
343 | |
344 if matched.group(1) == '(': | |
345 start = current_vma['begin'] | |
346 else: | |
347 start = int(matched.group(2), 16) | |
348 if matched.group(4) == '(': | |
349 end = current_vma['end'] | |
350 else: | |
351 end = int(matched.group(5), 16) | |
352 | |
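# If the page-frame list was truncated at either end of the region, | |
# shrink the first/last PageFrame by the partial-page amount so the | |
# recorded sizes match the [start, end) range. | |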
353 if pageframe_list and pageframe_list[0].start_truncated: | |
354 pageframe_list[0].set_size( | |
355 pageframe_list[0].size - start % self._pagesize) | |
356 if pageframe_list and pageframe_list[-1].end_truncated: | |
357 pageframe_list[-1].set_size( | |
358 pageframe_list[-1].size - (self._pagesize - end % self._pagesize)) | |
359 region_info['pageframe'] = pageframe_list | |
360 pageframe_list = [] | |
361 | |
362 self._map[(start, end)] = (matched.group(7), region_info) | |
363 ln += 1 | |
364 | |
365 def _extract_stacktrace_lines(self, line_number): | |
366 Extracts stacktrace lines from self._lines. | |
367 | |
368 Valid stacktrace lines are stored into self._stacktrace_lines. | |
369 | |
370 Args: | |
371 line_number: A line number to start parsing in lines. | |
372 | |
373 Raises: | |
374 ParsingException for invalid dump versions. | |
375 """ | |
376 if self._version in (DUMP_DEEP_5, DUMP_DEEP_6): | |
377 (line_number, _) = skip_while( | |
378 line_number, len(self._lines), | |
379 lambda n: not self._lines[n].split()[0].isdigit()) | |
380 stacktrace_start = line_number | |
381 (line_number, _) = skip_while( | |
382 line_number, len(self._lines), | |
383 lambda n: self._check_stacktrace_line(self._lines[n])) | |
384 self._stacktrace_lines = self._lines[stacktrace_start:line_number] | |
385 | |
386 elif self._version in DUMP_DEEP_OBSOLETE: | |
387 raise ObsoleteDumpVersionException(self._version) | |
388 | |
389 else: | |
390 raise InvalidDumpException('Invalid version: %s' % self._version) | |
391 | |
392 @staticmethod | |
393 def _check_stacktrace_line(stacktrace_line): | |
394 Checks if a given stacktrace_line is valid as a stacktrace line. | |
395 | |
396 Args: | |
397 stacktrace_line: A string to be checked. | |
398 | |
399 Returns: | |
400 True if the given stacktrace_line is valid. | |
401 """ | |
402 words = stacktrace_line.split() | |
403 if len(words) < BUCKET_ID + 1: | |
404 return False | |
405 if words[BUCKET_ID - 1] != '@': | |
406 return False | |
407 return True | |
408 | |
409 | 80 |
410 class DumpList(object): | 81 class DumpList(object): |
411 """Represents a sequence of heap profile dumps. | 82 """Represents a sequence of heap profile dumps. |
412 | 83 |
413 Individual dumps are loaded into memory lazily as the sequence is accessed, | 84 Individual dumps are loaded into memory lazily as the sequence is accessed, |
414 either while being iterated through or randomly accessed. Loaded dumps are | 85 either while being iterated through or randomly accessed. Loaded dumps are |
415 not cached, meaning a newly loaded Dump object is returned every time an | 86 not cached, meaning a newly loaded Dump object is returned every time an |
416 element in the list is accessed. | 87 element in the list is accessed. |
417 """ | 88 """ |
418 | 89 |
419 def __init__(self, dump_path_list): | 90 def __init__(self, dump_path_list): |
420 self._dump_path_list = dump_path_list | 91 self._dump_path_list = dump_path_list |
421 | 92 |
422 @staticmethod | 93 @staticmethod |
423 def load(path_list): | 94 def load(path_list): |
424 return DumpList(path_list) | 95 return DumpList(path_list) |
425 | 96 |
426 def __len__(self): | 97 def __len__(self): |
427 return len(self._dump_path_list) | 98 return len(self._dump_path_list) |
428 | 99 |
429 def __iter__(self): | 100 def __iter__(self): |
430 for dump in self._dump_path_list: | 101 for dump in self._dump_path_list: |
431 yield Dump.load(dump) | 102 yield Dump.load(dump) |
432 | 103 |
433 def __getitem__(self, index): | 104 def __getitem__(self, index): |
434 return Dump.load(self._dump_path_list[index]) | 105 return Dump.load(self._dump_path_list[index]) |
435 | |
436 | |
437 class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute): | |
438 """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict.""" | |
439 _DUMMY_ENTRY = procfs.ProcMapsEntry( | |
440 0, # begin | |
441 0, # end | |
442 '-', # readable | |
443 '-', # writable | |
444 '-', # executable | |
445 '-', # private | |
446 0, # offset | |
447 '00', # major | |
448 '00', # minor | |
449 0, # inode | |
450 '' # name | |
451 ) | |
452 | |
453 def __init__(self): | |
454 super(ProcMapsEntryAttribute, self).__init__() | |
455 self._entry = self._DUMMY_ENTRY.as_dict() | |
456 | |
457 def __str__(self): | |
458 return str(self._entry) | |
459 | |
460 def __repr__(self): | |
461 return 'ProcMapsEntryAttribute' + str(self._entry) | |
462 | |
463 def __getitem__(self, key): | |
464 return self._entry[key] | |
465 | |
466 def __setitem__(self, key, value): | |
467 if key not in self._entry: | |
468 raise KeyError(key) | |
469 self._entry[key] = value | |
470 | |
471 def copy(self): | |
472 new_entry = ProcMapsEntryAttribute() | |
473 for key, value in self._entry.iteritems(): | |
474 new_entry[key] = copy.deepcopy(value) | |
475 return new_entry | |
476 | |
477 | |
478 def skip_while(index, max_index, skipping_condition): | |
479 """Increments |index| until |skipping_condition|(|index|) is False. | |
480 | |
481 Returns: | |
482 A pair of the first index for which |skipping_condition| is False | |
483 (or |max_index| if there is none), and a boolean which is True if | |
484 such an index was found before reaching |max_index|. | |
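Example (with a hypothetical |lines| list): | |
skip_while(0, len(lines), lambda n: lines[n] != 'META:\n') | |
returns the index of the first 'META:\n' line and True, or | |
(len(lines), False) if no such line exists. | |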
485 """ | |
486 while skipping_condition(index): | |
487 index += 1 | |
488 if index >= max_index: | |
489 return index, False | |
490 return index, True | |