Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(14)

Side by Side Diff: tools/deep_memory_profiler/dmprof

Issue 10096009: Improve parsing error handling in dmprof. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """The deep heap profiler script for Chrome.""" 6 """The deep heap profiler script for Chrome."""
7 7
8 from datetime import datetime 8 from datetime import datetime
9 import json 9 import json
10 import os 10 import os
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
59 # POLICY_DEEP_2 DOES include allocation_type columns. 59 # POLICY_DEEP_2 DOES include allocation_type columns.
 60 # mmap regions are distinguished w/ the allocation_type column. 60 # mmap regions are distinguished w/ the allocation_type column.
61 POLICY_DEEP_2 = 'POLICY_DEEP_2' 61 POLICY_DEEP_2 = 'POLICY_DEEP_2'
62 62
63 # TODO(dmikurube): Avoid global variables. 63 # TODO(dmikurube): Avoid global variables.
64 address_symbol_dict = {} 64 address_symbol_dict = {}
65 appeared_addresses = set() 65 appeared_addresses = set()
66 components = [] 66 components = []
67 67
68 68
69 class ParsingException(Exception):
70 def __init__(self, value):
71 self.value = value
72 def __str__(self):
73 return repr(self.value)
74
75
69 class Policy(object): 76 class Policy(object):
70 77
71 def __init__(self, name, mmap, pattern): 78 def __init__(self, name, mmap, pattern):
72 self.name = name 79 self.name = name
73 self.mmap = mmap 80 self.mmap = mmap
74 self.condition = re.compile(pattern + r'\Z') 81 self.condition = re.compile(pattern + r'\Z')
75 82
76 83
77 def get_component(policy_list, bucket, mmap): 84 def get_component(policy_list, bucket, mmap):
78 """Returns a component name which a given bucket belongs to. 85 """Returns a component name which a given bucket belongs to.
(...skipping 26 matching lines...) Expand all
105 class Bucket(object): 112 class Bucket(object):
106 113
107 def __init__(self, stacktrace): 114 def __init__(self, stacktrace):
108 self.stacktrace = stacktrace 115 self.stacktrace = stacktrace
109 self.component = '' 116 self.component = ''
110 117
111 118
112 class Log(object): 119 class Log(object):
113 120
114 """A class representing one dumped log data.""" 121 """A class representing one dumped log data."""
115 def __init__(self, log_path, buckets): 122 def __init__(self, log_path):
116 self.log_path = log_path 123 self.log_path = log_path
117 self.log_lines = [ 124 self.log_lines = [
118 l for l in open(self.log_path, 'r') if l and not l.startswith('#')] 125 l for l in open(self.log_path, 'r') if l and not l.startswith('#')]
119 self.log_version = '' 126 self.log_version = ''
120 sys.stderr.write('parsing a log file:%s\n' % log_path) 127 sys.stderr.write('Loading a dump: %s\n' % log_path)
121 self.mmap_stacktrace_lines = [] 128 self.mmap_stacktrace_lines = []
122 self.malloc_stacktrace_lines = [] 129 self.malloc_stacktrace_lines = []
123 self.counters = {} 130 self.counters = {}
124 self.log_time = os.stat(self.log_path).st_mtime 131 self.log_time = os.stat(self.log_path).st_mtime
125 self.parse_log(buckets)
126 132
127 @staticmethod 133 @staticmethod
128 def dump_stacktrace_lines(stacktrace_lines, buckets): 134 def dump_stacktrace_lines(stacktrace_lines, buckets):
129 """Prints a given stacktrace. 135 """Prints a given stacktrace.
130 136
131 Args: 137 Args:
132 stacktrace_lines: A list of strings which are valid as stacktraces. 138 stacktrace_lines: A list of strings which are valid as stacktraces.
133 buckets: A dict mapping bucket ids and their corresponding Bucket 139 buckets: A dict mapping bucket ids and their corresponding Bucket
134 objects. 140 objects.
135 """ 141 """
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after
272 return False 278 return False
273 bucket = buckets.get(int(words[BUCKET_ID])) 279 bucket = buckets.get(int(words[BUCKET_ID]))
274 if bucket: 280 if bucket:
275 for address in bucket.stacktrace: 281 for address in bucket.stacktrace:
276 appeared_addresses.add(address) 282 appeared_addresses.add(address)
277 return True 283 return True
278 284
279 @staticmethod 285 @staticmethod
280 def skip_lines_while(line_number, max_line_number, skipping_condition): 286 def skip_lines_while(line_number, max_line_number, skipping_condition):
281 """Increments line_number until skipping_condition(line_number) is false. 287 """Increments line_number until skipping_condition(line_number) is false.
288
289 Returns:
290 A pair of an integer indicating a line number after skipped, and a
291 boolean value which is True if found a line which skipping_condition
292 is False for.
282 """ 293 """
283 while skipping_condition(line_number): 294 while skipping_condition(line_number):
284 line_number += 1 295 line_number += 1
285 if line_number >= max_line_number: 296 if line_number >= max_line_number:
286 sys.stderr.write('invalid heap profile dump.') 297 return line_number, False
287 return line_number 298 return line_number, True
288 return line_number
289 299
290 def parse_stacktraces_while_valid(self, buckets, log_lines, line_number): 300 def parse_stacktraces_while_valid(self, buckets, log_lines, line_number):
291 """Parses stacktrace lines while the lines are valid. 301 """Parses stacktrace lines while the lines are valid.
292 302
293 Args: 303 Args:
294 buckets: A dict mapping bucket ids and their corresponding Bucket 304 buckets: A dict mapping bucket ids and their corresponding Bucket
295 objects. 305 objects.
296 log_lines: A list of lines to be parsed. 306 log_lines: A list of lines to be parsed.
297 line_number: An integer representing the starting line number in 307 line_number: An integer representing the starting line number in
298 log_lines. 308 log_lines.
299 309
300 Returns: 310 Returns:
301 A pair of a list of valid lines and an integer representing the last 311 A pair of a list of valid lines and an integer representing the last
302 line number in log_lines. 312 line number in log_lines.
303 """ 313 """
304 line_number = self.skip_lines_while( 314 (line_number, found) = self.skip_lines_while(
M-A Ruel 2012/04/16 12:20:55 since found is not used, you can either: line_numb
Dai Mikurube (NOT FULLTIME) 2012/04/17 03:26:32 Done.
305 line_number, len(log_lines), 315 line_number, len(log_lines),
306 lambda n: not log_lines[n].split()[0].isdigit()) 316 lambda n: not log_lines[n].split()[0].isdigit())
307 stacktrace_lines_start = line_number 317 stacktrace_lines_start = line_number
308 line_number = self.skip_lines_while( 318 (line_number, found) = self.skip_lines_while(
309 line_number, len(log_lines), 319 line_number, len(log_lines),
310 lambda n: self.check_stacktrace_line(log_lines[n], buckets)) 320 lambda n: self.check_stacktrace_line(log_lines[n], buckets))
311 return (log_lines[stacktrace_lines_start:line_number], line_number) 321 return (log_lines[stacktrace_lines_start:line_number], line_number)
312 322
313 def parse_stacktraces(self, buckets, line_number): 323 def parse_stacktraces(self, buckets, line_number):
314 """Parses lines in self.log_lines as stacktrace. 324 """Parses lines in self.log_lines as stacktrace.
315 325
316 Valid stacktrace lines are stored into self.mmap_stacktrace_lines and 326 Valid stacktrace lines are stored into self.mmap_stacktrace_lines and
317 self.malloc_stacktrace_lines. 327 self.malloc_stacktrace_lines.
318 328
319 Args: 329 Args:
320 buckets: A dict mapping bucket ids and their corresponding Bucket 330 buckets: A dict mapping bucket ids and their corresponding Bucket
321 objects. 331 objects.
322 line_number: An integer representing the starting line number in 332 line_number: An integer representing the starting line number in
323 log_lines. 333 log_lines.
324 334
325 Raises: 335 Raises:
326 RuntimeException for invalid dump versions. 336 ParsingException for invalid dump versions.
327 """ 337 """
328 sys.stderr.write(' heap profile dump version: %s\n' % self.log_version) 338 sys.stderr.write(' Version: %s\n' % self.log_version)
329 339
330 if self.log_version in (DUMP_DEEP_3, DUMP_DEEP_4): 340 if self.log_version in (DUMP_DEEP_3, DUMP_DEEP_4):
331 (self.mmap_stacktrace_lines, line_number) = ( 341 (self.mmap_stacktrace_lines, line_number) = (
332 self.parse_stacktraces_while_valid( 342 self.parse_stacktraces_while_valid(
333 buckets, self.log_lines, line_number)) 343 buckets, self.log_lines, line_number))
334 line_number = self.skip_lines_while( 344 (line_number, found) = self.skip_lines_while(
335 line_number, len(self.log_lines), 345 line_number, len(self.log_lines),
336 lambda n: self.log_lines[n] != 'MALLOC_STACKTRACES:\n') 346 lambda n: self.log_lines[n] != 'MALLOC_STACKTRACES:\n')
337 (self.malloc_stacktrace_lines, line_number) = ( 347 (self.malloc_stacktrace_lines, line_number) = (
338 self.parse_stacktraces_while_valid( 348 self.parse_stacktraces_while_valid(
339 buckets, self.log_lines, line_number)) 349 buckets, self.log_lines, line_number))
340 350
341 elif self.log_version == DUMP_DEEP_2: 351 elif self.log_version == DUMP_DEEP_2:
342 (self.mmap_stacktrace_lines, line_number) = ( 352 (self.mmap_stacktrace_lines, line_number) = (
343 self.parse_stacktraces_while_valid( 353 self.parse_stacktraces_while_valid(
344 buckets, self.log_lines, line_number)) 354 buckets, self.log_lines, line_number))
345 line_number = self.skip_lines_while( 355 (line_number, found) = self.skip_lines_while(
346 line_number, len(self.log_lines), 356 line_number, len(self.log_lines),
347 lambda n: self.log_lines[n] != 'MALLOC_STACKTRACES:\n') 357 lambda n: self.log_lines[n] != 'MALLOC_STACKTRACES:\n')
348 (self.malloc_stacktrace_lines, line_number) = ( 358 (self.malloc_stacktrace_lines, line_number) = (
349 self.parse_stacktraces_while_valid( 359 self.parse_stacktraces_while_valid(
350 buckets, self.log_lines, line_number)) 360 buckets, self.log_lines, line_number))
351 self.malloc_stacktrace_lines.extend(self.mmap_stacktrace_lines) 361 self.malloc_stacktrace_lines.extend(self.mmap_stacktrace_lines)
352 self.mmap_stacktrace_lines = [] 362 self.mmap_stacktrace_lines = []
353 363
354 elif self.log_version == DUMP_DEEP_1: 364 elif self.log_version == DUMP_DEEP_1:
355 (self.malloc_stacktrace_lines, line_number) = ( 365 (self.malloc_stacktrace_lines, line_number) = (
356 self.parse_stacktraces_while_valid( 366 self.parse_stacktraces_while_valid(
357 buckets, self.log_lines, line_number)) 367 buckets, self.log_lines, line_number))
358 368
359 else: 369 else:
360 raise RuntimeError('invalid heap profile dump version: %s' % ( 370 raise ParsingException('invalid heap profile dump version: %s' % (
361 self.log_version)) 371 self.log_version))
362 372
363 def parse_global_stats(self): 373 def parse_global_stats(self):
364 """Parses lines in self.log_lines as global stats.""" 374 """Parses lines in self.log_lines as global stats."""
365 ln = self.skip_lines_while( 375 (ln, found) = self.skip_lines_while(
366 0, len(self.log_lines), 376 0, len(self.log_lines),
367 lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n') 377 lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n')
368 378
369 if self.log_version == DUMP_DEEP_4: 379 if self.log_version == DUMP_DEEP_4:
370 global_stat_names = [ 380 global_stat_names = [
371 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', 381 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',
372 'nonprofiled-absent', 'nonprofiled-anonymous', 382 'nonprofiled-absent', 'nonprofiled-anonymous',
373 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', 383 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
374 'nonprofiled-stack', 'nonprofiled-other', 384 'nonprofiled-stack', 'nonprofiled-other',
375 'profiled-mmap', 'profiled-malloc'] 385 'profiled-mmap', 'profiled-malloc']
376 else: 386 else:
377 global_stat_names = [ 387 global_stat_names = [
378 'total', 'file', 'anonymous', 'other', 'mmap', 'tcmalloc'] 388 'total', 'file', 'anonymous', 'other', 'mmap', 'tcmalloc']
379 389
380 for prefix in global_stat_names: 390 for prefix in global_stat_names:
381 ln = self.skip_lines_while( 391 (ln, found) = self.skip_lines_while(
382 ln, len(self.log_lines), 392 ln, len(self.log_lines),
383 lambda n: self.log_lines[n].split()[0] != prefix) 393 lambda n: self.log_lines[n].split()[0] != prefix)
384 words = self.log_lines[ln].split() 394 words = self.log_lines[ln].split()
385 self.counters[prefix + '_virtual'] = int(words[-2]) 395 self.counters[prefix + '_virtual'] = int(words[-2])
386 self.counters[prefix + '_committed'] = int(words[-1]) 396 self.counters[prefix + '_committed'] = int(words[-1])
387 397
388 def parse_version(self): 398 def parse_version(self):
389 """Parses a version string in self.log_lines. 399 """Parses a version string in self.log_lines.
390 400
391 Returns: 401 Returns:
392 A pair of (a string representing a version of the stacktrace dump, 402 A pair of (a string representing a version of the stacktrace dump,
393 and an integer indicating a line number next to the version string). 403 and an integer indicating a line number next to the version string).
394 404
395 Raises: 405 Raises:
396 RuntimeException for invalid dump versions. 406 ParsingException for invalid dump versions.
397 """ 407 """
398 version = '' 408 version = ''
399 409
400 # Skip until an identifiable line. 410 # Skip until an identifiable line.
401 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') 411 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
402 ln = self.skip_lines_while( 412 if len(self.log_lines) <= 0:
M-A Ruel 2012/04/16 12:20:55 len() < 0? Just use: if not self.log_lines:
Dai Mikurube (NOT FULLTIME) 2012/04/17 03:26:32 Done.
413 raise ParsingException('Empty heap dump file.')
414 (ln, found) = self.skip_lines_while(
403 0, len(self.log_lines), 415 0, len(self.log_lines),
404 lambda n: not self.log_lines[n].startswith(headers)) 416 lambda n: not self.log_lines[n].startswith(headers))
417 if not found:
418 raise ParsingException('Invalid heap dump file (no version header).')
405 419
406 # Identify a version. 420 # Identify a version.
407 if self.log_lines[ln].startswith('heap profile: '): 421 if self.log_lines[ln].startswith('heap profile: '):
408 version = self.log_lines[ln][13:].strip() 422 version = self.log_lines[ln][13:].strip()
409 if (version == DUMP_DEEP_2 or version == DUMP_DEEP_3 or 423 if (version == DUMP_DEEP_2 or version == DUMP_DEEP_3 or
410 version == DUMP_DEEP_4): 424 version == DUMP_DEEP_4):
411 ln = self.skip_lines_while( 425 (ln, found) = self.skip_lines_while(
412 ln, len(self.log_lines), 426 ln, len(self.log_lines),
413 lambda n: self.log_lines[n] != 'MMAP_STACKTRACES:\n') 427 lambda n: self.log_lines[n] != 'MMAP_STACKTRACES:\n')
414 else: 428 else:
415 raise RuntimeError('invalid heap profile dump version: %s' % version) 429 raise ParsingException('invalid heap profile dump version: %s'
430 % version)
416 elif self.log_lines[ln] == 'STACKTRACES:\n': 431 elif self.log_lines[ln] == 'STACKTRACES:\n':
417 version = DUMP_DEEP_1 432 version = DUMP_DEEP_1
418 elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n': 433 elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n':
419 version = DUMP_DEEP_2 434 version = DUMP_DEEP_2
420 435
421 return (version, ln) 436 return (version, ln)
422 437
423 def parse_log(self, buckets): 438 def parse_log(self, buckets):
424 self.log_version, ln = self.parse_version() 439 self.log_version, ln = self.parse_version()
425 self.parse_global_stats() 440 self.parse_global_stats()
(...skipping 313 matching lines...) Expand 10 before | Expand all | Expand 10 after
739 break 754 break
740 n += 1 755 n += 1
741 continue 756 continue
742 sys.stderr.write('reading buckets from %s\n' % (buckets_path)) 757 sys.stderr.write('reading buckets from %s\n' % (buckets_path))
743 with open(buckets_path, 'r') as buckets_f: 758 with open(buckets_path, 'r') as buckets_f:
744 for l in buckets_f: 759 for l in buckets_f:
745 words = l.split() 760 words = l.split()
746 buckets[int(words[0])] = Bucket(words[1:]) 761 buckets[int(words[0])] = Bucket(words[1:])
747 n += 1 762 n += 1
748 763
749 sys.stderr.write('the number buckets: %d\n' % (bucket_count))
750
751 log_path_list = [log_path] 764 log_path_list = [log_path]
752 765
753 if action in ('--csv', '--json'): 766 if action in ('--csv', '--json'):
754 # search for the sequence of files 767 # search for the sequence of files
755 n = int(log_path[len(log_path) - 9 : len(log_path) - 5]) 768 n = int(log_path[len(log_path) - 9 : len(log_path) - 5])
756 n += 1 # skip current file 769 n += 1 # skip current file
757 while True: 770 while True:
758 p = '%s.%04d.heap' % (prefix, n) 771 p = '%s.%04d.heap' % (prefix, n)
759 if os.path.exists(p): 772 if os.path.exists(p):
760 log_path_list.append(p) 773 log_path_list.append(p)
761 else: 774 else:
762 break 775 break
763 n += 1 776 n += 1
764 777
765 logs = [Log(path, buckets) for path in log_path_list] 778 logs = []
779 for path in log_path_list:
780 new_log = Log(path)
781 sys.stderr.write('Parsing a dump: %s\n' % path)
782 try:
783 new_log.parse_log(buckets)
M-A Ruel 2012/04/16 12:20:55 The functional part of me dislike the fact that a
Dai Mikurube (NOT FULLTIME) 2012/04/17 03:26:32 I basically agree with you. I'll do that in anoth
784 except ParsingException:
785 sys.stderr.write(' Ignored an invalid dump: %s\n' % path)
786 else:
787 logs.append(new_log)
766 788
767 sys.stderr.write('getting symbols\n') 789 sys.stderr.write('getting symbols\n')
768 update_symbols(symbol_path, maps_lines, chrome_path) 790 update_symbols(symbol_path, maps_lines, chrome_path)
769 791
 770 # TODO(dmikurube): Many modes now. Split them into separate functions. 792 # TODO(dmikurube): Many modes now. Split them into separate functions.
771 if action == '--stacktrace': 793 if action == '--stacktrace':
772 logs[0].dump_stacktrace(buckets) 794 logs[0].dump_stacktrace(buckets)
773 795
774 elif action == '--csv': 796 elif action == '--csv':
775 sys.stdout.write(','.join(components)) 797 sys.stdout.write(','.join(components))
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
817 839
818 elif action == '--pprof': 840 elif action == '--pprof':
819 if len(sys.argv) > 5: 841 if len(sys.argv) > 5:
820 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5]) 842 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5])
821 else: 843 else:
822 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None) 844 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None)
823 845
824 846
825 if __name__ == '__main__': 847 if __name__ == '__main__':
826 sys.exit(main()) 848 sys.exit(main())
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698