OLD | NEW |
1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import copy | |
6 import datetime | |
7 import logging | 5 import logging |
8 import os | 6 import os |
9 import re | |
10 import time | |
11 | |
12 from lib.exceptions import EmptyDumpException, InvalidDumpException | |
13 from lib.exceptions import ObsoleteDumpVersionException, ParsingException | |
14 from lib.pageframe import PageFrame | |
15 from lib.range_dict import ExclusiveRangeDict | |
16 from lib.symbol import procfs | |
17 | 7 |
18 | 8 |
19 LOGGER = logging.getLogger('dmprof') | 9 LOGGER = logging.getLogger('dmprof') |
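# Indices into a whitespace-split stacktrace line; such a line looks | |
# like "65536 4096 10 2 @ 123" (values illustrative), i.e. | |
# "<virtual> <committed> <allocs> <frees> @ <bucket_id>". | |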
20 VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _AT, BUCKET_ID = range(6) | |
21 | |
22 | |
23 # Heap Profile Dump versions | |
24 | |
25 # DUMP_DEEP_[1-4] are obsolete. | |
26 # DUMP_DEEP_2+ distinguish mmap regions and malloc chunks. | |
27 # DUMP_DEEP_3+ don't include allocation functions in their stack dumps. | |
28 # DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*". | |
29 # DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1. | |
30 # DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3. | |
31 DUMP_DEEP_1 = 'DUMP_DEEP_1' | |
32 DUMP_DEEP_2 = 'DUMP_DEEP_2' | |
33 DUMP_DEEP_3 = 'DUMP_DEEP_3' | |
34 DUMP_DEEP_4 = 'DUMP_DEEP_4' | |
35 | |
36 DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4) | |
37 | |
38 # DUMP_DEEP_5 doesn't separate sections for malloc and mmap. | |
39 # malloc and mmap are identified in bucket files. | |
40 # DUMP_DEEP_5 should be processed by POLICY_DEEP_4. | |
41 DUMP_DEEP_5 = 'DUMP_DEEP_5' | |
42 | |
43 # DUMP_DEEP_6 adds an mmap list to DUMP_DEEP_5. | |
44 DUMP_DEEP_6 = 'DUMP_DEEP_6' | |
45 | 10 |
46 | 11 |
47 class Dump(object): | 12 class Dump(object): |
48 """Represents a heap profile dump.""" | 13 """Represents a heap profile dump.""" |
49 | 14 def __init__(self): |
50 _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') | 15 pass |
51 | |
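# Matches one address-range line of an MMAP_LIST section, e.g. | |
# " (7f123000)-(7f130000)   hooked <details>" (addresses illustrative). | |
# A parenthesized address is clipped to the enclosing VMA, and is | |
# replaced by the VMA's begin/end when the map is built below. | |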
52 _HOOK_PATTERN = re.compile( | |
53 r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+' | |
54 r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE) | |
55 | |
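# <details> of hooked/unhooked lines, e.g. (values illustrative): | |
#   hooked:   "mmap-profiler 4096 / 8192 @ 123" | |
#   unhooked: "unknown 4096 / 8192" | |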
56 _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' | |
57 '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)') | |
58 _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' | |
59 '(?P<RESERVED>[0-9]+)') | |
60 | |
61 _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)') | |
62 _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)') | |
63 | |
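# META "Time:" lines come in two forms (values illustrative): a local | |
# timestamp "Time: 2013/01/02 03:04:05.678" or plain epoch seconds | |
# "Time: 1357527845". | |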
64 _TIME_PATTERN_FORMAT = re.compile( | |
65 r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?') | |
66 _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$') | |
67 | |
68 def __init__(self, path, modified_time): | |
69 self._path = path | |
70 matched = self._PATH_PATTERN.match(path) | |
71 self._pid = int(matched.group(2)) | |
72 self._count = int(matched.group(3)) | |
73 self._time = modified_time | |
74 self._map = {} | |
75 self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute) | |
76 self._stacktrace_lines = [] | |
77 self._global_stats = {} # used only in apply_policy | |
78 | |
79 self._run_id = '' | |
80 self._pagesize = 4096 | |
81 self._pageframe_length = 0 | |
82 self._pageframe_encoding = '' | |
83 self._has_pagecount = False | |
84 | |
85 self._version = '' | |
86 self._lines = [] | |
87 | 16 |
88 @property | 17 @property |
89 def path(self): | 18 def path(self): |
90 return self._path | 19 raise NotImplementedError |
91 | 20 |
92 @property | 21 @property |
93 def count(self): | 22 def count(self): |
94 return self._count | 23 raise NotImplementedError |
95 | 24 |
96 @property | 25 @property |
97 def time(self): | 26 def time(self): |
98 return self._time | 27 raise NotImplementedError |
99 | 28 |
100 @property | 29 @property |
101 def iter_map(self): | 30 def iter_map(self): |
102 for region in sorted(self._map.iteritems()): | 31 raise NotImplementedError |
103 yield region[0], region[1] | |
104 | 32 |
105 @property | 33 @property |
106 def iter_stacktrace(self): | 34 def iter_stacktrace(self): |
107 for line in self._stacktrace_lines: | 35 raise NotImplementedError |
108 words = line.split() | |
109 yield (int(words[BUCKET_ID]), | |
110 int(words[VIRTUAL]), | |
111 int(words[COMMITTED]), | |
112 int(words[ALLOC_COUNT]), | |
113 int(words[FREE_COUNT])) | |
114 | 36 |
115 def global_stat(self, name): | 37 def global_stat(self, name): |
116 return self._global_stats[name] | 38 raise NotImplementedError |
117 | 39 |
118 @property | 40 @property |
119 def run_id(self): | 41 def run_id(self): |
120 return self._run_id | 42 raise NotImplementedError |
121 | 43 |
122 @property | 44 @property |
123 def pagesize(self): | 45 def pagesize(self): |
124 return self._pagesize | 46 raise NotImplementedError |
125 | 47 |
126 @property | 48 @property |
127 def pageframe_length(self): | 49 def pageframe_length(self): |
128 return self._pageframe_length | 50 raise NotImplementedError |
129 | 51 |
130 @property | 52 @property |
131 def pageframe_encoding(self): | 53 def pageframe_encoding(self): |
132 return self._pageframe_encoding | 54 raise NotImplementedError |
133 | 55 |
134 @property | 56 @property |
135 def has_pagecount(self): | 57 def has_pagecount(self): |
136 return self._has_pagecount | 58 raise NotImplementedError |
137 | 59 |
138 @staticmethod | 60 @staticmethod |
139 def load(path, log_header='Loading a heap profile dump: '): | 61 def load(path, log_header='Loading a heap profile dump: '): |
140 """Loads a heap profile dump. | 62 """Loads a heap profile dump. |
141 | 63 |
142 Args: | 64 Args: |
143 path: A file path string to load. | 65 path: A file path string to load. |
144 log_header: A preceding string for log messages. | 66 log_header: A preceding string for log messages. |
145 | 67 |
146 Returns: | 68 Returns: |
147 A loaded Dump object. | 69 A loaded Dump object. |
148 | 70 |
149 Raises: | 71 Raises: |
150 ParsingException for invalid heap profile dumps. | 72 ParsingException for invalid heap profile dumps. |
151 """ | 73 """ |
152 dump = Dump(path, os.stat(path).st_mtime) | 74 from lib.deep_dump import DeepDump |
| 75 dump = DeepDump(path, os.stat(path).st_mtime) |
153 with open(path, 'r') as f: | 76 with open(path, 'r') as f: |
154 dump.load_file(f, log_header) | 77 dump.load_file(f, log_header) |
155 return dump | 78 return dump |
156 | 79 |
157 def load_file(self, f, log_header): | |
158 self._lines = [line for line in f | |
159 if line and not line.startswith('#')] | |
160 | |
161 try: | |
162 self._version, ln = self._parse_version() | |
163 self._parse_meta_information() | |
164 if self._version == DUMP_DEEP_6: | |
165 self._parse_mmap_list() | |
166 self._parse_global_stats() | |
167 self._extract_stacktrace_lines(ln) | |
168 except EmptyDumpException: | |
169 LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path)) | |
170 except ParsingException, e: | |
171 LOGGER.error('%s%s ...error %s' % (log_header, self._path, e)) | |
172 raise | |
173 else: | |
174 LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version)) | |
175 | |
176 def _parse_version(self): | |
177 """Parses a version string in self._lines. | |
178 | |
179 Returns: | |
180 A pair of (a string representing the version of the stacktrace dump, | |
181 and an integer giving the line number at which parsing should resume). | |
182 | |
183 Raises: | |
184 ParsingException for invalid dump versions. | |
185 """ | |
186 version = '' | |
187 | |
188 # Skip until an identifiable line. | |
189 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') | |
190 if not self._lines: | |
191 raise EmptyDumpException('Empty heap dump file.') | |
192 (ln, found) = skip_while( | |
193 0, len(self._lines), | |
194 lambda n: not self._lines[n].startswith(headers)) | |
195 if not found: | |
196 raise InvalidDumpException('No version header.') | |
197 | |
198 # Identify a version. | |
199 if self._lines[ln].startswith('heap profile: '): | |
200 version = self._lines[ln][13:].strip() | |
201 if version in (DUMP_DEEP_5, DUMP_DEEP_6): | |
202 (ln, _) = skip_while( | |
203 ln, len(self._lines), | |
204 lambda n: self._lines[n] != 'STACKTRACES:\n') | |
205 elif version in DUMP_DEEP_OBSOLETE: | |
206 raise ObsoleteDumpVersionException(version) | |
207 else: | |
208 raise InvalidDumpException('Invalid version: %s' % version) | |
209 elif self._lines[ln] == 'STACKTRACES:\n': | |
210 raise ObsoleteDumpVersionException(DUMP_DEEP_1) | |
211 elif self._lines[ln] == 'MMAP_STACKTRACES:\n': | |
212 raise ObsoleteDumpVersionException(DUMP_DEEP_2) | |
213 | |
214 return (version, ln) | |
215 | |
216 def _parse_global_stats(self): | |
217 """Parses lines in self._lines as global stats.""" | |
218 (ln, _) = skip_while( | |
219 0, len(self._lines), | |
220 lambda n: self._lines[n] != 'GLOBAL_STATS:\n') | |
221 | |
222 global_stat_names = [ | |
223 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack', | |
224 'other', 'nonprofiled-absent', 'nonprofiled-anonymous', | |
225 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', | |
226 'nonprofiled-stack', 'nonprofiled-other', | |
227 'profiled-mmap', 'profiled-malloc'] | |
228 | |
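# Each stats line ends with two integers, the virtual and committed | |
# sizes, read below as its last two whitespace-separated words. | |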
229 for prefix in global_stat_names: | |
230 (ln, _) = skip_while( | |
231 ln, len(self._lines), | |
232 lambda n: self._lines[n].split()[0] != prefix) | |
233 words = self._lines[ln].split() | |
234 self._global_stats[prefix + '_virtual'] = int(words[-2]) | |
235 self._global_stats[prefix + '_committed'] = int(words[-1]) | |
236 | |
237 def _parse_meta_information(self): | |
238 """Parses lines in self._lines for meta information.""" | |
239 (ln, found) = skip_while( | |
240 0, len(self._lines), | |
241 lambda n: self._lines[n] != 'META:\n') | |
242 if not found: | |
243 return | |
244 ln += 1 | |
245 | |
246 while True: | |
247 if self._lines[ln].startswith('Time:'): | |
248 matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln]) | |
249 matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln]) | |
250 if matched_format: | |
251 self._time = time.mktime(datetime.datetime.strptime( | |
252 matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple()) | |
253 if matched_format.group(2): | |
254 self._time += float(matched_format.group(2)[1:]) / 1000.0 | |
255 elif matched_seconds: | |
256 self._time = float(matched_seconds.group(1)) | |
257 elif self._lines[ln].startswith('Reason:'): | |
258 pass # Nothing to do for 'Reason:' | |
259 elif self._lines[ln].startswith('PageSize: '): | |
260 self._pagesize = int(self._lines[ln][10:]) | |
261 elif self._lines[ln].startswith('CommandLine:'): | |
262 pass | |
263 elif (self._lines[ln].startswith('PageFrame: ') or | |
264 self._lines[ln].startswith('PFN: ')): | |
265 if self._lines[ln].startswith('PageFrame: '): | |
266 words = self._lines[ln][11:].split(',') | |
267 else: | |
268 words = self._lines[ln][5:].split(',') | |
269 for word in words: | |
270 if word == '24': | |
271 self._pageframe_length = 24 | |
272 elif word == 'Base64': | |
273 self._pageframe_encoding = 'base64' | |
274 elif word == 'PageCount': | |
275 self._has_pagecount = True | |
276 elif self._lines[ln].startswith('RunID: '): | |
277 self._run_id = self._lines[ln][7:].strip() | |
278 elif (self._lines[ln].startswith('MMAP_LIST:') or | |
279 self._lines[ln].startswith('GLOBAL_STATS:')): | |
280 # Stop at the "MMAP_LIST:" or "GLOBAL_STATS:" section header. | |
281 break | |
282 else: | |
283 pass | |
284 ln += 1 | |
285 | |
286 def _parse_mmap_list(self): | |
287 """Parses lines in self._lines as a mmap list.""" | |
288 (ln, found) = skip_while( | |
289 0, len(self._lines), | |
290 lambda n: self._lines[n] != 'MMAP_LIST:\n') | |
291 if not found: | |
292 return {} | |
293 | |
294 ln += 1 | |
295 self._map = {} | |
296 current_vma = {} | |
297 pageframe_list = [] | |
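# Each MMAP_LIST entry is one of: a /proc/maps-style line opening a | |
# new VMA, an optional " PF: " page-frame list, or an address-range | |
# ("hooked"/"unhooked") line handled by the patterns above. | |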
298 while True: | |
299 entry = procfs.ProcMaps.parse_line(self._lines[ln]) | |
300 if entry: | |
301 current_vma = {} | |
302 for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end): | |
303 for key, value in entry.as_dict().iteritems(): | |
304 attr[key] = value | |
305 current_vma[key] = value | |
306 ln += 1 | |
307 continue | |
308 | |
309 if self._lines[ln].startswith(' PF: '): | |
310 for pageframe in self._lines[ln][5:].split(): | |
311 pageframe_list.append(PageFrame.parse(pageframe, self._pagesize)) | |
312 ln += 1 | |
313 continue | |
314 | |
315 matched = self._HOOK_PATTERN.match(self._lines[ln]) | |
316 if not matched: | |
317 break | |
318 # 2: starting address | |
319 # 5: end address | |
320 # 7: hooked or unhooked | |
321 # 8: additional information | |
322 if matched.group(7) == 'hooked': | |
323 submatched = self._HOOKED_PATTERN.match(matched.group(8)) | |
324 if not submatched: | |
325 submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8)) | |
326 elif matched.group(7) == 'unhooked': | |
327 submatched = self._UNHOOKED_PATTERN.match(matched.group(8)) | |
328 if not submatched: | |
329 submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8)) | |
330 else: | |
331 assert matched.group(7) in ['hooked', 'unhooked'] | |
332 | |
333 submatched_dict = submatched.groupdict() | |
334 region_info = { 'vma': current_vma } | |
335 if submatched_dict.get('TYPE'): | |
336 region_info['type'] = submatched_dict['TYPE'].strip() | |
337 if submatched_dict.get('COMMITTED'): | |
338 region_info['committed'] = int(submatched_dict['COMMITTED']) | |
339 if submatched_dict.get('RESERVED'): | |
340 region_info['reserved'] = int(submatched_dict['RESERVED']) | |
341 if submatched_dict.get('BUCKETID'): | |
342 region_info['bucket_id'] = int(submatched_dict['BUCKETID']) | |
343 | |
344 if matched.group(1) == '(': | |
345 start = current_vma['begin'] | |
346 else: | |
347 start = int(matched.group(2), 16) | |
348 if matched.group(4) == '(': | |
349 end = current_vma['end'] | |
350 else: | |
351 end = int(matched.group(5), 16) | |
352 | |
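# If the page-frame list was truncated at either end of the region, | |
# shrink the first/last PageFrame by the partial-page amount so the | |
# recorded sizes match the [start, end) range. | |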
353 if pageframe_list and pageframe_list[0].start_truncated: | |
354 pageframe_list[0].set_size( | |
355 pageframe_list[0].size - start % self._pagesize) | |
356 if pageframe_list and pageframe_list[-1].end_truncated: | |
357 pageframe_list[-1].set_size( | |
358 pageframe_list[-1].size - (self._pagesize - end % self._pagesize)) | |
359 region_info['pageframe'] = pageframe_list | |
360 pageframe_list = [] | |
361 | |
362 self._map[(start, end)] = (matched.group(7), region_info) | |
363 ln += 1 | |
364 | |
365 def _extract_stacktrace_lines(self, line_number): | |
366 Extracts stacktrace lines from self._lines. | |
367 | |
368 Valid stacktrace lines are stored into self._stacktrace_lines. | |
369 | |
370 Args: | |
371 line_number: A line number to start parsing in lines. | |
372 | |
373 Raises: | |
374 ParsingException for invalid dump versions. | |
375 """ | |
376 if self._version in (DUMP_DEEP_5, DUMP_DEEP_6): | |
377 (line_number, _) = skip_while( | |
378 line_number, len(self._lines), | |
379 lambda n: not self._lines[n].split()[0].isdigit()) | |
380 stacktrace_start = line_number | |
381 (line_number, _) = skip_while( | |
382 line_number, len(self._lines), | |
383 lambda n: self._check_stacktrace_line(self._lines[n])) | |
384 self._stacktrace_lines = self._lines[stacktrace_start:line_number] | |
385 | |
386 elif self._version in DUMP_DEEP_OBSOLETE: | |
387 raise ObsoleteDumpVersionException(self._version) | |
388 | |
389 else: | |
390 raise InvalidDumpException('Invalid version: %s' % self._version) | |
391 | |
392 @staticmethod | |
393 def _check_stacktrace_line(stacktrace_line): | |
394 Checks if a given stacktrace_line is valid as a stacktrace line. | |
395 | |
396 Args: | |
397 stacktrace_line: A string to be checked. | |
398 | |
399 Returns: | |
400 True if the given stacktrace_line is valid. | |
401 """ | |
402 words = stacktrace_line.split() | |
403 if len(words) < BUCKET_ID + 1: | |
404 return False | |
405 if words[BUCKET_ID - 1] != '@': | |
406 return False | |
407 return True | |
408 | |
409 | 80 |
410 class DumpList(object): | 81 class DumpList(object): |
411 """Represents a sequence of heap profile dumps. | 82 """Represents a sequence of heap profile dumps. |
412 | 83 |
413 Individual dumps are loaded into memory lazily as the sequence is accessed, | 84 Individual dumps are loaded into memory lazily as the sequence is accessed, |
414 either while being iterated through or randomly accessed. Loaded dumps are | 85 either while being iterated through or randomly accessed. Loaded dumps are |
415 not cached, meaning a newly loaded Dump object is returned every time an | 86 not cached, meaning a newly loaded Dump object is returned every time an |
416 element in the list is accessed. | 87 element in the list is accessed. |
417 """ | 88 """ |
418 | 89 |
419 def __init__(self, dump_path_list): | 90 def __init__(self, dump_path_list): |
420 self._dump_path_list = dump_path_list | 91 self._dump_path_list = dump_path_list |
421 | 92 |
422 @staticmethod | 93 @staticmethod |
423 def load(path_list): | 94 def load(path_list): |
424 return DumpList(path_list) | 95 return DumpList(path_list) |
425 | 96 |
426 def __len__(self): | 97 def __len__(self): |
427 return len(self._dump_path_list) | 98 return len(self._dump_path_list) |
428 | 99 |
429 def __iter__(self): | 100 def __iter__(self): |
430 for dump in self._dump_path_list: | 101 for dump in self._dump_path_list: |
431 yield Dump.load(dump) | 102 yield Dump.load(dump) |
432 | 103 |
433 def __getitem__(self, index): | 104 def __getitem__(self, index): |
434 return Dump.load(self._dump_path_list[index]) | 105 return Dump.load(self._dump_path_list[index]) |
435 | |
436 | |
437 class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute): | |
438 """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict.""" | |
439 _DUMMY_ENTRY = procfs.ProcMapsEntry( | |
440 0, # begin | |
441 0, # end | |
442 '-', # readable | |
443 '-', # writable | |
444 '-', # executable | |
445 '-', # private | |
446 0, # offset | |
447 '00', # major | |
448 '00', # minor | |
449 0, # inode | |
450 '' # name | |
451 ) | |
452 | |
453 def __init__(self): | |
454 super(ProcMapsEntryAttribute, self).__init__() | |
455 self._entry = self._DUMMY_ENTRY.as_dict() | |
456 | |
457 def __str__(self): | |
458 return str(self._entry) | |
459 | |
460 def __repr__(self): | |
461 return 'ProcMapsEntryAttribute' + str(self._entry) | |
462 | |
463 def __getitem__(self, key): | |
464 return self._entry[key] | |
465 | |
466 def __setitem__(self, key, value): | |
467 if key not in self._entry: | |
468 raise KeyError(key) | |
469 self._entry[key] = value | |
470 | |
471 def copy(self): | |
472 new_entry = ProcMapsEntryAttribute() | |
473 for key, value in self._entry.iteritems(): | |
474 new_entry[key] = copy.deepcopy(value) | |
475 return new_entry | |
476 | |
477 | |
478 def skip_while(index, max_index, skipping_condition): | |
479 """Increments |index| until |skipping_condition|(|index|) is False. | |
480 | |
481 Returns: | |
482 A pair of the first index for which |skipping_condition| is False | |
483 (or |max_index| if there is none), and a boolean which is True if | |
484 such an index was found before reaching |max_index|. | |
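Example (with a hypothetical |lines| list): | |
skip_while(0, len(lines), lambda n: lines[n] != 'META:\n') | |
returns the index of the first 'META:\n' line and True, or | |
(len(lines), False) if no such line exists. | |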
485 """ | |
486 while skipping_condition(index): | |
487 index += 1 | |
488 if index >= max_index: | |
489 return index, False | |
490 return index, True | |