OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """The deep heap profiler script for Chrome.""" | 6 """The deep heap profiler script for Chrome.""" |
7 | 7 |
8 from datetime import datetime | 8 from datetime import datetime |
9 import json | 9 import json |
| 10 import optparse |
10 import os | 11 import os |
11 import re | 12 import re |
12 import shutil | 13 import shutil |
13 import subprocess | 14 import subprocess |
14 import sys | 15 import sys |
15 import tempfile | 16 import tempfile |
16 | 17 |
17 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( | 18 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( |
18 os.path.dirname(os.path.abspath(__file__)), | 19 os.path.dirname(os.path.abspath(__file__)), |
19 os.pardir, | 20 os.pardir, |
20 'find_runtime_symbols') | 21 'find_runtime_symbols') |
21 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) | 22 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) |
22 | 23 |
23 from prepare_symbol_info import prepare_symbol_info | 24 from prepare_symbol_info import prepare_symbol_info |
24 from find_runtime_symbols import find_runtime_symbols_list | 25 from find_runtime_symbols import find_runtime_symbols_list |
25 | 26 |
26 BUCKET_ID = 5 | 27 BUCKET_ID = 5 |
27 VIRTUAL = 0 | 28 VIRTUAL = 0 |
28 COMMITTED = 1 | 29 COMMITTED = 1 |
29 ALLOC_COUNT = 2 | 30 ALLOC_COUNT = 2 |
30 FREE_COUNT = 3 | 31 FREE_COUNT = 3 |
31 NULL_REGEX = re.compile('') | 32 NULL_REGEX = re.compile('') |
32 | 33 |
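The index constants above address the whitespace-separated columns of a stacktrace line in a dump. A minimal sketch of the assumed layout (the line content below is hypothetical):

    line = '4096 4096 10 2 @ 123'   # virtual committed allocs frees @ bucket_id
    words = line.split()
    committed = int(words[COMMITTED])   # -> 4096
    bucket_id = int(words[BUCKET_ID])   # -> 123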
| 34 POLICIES_JSON_PATH = os.path.join( |
| 35 os.path.dirname(os.path.abspath(__file__)), |
| 36 'policies.json') |
| 37 |
33 # Heap Profile Dump versions | 38 # Heap Profile Dump versions |
34 | 39 |
35 # DUMP_DEEP_1 is OBSOLETE. | 40 # DUMP_DEEP_1 is OBSOLETE. |
36 # DUMP_DEEP_1 DOES NOT distinguish mmap regions and malloc chunks. | 41 # DUMP_DEEP_1 DOES NOT distinguish mmap regions and malloc chunks. |
37 # Their stacktraces DO contain mmap* or tc-* at their tops. | 42 # Their stacktraces DO contain mmap* or tc-* at their tops. |
38 # They should be processed by POLICY_DEEP_1. | 43 # They should be processed by POLICY_DEEP_1. |
39 DUMP_DEEP_1 = 'DUMP_DEEP_1' | 44 DUMP_DEEP_1 = 'DUMP_DEEP_1' |
40 | 45 |
41 # DUMP_DEEP_2 is OBSOLETE. | 46 # DUMP_DEEP_2 is OBSOLETE. |
42 # DUMP_DEEP_2 DOES distinguish mmap regions and malloc chunks. | 47 # DUMP_DEEP_2 DOES distinguish mmap regions and malloc chunks. |
(...skipping 22 matching lines...) |
65 # Heap Profile Policy versions | 70 # Heap Profile Policy versions |
66 | 71 |
67 # POLICY_DEEP_1 DOES NOT include allocation_type columns. | 72 # POLICY_DEEP_1 DOES NOT include allocation_type columns. |
68 # mmap regions are distinguished with mmap frames in the pattern column. | 73 # mmap regions are distinguished with mmap frames in the pattern column. |
69 POLICY_DEEP_1 = 'POLICY_DEEP_1' | 74 POLICY_DEEP_1 = 'POLICY_DEEP_1' |
70 | 75 |
71 # POLICY_DEEP_2 DOES include allocation_type columns. | 76 # POLICY_DEEP_2 DOES include allocation_type columns. |
72 # mmap regions are distinguished with the allocation_type column. | 77 # mmap regions are distinguished with the allocation_type column. |
73 POLICY_DEEP_2 = 'POLICY_DEEP_2' | 78 POLICY_DEEP_2 = 'POLICY_DEEP_2' |
74 | 79 |
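For reference, a hedged sketch of the two policy-line formats described above; component names and patterns here are hypothetical:

    # POLICY_DEEP_1: <component> <pattern>
    tc-webkit       .*WebCore.*
    # POLICY_DEEP_2: <component> <allocation_type> <pattern>
    mmap-v8         mmap    .*v8::.*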
75 # TODO(dmikurube): Avoid global variables. | |
76 address_symbol_dict = {} | |
77 appeared_addresses = set() | |
78 components = [] | |
79 | |
80 | 80 |
81 class EmptyDumpException(Exception): | 81 class EmptyDumpException(Exception): |
82 def __init__(self, value): | 82 def __init__(self, value): |
83 self.value = value | 83 self.value = value |
84 def __str__(self): | 84 def __str__(self): |
85 return repr(self.value) | 85 return repr(self.value) |
86 | 86 |
87 | 87 |
88 class ParsingException(Exception): | 88 class ParsingException(Exception): |
89 def __init__(self, value): | 89 def __init__(self, value): |
90 self.value = value | 90 self.value = value |
91 def __str__(self): | 91 def __str__(self): |
92 return repr(self.value) | 92 return repr(self.value) |
93 | 93 |
94 | 94 |
95 class InvalidDumpException(ParsingException): | 95 class InvalidDumpException(ParsingException): |
96 def __init__(self, value): | 96 def __init__(self, value): |
97 self.value = value | 97 self.value = value |
98 def __str__(self): | 98 def __str__(self): |
99 return "invalid heap profile dump: %s" % repr(self.value) | 99 return "invalid heap profile dump: %s" % repr(self.value) |
100 | 100 |
101 | 101 |
102 class ObsoleteDumpVersionException(ParsingException): | 102 class ObsoleteDumpVersionException(ParsingException): |
103 def __init__(self, value): | 103 def __init__(self, value): |
104 self.value = value | 104 self.value = value |
105 def __str__(self): | 105 def __str__(self): |
106 return "obsolete heap profile dump version: %s" % repr(self.value) | 106 return "obsolete heap profile dump version: %s" % repr(self.value) |
107 | 107 |
108 | 108 |
109 class Policy(object): | 109 class Rule(object): |
| 110 """Represents one matching rule in a policy file.""" |
110 | 111 |
111 def __init__(self, name, mmap, pattern): | 112 def __init__(self, name, mmap, pattern): |
112 self.name = name | 113 self.name = name |
113 self.mmap = mmap | 114 self.mmap = mmap |
114 self.condition = re.compile(pattern + r'\Z') | 115 self.condition = re.compile(pattern + r'\Z') |
115 | 116 |
116 | 117 |
117 def get_component(policy_list, bucket): | 118 class Policy(object): |
| 119 """Represents a policy, a content of a policy file.""" |
| 120 |
| 121 def __init__(self, rules, version, components): |
| 122 self.rules = rules |
| 123 self.version = version |
| 124 self.components = components |
| 125 |
| 126 def append_rule(self, rule): |
| 127 self.rules.append(rule) |
| 128 |
| 129 |
| 130 def get_component(rule_list, bucket, symbols): |
118 """Returns a component name which a given bucket belongs to. | 131 """Returns a component name which a given bucket belongs to. |
119 | 132 |
120 Args: | 133 Args: |
121 policy_list: A list containing Policy objects. (Parsed policy data by | 134 rule_list: A list of Rule objects. |
122 parse_policy.) | |
123 bucket: A Bucket object to be searched for. | 135 bucket: A Bucket object to be searched for. |
| 136 symbols: A dict mapping runtime addresses to symbol names. |
124 | 137 |
125 Returns: | 138 Returns: |
126 A string representing a component name. | 139 A string representing a component name. |
127 """ | 140 """ |
128 if not bucket: | 141 if not bucket: |
129 return 'no-bucket' | 142 return 'no-bucket' |
130 if bucket.component: | 143 if bucket.component_cache: |
131 return bucket.component | 144 return bucket.component_cache |
132 | 145 |
133 stacktrace = ''.join( | 146 stacktrace = ''.join(symbols[a] + ' ' for a in bucket.stacktrace).strip() |
134 address_symbol_dict[a] + ' ' for a in bucket.stacktrace).strip() | |
135 | 147 |
136 for policy in policy_list: | 148 for rule in rule_list: |
137 if bucket.mmap == policy.mmap and policy.condition.match(stacktrace): | 149 if bucket.mmap == rule.mmap and rule.condition.match(stacktrace): |
138 bucket.component = policy.name | 150 bucket.component_cache = rule.name |
139 return policy.name | 151 return rule.name |
140 | 152 |
141 assert False | 153 assert False |
142 | 154 |
143 | 155 |
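A hedged sketch of how a rule's anchored pattern matches the space-joined, symbolized stacktrace; the pattern and symbols below are hypothetical:

    import re
    condition = re.compile(r'tc-.* WebCore::.*' + r'\Z')   # as in Rule.__init__
    stacktrace = 'tc-new WebCore::Node::create'            # joined from symbols
    assert condition.match(stacktrace)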
144 class Bucket(object): | 156 class Bucket(object): |
| 157 """Represents a bucket, which is a unit of memory classification.""" |
145 | 158 |
146 def __init__(self, stacktrace, mmap): | 159 def __init__(self, stacktrace, mmap): |
147 self.stacktrace = stacktrace | 160 self.stacktrace = stacktrace |
148 self.mmap = mmap | 161 self.mmap = mmap |
149 self.component = '' | 162 self.component_cache = '' |
| 163 |
| 164 def clear_component_cache(self): |
| 165 self.component_cache = '' |
150 | 166 |
151 | 167 |
152 class Log(object): | 168 class Dump(object): |
| 169 """Represents one heap profile dump.""" |
153 | 170 |
154 """A class representing one dumped log data.""" | 171 def __init__(self, dump_path): |
155 def __init__(self, log_path): | 172 self.dump_path = dump_path |
156 self.log_path = log_path | 173 self.dump_lines = [ |
157 self.log_lines = [ | 174 l for l in open(self.dump_path, 'r') if l and not l.startswith('#')] |
158 l for l in open(self.log_path, 'r') if l and not l.startswith('#')] | 175 self.dump_version = '' |
159 self.log_version = '' | |
160 sys.stderr.write('Loading a dump: %s\n' % log_path) | |
161 self.stacktrace_lines = [] | 176 self.stacktrace_lines = [] |
162 self.counters = {} | 177 self.counters = {} |
163 self.log_time = os.stat(self.log_path).st_mtime | 178 self.dump_time = os.stat(self.dump_path).st_mtime |
164 | 179 |
165 def dump_stacktrace(buckets): | 180 def print_stacktrace(self, buckets, symbols): |
166 """Prints a given stacktrace. | 181 """Prints a given stacktrace. |
167 | 182 |
168 Args: | 183 Args: |
169 buckets: A dict mapping bucket ids and their corresponding Bucket | 184 buckets: A dict mapping bucket ids to Bucket objects. |
170 objects. | 185 symbols: A dict mapping runtime addresses to symbol names. |
171 """ | 186 """ |
172 for line in self.stacktrace_lines: | 187 for line in self.stacktrace_lines: |
173 words = line.split() | 188 words = line.split() |
174 bucket = buckets.get(int(words[BUCKET_ID])) | 189 bucket = buckets.get(int(words[BUCKET_ID])) |
175 if not bucket: | 190 if not bucket: |
176 continue | 191 continue |
177 for i in range(0, BUCKET_ID - 1): | 192 for i in range(0, BUCKET_ID - 1): |
178 sys.stdout.write(words[i] + ' ') | 193 sys.stdout.write(words[i] + ' ') |
179 for address in bucket.stacktrace: | 194 for address in bucket.stacktrace: |
180 sys.stdout.write((address_symbol_dict.get(address) or address) + ' ') | 195 sys.stdout.write((symbols.get(address) or address) + ' ') |
181 sys.stdout.write('\n') | 196 sys.stdout.write('\n') |
182 | 197 |
183 @staticmethod | 198 @staticmethod |
184 def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets, | 199 def accumulate_size_for_pprof(stacktrace_lines, rule_list, buckets, |
185 component_name): | 200 component_name, symbols): |
186 """Accumulates size of committed chunks and the number of allocated chunks. | 201 """Accumulates size of committed chunks and the number of allocated chunks. |
187 | 202 |
188 Args: | 203 Args: |
189 stacktrace_lines: A list of strings which are valid as stacktraces. | 204 stacktrace_lines: A list of strings which are valid as stacktraces. |
190 policy_list: A list containing Policy objects. (Parsed policy data by | 205 rule_list: A list of Rule objects. |
191 parse_policy.) | 206 buckets: A dict mapping bucket ids to Bucket objects. |
192 buckets: A dict mapping bucket ids and their corresponding Bucket | |
193 objects. | |
194 component_name: The name of a component for filtering. | 207 component_name: The name of a component for filtering. |
| 208 symbols: A dict mapping runtime addresses to symbol names. |
195 | 209 |
196 Returns: | 210 Returns: |
197 Two integers which are the accumulated size of committed regions and the | 211 Two integers which are the accumulated size of committed regions and the |
198 number of allocated chunks, respectively. | 212 number of allocated chunks, respectively. |
199 """ | 213 """ |
200 com_committed = 0 | 214 com_committed = 0 |
201 com_allocs = 0 | 215 com_allocs = 0 |
202 for line in stacktrace_lines: | 216 for line in stacktrace_lines: |
203 words = line.split() | 217 words = line.split() |
204 bucket = buckets.get(int(words[BUCKET_ID])) | 218 bucket = buckets.get(int(words[BUCKET_ID])) |
205 if (not bucket or | 219 if (not bucket or |
206 (component_name and | 220 (component_name and |
207 component_name != get_component(policy_list, bucket))): | 221 component_name != get_component(rule_list, bucket, symbols))): |
208 continue | 222 continue |
209 | 223 |
210 com_committed += int(words[COMMITTED]) | 224 com_committed += int(words[COMMITTED]) |
211 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) | 225 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) |
212 | 226 |
213 return com_committed, com_allocs | 227 return com_committed, com_allocs |
214 | 228 |
215 @staticmethod | 229 @staticmethod |
216 def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list, | 230 def print_stacktrace_lines_for_pprof(stacktrace_lines, rule_list, |
217 buckets, component_name): | 231 buckets, component_name, symbols): |
218 """Prints information of stacktrace lines for pprof. | 232 """Prints information of stacktrace lines for pprof. |
219 | 233 |
220 Args: | 234 Args: |
221 stacktrace_lines: A list of strings which are valid as stacktraces. | 235 stacktrace_lines: A list of strings which are valid as stacktraces. |
222 policy_list: A list containing Policy objects. (Parsed policy data by | 236 rule_list: A list of Rule objects. |
223 parse_policy.) | 237 buckets: A dict mapping bucket ids to Bucket objects. |
224 buckets: A dict mapping bucket ids and their corresponding Bucket | |
225 objects. | |
226 component_name: The name of a component for filtering. | 238 component_name: The name of a component for filtering. |
| 239 symbols: A dict mapping runtime addresses to symbol names. |
227 """ | 240 """ |
228 for line in stacktrace_lines: | 241 for line in stacktrace_lines: |
229 words = line.split() | 242 words = line.split() |
230 bucket = buckets.get(int(words[BUCKET_ID])) | 243 bucket = buckets.get(int(words[BUCKET_ID])) |
231 if (not bucket or | 244 if (not bucket or |
232 (component_name and | 245 (component_name and |
233 component_name != get_component(policy_list, bucket))): | 246 component_name != get_component(rule_list, bucket, symbols))): |
234 continue | 247 continue |
235 | 248 |
236 sys.stdout.write('%6d: %8s [%6d: %8s] @' % ( | 249 sys.stdout.write('%6d: %8s [%6d: %8s] @' % ( |
237 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | 250 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), |
238 words[COMMITTED], | 251 words[COMMITTED], |
239 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | 252 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), |
240 words[COMMITTED])) | 253 words[COMMITTED])) |
241 for address in bucket.stacktrace: | 254 for address in bucket.stacktrace: |
242 sys.stdout.write(' ' + address) | 255 sys.stdout.write(' ' + address) |
243 sys.stdout.write('\n') | 256 sys.stdout.write('\n') |
244 | 257 |
245 def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name): | 258 def print_for_pprof( |
246 """Converts the log file so it can be processed by pprof. | 259 self, rule_list, buckets, maps_lines, component_name, symbols): |
| 260 """Converts the heap profile dump so it can be processed by pprof. |
247 | 261 |
248 Args: | 262 Args: |
249 policy_list: A list containing Policy objects. (Parsed policy data by | 263 rule_list: A list of Rule objects. |
250 parse_policy.) | 264 buckets: A dict mapping bucket ids to Bucket objects. |
251 buckets: A dict mapping bucket ids and their corresponding Bucket | 265 maps_lines: A list of strings containing /proc/.../maps. |
252 objects. | |
253 mapping_lines: A list of strings containing /proc/.../maps. | |
254 component_name: The name of a component for filtering. | 266 component_name: The name of a component for filtering. |
| 267 symbols: A dict mapping runtime addresses to symbol names. |
255 """ | 268 """ |
256 sys.stdout.write('heap profile: ') | 269 sys.stdout.write('heap profile: ') |
257 com_committed, com_allocs = self.accumulate_size_for_pprof( | 270 com_committed, com_allocs = self.accumulate_size_for_pprof( |
258 self.stacktrace_lines, policy_list, buckets, component_name) | 271 self.stacktrace_lines, rule_list, buckets, component_name, symbols) |
259 | 272 |
260 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( | 273 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( |
261 com_allocs, com_committed, com_allocs, com_committed)) | 274 com_allocs, com_committed, com_allocs, com_committed)) |
262 | 275 |
263 self.dump_stacktrace_lines_for_pprof( | 276 self.print_stacktrace_lines_for_pprof( |
264 self.stacktrace_lines, policy_list, buckets, component_name) | 277 self.stacktrace_lines, rule_list, buckets, component_name, symbols) |
265 | 278 |
266 sys.stdout.write('MAPPED_LIBRARIES:\n') | 279 sys.stdout.write('MAPPED_LIBRARIES:\n') |
267 for line in mapping_lines: | 280 for line in maps_lines: |
268 sys.stdout.write(line) | 281 sys.stdout.write(line) |
269 | 282 |
270 @staticmethod | 283 @staticmethod |
271 def check_stacktrace_line(stacktrace_line, buckets): | 284 def check_stacktrace_line(stacktrace_line, buckets, appeared_addresses): |
272 """Checks if a given stacktrace_line is valid as stacktrace. | 285 """Checks if a given stacktrace_line is valid as stacktrace. |
273 | 286 |
274 Args: | 287 Args: |
275 stacktrace_line: A string to be checked. | 288 stacktrace_line: A string to be checked. |
276 buckets: A dict mapping bucket ids and their corresponding Bucket | 289 buckets: A dict mapping bucket ids to Bucket objects. |
277 objects. | 290 appeared_addresses: A set to which appeared addresses are added. |
278 | 291 |
279 Returns: | 292 Returns: |
280 True if the given stacktrace_line is valid. | 293 True if the given stacktrace_line is valid. |
281 """ | 294 """ |
282 words = stacktrace_line.split() | 295 words = stacktrace_line.split() |
283 if len(words) < BUCKET_ID + 1: | 296 if len(words) < BUCKET_ID + 1: |
284 return False | 297 return False |
285 if words[BUCKET_ID - 1] != '@': | 298 if words[BUCKET_ID - 1] != '@': |
286 return False | 299 return False |
287 bucket = buckets.get(int(words[BUCKET_ID])) | 300 bucket = buckets.get(int(words[BUCKET_ID])) |
(...skipping 10 matching lines...) |
298 A pair of an integer indicating a line number after skipped, and a | 311 A pair of an integer indicating a line number after skipped, and a |
299 boolean value which is True if found a line which skipping_condition | 312 boolean value which is True if found a line which skipping_condition |
300 is False for. | 313 is False for. |
301 """ | 314 """ |
302 while skipping_condition(line_number): | 315 while skipping_condition(line_number): |
303 line_number += 1 | 316 line_number += 1 |
304 if line_number >= max_line_number: | 317 if line_number >= max_line_number: |
305 return line_number, False | 318 return line_number, False |
306 return line_number, True | 319 return line_number, True |
307 | 320 |
308 def parse_stacktraces_while_valid(self, buckets, log_lines, line_number): | 321 def parse_stacktraces_while_valid( |
| 322 self, buckets, dump_lines, line_number, appeared_addresses): |
309 """Parses stacktrace lines while the lines are valid. | 323 """Parses stacktrace lines while the lines are valid. |
310 | 324 |
311 Args: | 325 Args: |
312 buckets: A dict mapping bucket ids and their corresponding Bucket | 326 buckets: A dict mapping bucket ids to Bucket objects. |
313 objects. | 327 dump_lines: A list of lines to be parsed. |
314 log_lines: A list of lines to be parsed. | 328 line_number: A line number to start parsing in dump_lines. |
315 line_number: An integer representing the starting line number in | 329 appeared_addresses: A set to which appeared addresses are added. |
316 log_lines. | |
317 | 330 |
318 Returns: | 331 Returns: |
319 A pair of a list of valid lines and an integer representing the last | 332 A pair of a list of valid lines and an integer representing the last |
320 line number in log_lines. | 333 line number in dump_lines. |
321 """ | 334 """ |
322 (line_number, _) = self.skip_lines_while( | 335 (line_number, _) = self.skip_lines_while( |
323 line_number, len(log_lines), | 336 line_number, len(dump_lines), |
324 lambda n: not log_lines[n].split()[0].isdigit()) | 337 lambda n: not dump_lines[n].split()[0].isdigit()) |
325 stacktrace_lines_start = line_number | 338 stacktrace_lines_start = line_number |
326 (line_number, _) = self.skip_lines_while( | 339 (line_number, _) = self.skip_lines_while( |
327 line_number, len(log_lines), | 340 line_number, len(dump_lines), |
328 lambda n: self.check_stacktrace_line(log_lines[n], buckets)) | 341 lambda n: self.check_stacktrace_line( |
329 return (log_lines[stacktrace_lines_start:line_number], line_number) | 342 dump_lines[n], buckets, appeared_addresses)) |
| 343 return (dump_lines[stacktrace_lines_start:line_number], line_number) |
330 | 344 |
331 def parse_stacktraces(self, buckets, line_number): | 345 def parse_stacktraces(self, buckets, line_number, appeared_addresses): |
332 """Parses lines in self.log_lines as stacktrace. | 346 """Parses lines in self.dump_lines as stacktrace. |
333 | 347 |
334 Valid stacktrace lines are stored in self.stacktrace_lines. | 348 Valid stacktrace lines are stored in self.stacktrace_lines. |
335 | 349 |
336 Args: | 350 Args: |
337 buckets: A dict mapping bucket ids and their corresponding Bucket | 351 buckets: A dict mapping bucket ids to Bucket objects. |
338 objects. | 352 line_number: A line number to start parsing in dump_lines. |
339 line_number: An integer representing the starting line number in | 353 appeared_addresses: A set to which appeared addresses are added. |
340 log_lines. | |
341 | 354 |
342 Raises: | 355 Raises: |
343 ParsingException for invalid dump versions. | 356 ParsingException for invalid dump versions. |
344 """ | 357 """ |
345 sys.stderr.write(' Version: %s\n' % self.log_version) | 358 if self.dump_version == DUMP_DEEP_5: |
346 | |
347 if self.log_version == DUMP_DEEP_5: | |
348 (self.stacktrace_lines, line_number) = ( | 359 (self.stacktrace_lines, line_number) = ( |
349 self.parse_stacktraces_while_valid( | 360 self.parse_stacktraces_while_valid( |
350 buckets, self.log_lines, line_number)) | 361 buckets, self.dump_lines, line_number, appeared_addresses)) |
351 | 362 |
352 elif self.log_version in DUMP_DEEP_OBSOLETE: | 363 elif self.dump_version in DUMP_DEEP_OBSOLETE: |
353 raise ObsoleteDumpVersionException(self.log_version) | 364 raise ObsoleteDumpVersionException(self.dump_version) |
354 | 365 |
355 else: | 366 else: |
356 raise InvalidDumpException('Invalid version: %s' % self.log_version) | 367 raise InvalidDumpException('Invalid version: %s' % self.dump_version) |
357 | 368 |
358 def parse_global_stats(self): | 369 def parse_global_stats(self): |
359 """Parses lines in self.log_lines as global stats.""" | 370 """Parses lines in self.dump_lines as global stats.""" |
360 (ln, _) = self.skip_lines_while( | 371 (ln, _) = self.skip_lines_while( |
361 0, len(self.log_lines), | 372 0, len(self.dump_lines), |
362 lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n') | 373 lambda n: self.dump_lines[n] != 'GLOBAL_STATS:\n') |
363 | 374 |
364 global_stat_names = [ | 375 global_stat_names = [ |
365 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', | 376 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', |
366 'nonprofiled-absent', 'nonprofiled-anonymous', | 377 'nonprofiled-absent', 'nonprofiled-anonymous', |
367 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', | 378 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', |
368 'nonprofiled-stack', 'nonprofiled-other', | 379 'nonprofiled-stack', 'nonprofiled-other', |
369 'profiled-mmap', 'profiled-malloc'] | 380 'profiled-mmap', 'profiled-malloc'] |
370 | 381 |
371 for prefix in global_stat_names: | 382 for prefix in global_stat_names: |
372 (ln, _) = self.skip_lines_while( | 383 (ln, _) = self.skip_lines_while( |
373 ln, len(self.log_lines), | 384 ln, len(self.dump_lines), |
374 lambda n: self.log_lines[n].split()[0] != prefix) | 385 lambda n: self.dump_lines[n].split()[0] != prefix) |
375 words = self.log_lines[ln].split() | 386 words = self.dump_lines[ln].split() |
376 self.counters[prefix + '_virtual'] = int(words[-2]) | 387 self.counters[prefix + '_virtual'] = int(words[-2]) |
377 self.counters[prefix + '_committed'] = int(words[-1]) | 388 self.counters[prefix + '_committed'] = int(words[-1]) |
378 | 389 |
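A hedged sketch of the GLOBAL_STATS block this method consumes; only the last two columns (virtual and committed bytes) are read, and all figures below are hypothetical:

    GLOBAL_STATS:
         total            1048576000  524288000
         file-exec          81920000   40960000
         ...
         profiled-malloc    10485760    5242880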
379 def parse_version(self): | 390 def parse_version(self): |
380 """Parses a version string in self.log_lines. | 391 """Parses a version string in self.dump_lines. |
381 | 392 |
382 Returns: | 393 Returns: |
383 A pair of (a string representing a version of the stacktrace dump, | 394 A pair of (a string representing a version of the stacktrace dump, |
384 and an integer indicating a line number next to the version string). | 395 and an integer indicating a line number next to the version string). |
385 | 396 |
386 Raises: | 397 Raises: |
387 ParsingException for invalid dump versions. | 398 ParsingException for invalid dump versions. |
388 """ | 399 """ |
389 version = '' | 400 version = '' |
390 | 401 |
391 # Skip until an identifiable line. | 402 # Skip until an identifiable line. |
392 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') | 403 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') |
393 if not self.log_lines: | 404 if not self.dump_lines: |
394 raise EmptyDumpException('Empty heap dump file.') | 405 raise EmptyDumpException('Empty heap dump file.') |
395 (ln, found) = self.skip_lines_while( | 406 (ln, found) = self.skip_lines_while( |
396 0, len(self.log_lines), | 407 0, len(self.dump_lines), |
397 lambda n: not self.log_lines[n].startswith(headers)) | 408 lambda n: not self.dump_lines[n].startswith(headers)) |
398 if not found: | 409 if not found: |
399 raise InvalidDumpException('No version header.') | 410 raise InvalidDumpException('No version header.') |
400 | 411 |
401 # Identify a version. | 412 # Identify a version. |
402 if self.log_lines[ln].startswith('heap profile: '): | 413 if self.dump_lines[ln].startswith('heap profile: '): |
403 version = self.log_lines[ln][13:].strip() | 414 version = self.dump_lines[ln][13:].strip() |
404 if version == DUMP_DEEP_5: | 415 if version == DUMP_DEEP_5: |
405 (ln, _) = self.skip_lines_while( | 416 (ln, _) = self.skip_lines_while( |
406 ln, len(self.log_lines), | 417 ln, len(self.dump_lines), |
407 lambda n: self.log_lines[n] != 'STACKTRACES:\n') | 418 lambda n: self.dump_lines[n] != 'STACKTRACES:\n') |
408 elif version in DUMP_DEEP_OBSOLETE: | 419 elif version in DUMP_DEEP_OBSOLETE: |
409 raise ObsoleteDumpVersionException(version) | 420 raise ObsoleteDumpVersionException(version) |
410 else: | 421 else: |
411 raise InvalidDumpException('Invalid version: %s' % version) | 422 raise InvalidDumpException('Invalid version: %s' % version) |
412 elif self.log_lines[ln] == 'STACKTRACES:\n': | 423 elif self.dump_lines[ln] == 'STACKTRACES:\n': |
413 raise ObsoleteDumpVersionException(DUMP_DEEP_1) | 424 raise ObsoleteDumpVersionException(DUMP_DEEP_1) |
414 elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n': | 425 elif self.dump_lines[ln] == 'MMAP_STACKTRACES:\n': |
415 raise ObsoleteDumpVersionException(DUMP_DEEP_2) | 426 raise ObsoleteDumpVersionException(DUMP_DEEP_2) |
416 | 427 |
417 return (version, ln) | 428 return (version, ln) |
418 | 429 |
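A hedged example of the header this method recognizes for the current format; everything after the version line is elided:

    heap profile: DUMP_DEEP_5
    GLOBAL_STATS:
    ...
    STACKTRACES: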
419 def parse_log(self, buckets): | 430 def parse_dump(self, buckets, appeared_addresses): |
420 self.log_version, ln = self.parse_version() | 431 self.dump_version, ln = self.parse_version() |
421 self.parse_global_stats() | 432 self.parse_global_stats() |
422 self.parse_stacktraces(buckets, ln) | 433 self.parse_stacktraces(buckets, ln, appeared_addresses) |
423 | 434 |
424 @staticmethod | 435 @staticmethod |
425 def accumulate_size_for_policy(stacktrace_lines, | 436 def accumulate_size_for_policy(stacktrace_lines, |
426 policy_list, buckets, sizes): | 437 rule_list, buckets, sizes, symbols): |
427 for line in stacktrace_lines: | 438 for line in stacktrace_lines: |
428 words = line.split() | 439 words = line.split() |
429 bucket = buckets.get(int(words[BUCKET_ID])) | 440 bucket = buckets.get(int(words[BUCKET_ID])) |
430 component_match = get_component(policy_list, bucket) | 441 component_match = get_component(rule_list, bucket, symbols) |
431 sizes[component_match] += int(words[COMMITTED]) | 442 sizes[component_match] += int(words[COMMITTED]) |
432 | 443 |
433 if component_match.startswith('tc-'): | 444 if component_match.startswith('tc-'): |
434 sizes['tc-total-log'] += int(words[COMMITTED]) | 445 sizes['tc-total-log'] += int(words[COMMITTED]) |
435 elif component_match.startswith('mmap-'): | 446 elif component_match.startswith('mmap-'): |
436 sizes['mmap-total-log'] += int(words[COMMITTED]) | 447 sizes['mmap-total-log'] += int(words[COMMITTED]) |
437 else: | 448 else: |
438 sizes['other-total-log'] += int(words[COMMITTED]) | 449 sizes['other-total-log'] += int(words[COMMITTED]) |
439 | 450 |
440 def apply_policy(self, policy_list, buckets, first_log_time): | 451 def apply_policy( |
| 452 self, rule_list, buckets, first_dump_time, components, symbols): |
441 """Aggregates the total memory size of each component. | 453 """Aggregates the total memory size of each component. |
442 | 454 |
443 Iterates through all stacktraces and attributes them to one of the components | 455 Iterates through all stacktraces and attributes them to one of the components |
444 based on the policy. It is important to apply the policy in the right order. | 456 based on the policy. It is important to apply the policy in the right order. |
445 | 457 |
446 Args: | 458 Args: |
447 policy_list: A list containing Policy objects. (Parsed policy data by | 459 rule_list: A list of Rule objects. |
448 parse_policy.) | 460 buckets: A dict mapping bucket ids to Bucket objects. |
449 buckets: A dict mapping bucket ids and their corresponding Bucket | 461 first_dump_time: An integer representing time when the first dump is |
450 objects. | |
451 first_log_time: An integer representing time when the first log is | |
452 dumped. | 462 dumped. |
| 463 components: A list of strings of component names. |
| 464 symbols: A dict mapping runtime addresses to symbol names. |
453 | 465 |
454 Returns: | 466 Returns: |
455 A dict mapping component names to their sizes. | 467 A dict mapping component names to their sizes. |
456 """ | 468 """ |
457 | 469 |
458 sys.stderr.write('apply policy:%s\n' % (self.log_path)) | 470 sys.stderr.write('Applying policy: "%s".\n' % self.dump_path) |
459 sizes = dict((c, 0) for c in components) | 471 sizes = dict((c, 0) for c in components) |
460 | 472 |
461 self.accumulate_size_for_policy(self.stacktrace_lines, | 473 self.accumulate_size_for_policy(self.stacktrace_lines, |
462 policy_list, buckets, sizes) | 474 rule_list, buckets, sizes, symbols) |
463 | 475 |
464 mmap_prefix = 'profiled-mmap' | 476 mmap_prefix = 'profiled-mmap' |
465 malloc_prefix = 'profiled-malloc' | 477 malloc_prefix = 'profiled-malloc' |
466 | 478 |
467 sizes['mmap-no-log'] = ( | 479 sizes['mmap-no-log'] = ( |
468 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log']) | 480 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log']) |
469 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix] | 481 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix] |
470 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix] | 482 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix] |
471 | 483 |
472 sizes['tc-no-log'] = ( | 484 sizes['tc-no-log'] = ( |
(...skipping 34 matching lines...) |
507 'nonprofiled-stack_committed', | 519 'nonprofiled-stack_committed', |
508 'nonprofiled-other_committed') | 520 'nonprofiled-other_committed') |
509 sizes['mustbezero'] = ( | 521 sizes['mustbezero'] = ( |
510 self.counters['total_committed'] - | 522 self.counters['total_committed'] - |
511 sum(self.counters[i] for i in removed)) | 523 sum(self.counters[i] for i in removed)) |
512 if 'total-exclude-profiler' in sizes: | 524 if 'total-exclude-profiler' in sizes: |
513 sizes['total-exclude-profiler'] = ( | 525 sizes['total-exclude-profiler'] = ( |
514 self.counters['total_committed'] - | 526 self.counters['total_committed'] - |
515 (sizes['mmap-profiler'] + sizes['mmap-allocated-type'])) | 527 (sizes['mmap-profiler'] + sizes['mmap-allocated-type'])) |
516 if 'hour' in sizes: | 528 if 'hour' in sizes: |
517 sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0 | 529 sizes['hour'] = (self.dump_time - first_dump_time) / 60.0 / 60.0 |
518 if 'minute' in sizes: | 530 if 'minute' in sizes: |
519 sizes['minute'] = (self.log_time - first_log_time) / 60.0 | 531 sizes['minute'] = (self.dump_time - first_dump_time) / 60.0 |
520 if 'second' in sizes: | 532 if 'second' in sizes: |
521 sizes['second'] = self.log_time - first_log_time | 533 sizes['second'] = self.dump_time - first_dump_time |
522 | 534 |
523 return sizes | 535 return sizes |
524 | 536 |
525 @staticmethod | 537 @staticmethod |
526 def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets, | 538 def accumulate_size_for_expand(stacktrace_lines, rule_list, buckets, |
527 component_name, depth, sizes): | 539 component_name, depth, sizes, symbols): |
528 for line in stacktrace_lines: | 540 for line in stacktrace_lines: |
529 words = line.split() | 541 words = line.split() |
530 bucket = buckets.get(int(words[BUCKET_ID])) | 542 bucket = buckets.get(int(words[BUCKET_ID])) |
531 component_match = get_component(policy_list, bucket) | 543 component_match = get_component(rule_list, bucket, symbols) |
532 if component_match == component_name: | 544 if component_match == component_name: |
533 stacktrace_sequence = '' | 545 stacktrace_sequence = '' |
534 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace), | 546 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace), |
535 1 + depth)]: | 547 1 + depth)]: |
536 stacktrace_sequence += address_symbol_dict[address] + ' ' | 548 stacktrace_sequence += symbols[address] + ' ' |
537 if not stacktrace_sequence in sizes: | 549 if not stacktrace_sequence in sizes: |
538 sizes[stacktrace_sequence] = 0 | 550 sizes[stacktrace_sequence] = 0 |
539 sizes[stacktrace_sequence] += int(words[COMMITTED]) | 551 sizes[stacktrace_sequence] += int(words[COMMITTED]) |
540 | 552 |
541 def expand(self, policy_list, buckets, component_name, depth): | 553 def expand(self, rule_list, buckets, component_name, depth, symbols): |
542 """Prints all stacktraces in a given component of given depth. | 554 """Prints all stacktraces in a given component of given depth. |
543 | 555 |
544 Args: | 556 Args: |
545 policy_list: A list containing Policy objects. (Parsed policy data by | 557 rule_list: A list of Rule objects. |
546 parse_policy.) | 558 buckets: A dict mapping bucket ids to Bucket objects. |
547 buckets: A dict mapping bucket ids and their corresponding Bucket | |
548 objects. | |
549 component_name: The name of a component for filtering. | 559 component_name: The name of a component for filtering. |
550 depth: An integer representing depth to be printed. | 560 depth: An integer representing depth to be printed. |
| 561 symbols: A dict mapping runtime addresses to symbol names. |
551 """ | 562 """ |
552 sizes = {} | 563 sizes = {} |
553 | 564 |
554 self.accumulate_size_for_expand( | 565 self.accumulate_size_for_expand( |
555 self.stacktrace_lines, policy_list, buckets, component_name, | 566 self.stacktrace_lines, rule_list, buckets, component_name, |
556 depth, sizes) | 567 depth, sizes, symbols) |
557 | 568 |
558 sorted_sizes_list = sorted( | 569 sorted_sizes_list = sorted( |
559 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) | 570 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) |
560 total = 0 | 571 total = 0 |
561 for size_pair in sorted_sizes_list: | 572 for size_pair in sorted_sizes_list: |
562 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0])) | 573 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0])) |
563 total += size_pair[1] | 574 total += size_pair[1] |
564 sys.stderr.write('total: %d\n' % (total)) | 575 sys.stderr.write('total: %d\n' % (total)) |
565 | 576 |
566 | 577 |
567 def update_symbols(symbol_path, mapping_lines, maps_path): | 578 def update_symbols( |
| 579 symbol_path, maps_path, appeared_addresses, symbols): |
568 """Updates address/symbol mapping on memory and in a .symbol cache file. | 580 """Updates address/symbol mapping on memory and in a .symbol cache file. |
569 | 581 |
570 It reads cached address/symbol mapping from a .symbol file if it exists. | 582 It reads cached address/symbol mapping from a .symbol file if it exists. |
571 Then, it resolves unresolved addresses from a Chrome binary with pprof. | 583 Then, it resolves unresolved addresses from a Chrome binary with pprof. |
572 Both mappings on memory and in a .symbol cache file are updated. | 584 Both mappings on memory and in a .symbol cache file are updated. |
573 | 585 |
574 Symbol files are formatted as follows: | 586 Symbol files are formatted as follows: |
575 <Address> <Symbol> | 587 <Address> <Symbol> |
576 <Address> <Symbol> | 588 <Address> <Symbol> |
577 <Address> <Symbol> | 589 <Address> <Symbol> |
578 ... | 590 ... |
579 | 591 |
580 Args: | 592 Args: |
581 symbol_path: A string representing a path to a .symbols cache file. | 593 symbol_path: A string representing a path to a .symbols cache file. |
582 mapping_lines: A list of strings containing /proc/.../maps. | |
583 maps_path: A string of the path of /proc/.../maps. | 594 maps_path: A string of the path of /proc/.../maps. |
| 595 appeared_addresses: A set of known addresses. |
| 596 symbols: A dict mapping runtime addresses to symbol names. |
584 """ | 597 """ |
585 with open(symbol_path, mode='a+') as symbol_f: | 598 with open(symbol_path, mode='a+') as symbol_f: |
586 symbol_lines = symbol_f.readlines() | 599 symbol_lines = symbol_f.readlines() |
587 if symbol_lines: | 600 if symbol_lines: |
588 for line in symbol_lines: | 601 for line in symbol_lines: |
589 items = line.split(None, 1) | 602 items = line.split(None, 1) |
590 address_symbol_dict[items[0]] = items[1].rstrip() | 603 if len(items) == 1: |
| 604 items.append('??') |
| 605 symbols[items[0]] = items[1].rstrip() |
| 606 if symbols: |
| 607 sys.stderr.write(' Found %d symbols in cache.\n' % len(symbols)) |
| 608 else: |
| 609 sys.stderr.write(' No symbols found in cache.\n') |
591 | 610 |
592 unresolved_addresses = sorted( | 611 unresolved_addresses = sorted( |
593 a for a in appeared_addresses if a not in address_symbol_dict) | 612 a for a in appeared_addresses if a not in symbols) |
594 | 613 |
595 if unresolved_addresses: | 614 if not unresolved_addresses: |
| 615 sys.stderr.write(' No need to resolve any more addresses.\n') |
| 616 else: |
| 617 sys.stderr.write(' %d addresses are unresolved.\n' % |
| 618 len(unresolved_addresses)) |
596 prepared_data_dir = tempfile.mkdtemp() | 619 prepared_data_dir = tempfile.mkdtemp() |
597 try: | 620 try: |
598 prepare_symbol_info(maps_path, prepared_data_dir) | 621 prepare_symbol_info(maps_path, prepared_data_dir) |
599 | 622 |
600 symbols = find_runtime_symbols_list( | 623 symbol_list = find_runtime_symbols_list( |
601 prepared_data_dir, unresolved_addresses) | 624 prepared_data_dir, unresolved_addresses) |
602 | 625 |
603 for address, symbol in zip(unresolved_addresses, symbols): | 626 for address, symbol in zip(unresolved_addresses, symbol_list): |
| 627 if not symbol: |
| 628 symbol = '??' |
604 stripped_symbol = symbol.strip() | 629 stripped_symbol = symbol.strip() |
605 address_symbol_dict[address] = stripped_symbol | 630 symbols[address] = stripped_symbol |
606 symbol_f.write('%s %s\n' % (address, stripped_symbol)) | 631 symbol_f.write('%s %s\n' % (address, stripped_symbol)) |
607 finally: | 632 finally: |
608 shutil.rmtree(prepared_data_dir) | 633 shutil.rmtree(prepared_data_dir) |
609 | 634 |
610 | 635 |
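A hedged sketch of the .symbols cache format assumed above; addresses and names are hypothetical, and a bare address line is read back as '??':

    0x7f85cc39d000 WebCore::Node::create
    0x7f85cc3a1120 v8::internal::Heap::AllocateRaw
    0x7f85cc3b0008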
611 def parse_policy(policy_path): | 636 def parse_policy(policy_path): |
612 """Parses policy file. | 637 """Parses policy file. |
613 | 638 |
614 A policy file contains component names and their | 639 A policy file contains component names and their |
615 stacktrace patterns written as regular expressions. | 640 stacktrace patterns written as regular expressions. |
616 Those patterns are matched against the symbols of | 641 Those patterns are matched against the symbols of |
617 each stacktrace in the order written in the policy file. | 642 each stacktrace in the order written in the policy file. |
618 | 643 |
619 Args: | 644 Args: |
620 policy_path: A path for a policy file. | 645 policy_path: A path for a policy file. |
621 Returns: | 646 Returns: |
622 A list of Policy objects, each pairing a component name with a regex. | 647 A tuple of (a list of Rule objects, the policy version, and the component names). |
623 """ | 648 """ |
624 with open(policy_path, mode='r') as policy_f: | 649 with open(policy_path, mode='r') as policy_f: |
625 policy_lines = policy_f.readlines() | 650 policy_lines = policy_f.readlines() |
626 | 651 |
627 policy_version = POLICY_DEEP_1 | 652 policy_version = POLICY_DEEP_1 |
628 if policy_lines[0].startswith('heap profile policy: '): | 653 if policy_lines[0].startswith('heap profile policy: '): |
629 policy_version = policy_lines[0][21:].strip() | 654 policy_version = policy_lines[0][21:].strip() |
630 policy_lines.pop(0) | 655 policy_lines.pop(0) |
631 policy_list = [] | 656 rule_list = [] |
| 657 components = [] |
632 | 658 |
633 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1: | 659 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1: |
634 sys.stderr.write(' heap profile policy version: %s\n' % policy_version) | |
635 for line in policy_lines: | 660 for line in policy_lines: |
636 if line[0] == '#': | 661 if line[0] == '#': |
637 continue | 662 continue |
638 | 663 |
639 if policy_version == POLICY_DEEP_2: | 664 if policy_version == POLICY_DEEP_2: |
640 (name, allocation_type, pattern) = line.strip().split(None, 2) | 665 (name, allocation_type, pattern) = line.strip().split(None, 2) |
641 mmap = False | 666 mmap = False |
642 if allocation_type == 'mmap': | 667 if allocation_type == 'mmap': |
643 mmap = True | 668 mmap = True |
644 elif policy_version == POLICY_DEEP_1: | 669 elif policy_version == POLICY_DEEP_1: |
645 name = line.split()[0] | 670 name = line.split()[0] |
646 pattern = line[len(name) : len(line)].strip() | 671 pattern = line[len(name) : len(line)].strip() |
647 mmap = False | 672 mmap = False |
648 | 673 |
649 if pattern != 'default': | 674 if pattern != 'default': |
650 policy_list.append(Policy(name, mmap, pattern)) | 675 rule_list.append(Rule(name, mmap, pattern)) |
651 if components.count(name) == 0: | 676 if components.count(name) == 0: |
652 components.append(name) | 677 components.append(name) |
653 | 678 |
654 else: | 679 else: |
655 sys.stderr.write(' invalid heap profile policy version: %s\n' % ( | 680 sys.stderr.write(' invalid heap profile policy version: %s\n' % ( |
656 policy_version)) | 681 policy_version)) |
657 | 682 |
658 return policy_list | 683 return rule_list, policy_version, components |
659 | 684 |
660 | 685 |
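A hedged usage sketch; the policy path and its contents are hypothetical:

    rule_list, policy_version, components = parse_policy('policy.browser.txt')
    # policy_version -> 'POLICY_DEEP_2'
    # components     -> ['tc-webkit', 'mmap-v8', ...] in file order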
661 def main(): | 686 def find_prefix(path): |
662 if (len(sys.argv) < 4) or (not (sys.argv[1] in ['--csv', | 687 return re.sub(r'\.[0-9][0-9][0-9][0-9]\.heap', '', path) |
663 '--json', | |
664 '--expand', | |
665 '--list', | |
666 '--stacktrace', | |
667 '--pprof'])): | |
668 sys.stderr.write("""Usage: | |
669 %s [options] <chrome-binary> <policy> <profile> [component-name] [depth] | |
670 | 688 |
671 Options: | |
672 --csv Output result in csv format | |
673 --json Output result in json format | |
674 --stacktrace Convert raw address to symbol names | |
675 --list Lists components and their sizes | |
676 --expand Show all stacktraces in the specified component | |
677 of given depth with their sizes | |
678 --pprof Format the profile file so it can be processed | |
679 by pprof | |
680 | 689 |
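A quick example of the prefix derivation; the dump name is hypothetical:

    find_prefix('hprof.12345.0002.heap')   # -> 'hprof.12345'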
681 Examples: | 690 def load_buckets(prefix): |
682 dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv | |
683 dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json | |
684 dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap | |
685 dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4 | |
686 dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt | |
687 """ % (sys.argv[0])) | |
688 sys.exit(1) | |
689 | |
690 action = sys.argv[1] | |
691 chrome_path = sys.argv[2] | |
692 policy_path = sys.argv[3] | |
693 log_path = sys.argv[4] | |
694 | |
695 sys.stderr.write('parsing a policy file\n') | |
696 policy_list = parse_policy(policy_path) | |
697 | |
698 p = re.compile('\.[0-9][0-9][0-9][0-9]\.heap') | |
699 prefix = p.sub('', log_path) | |
700 symbol_path = prefix + '.symbols' | |
701 | |
702 sys.stderr.write('parsing the maps file\n') | |
703 maps_path = prefix + '.maps' | |
704 with open(maps_path, 'r') as maps_f: | |
705 maps_lines = maps_f.readlines() | |
706 | |
707 # Reading buckets | 691 # Reading buckets |
708 sys.stderr.write('parsing the bucket file\n') | 692 sys.stderr.write('Loading bucket files.\n') |
709 buckets = {} | 693 buckets = {} |
710 bucket_count = 0 | 694 bucket_count = 0 |
711 n = 0 | 695 n = 0 |
712 while True: | 696 while True: |
713 buckets_path = '%s.%04d.buckets' % (prefix, n) | 697 buckets_path = '%s.%04d.buckets' % (prefix, n) |
714 if not os.path.exists(buckets_path): | 698 if not os.path.exists(buckets_path): |
715 if n > 10: | 699 if n > 10: |
716 break | 700 break |
717 n += 1 | 701 n += 1 |
718 continue | 702 continue |
719 sys.stderr.write('reading buckets from %s\n' % (buckets_path)) | 703 sys.stderr.write(' %s\n' % buckets_path) |
720 with open(buckets_path, 'r') as buckets_f: | 704 with open(buckets_path, 'r') as buckets_f: |
721 for line in buckets_f: | 705 for line in buckets_f: |
722 words = line.split() | 706 words = line.split() |
723 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap') | 707 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap') |
724 n += 1 | 708 n += 1 |
725 | 709 |
726 log_path_list = [log_path] | 710 return buckets |
727 | 711 |
728 if action in ('--csv', '--json'): | 712 |
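A hedged sketch of one .buckets line and how it maps to the Bucket class above; the bucket id and addresses are hypothetical:

    line = '123 malloc 0x7f85cc39d000 0x7f85cc3a1120'
    words = line.split()
    bucket = Bucket(words[2:], words[1] == 'mmap')
    # bucket.stacktrace -> ['0x7f85cc39d000', '0x7f85cc3a1120']; bucket.mmap -> False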
729 # search for the sequence of files | 713 def determine_dump_path_list(dump_path, prefix): |
730 n = int(log_path[len(log_path) - 9 : len(log_path) - 5]) | 714 dump_path_list = [dump_path] |
731 n += 1 # skip current file | 715 |
732 while True: | 716 # search for the sequence of files |
733 p = '%s.%04d.heap' % (prefix, n) | 717 n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5]) |
734 if os.path.exists(p): | 718 n += 1 # skip current file |
735 log_path_list.append(p) | 719 while True: |
736 else: | 720 p = '%s.%04d.heap' % (prefix, n) |
737 break | 721 if os.path.exists(p): |
738 n += 1 | 722 dump_path_list.append(p) |
739 | |
740 logs = [] | |
741 for path in log_path_list: | |
742 new_log = Log(path) | |
743 sys.stderr.write('Parsing a dump: %s\n' % path) | |
744 try: | |
745 new_log.parse_log(buckets) | |
746 except EmptyDumpException: | |
747 sys.stderr.write(' WARNING: ignored an empty dump: %s\n' % path) | |
748 except ParsingException, e: | |
749 sys.stderr.write(' Error in parsing heap profile dump: %s\n' % e) | |
750 sys.exit(1) | |
751 else: | 723 else: |
752 logs.append(new_log) | 724 break |
753 | 725 n += 1 |
754 sys.stderr.write('getting symbols\n') | 726 |
755 update_symbols(symbol_path, maps_lines, maps_path) | 727 return dump_path_list |
756 | 728 |
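A hedged example of the sequence discovery, assuming hypothetical files hprof.0002.heap and hprof.0003.heap exist but hprof.0004.heap does not:

    determine_dump_path_list('hprof.0002.heap', 'hprof')
    # -> ['hprof.0002.heap', 'hprof.0003.heap']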
757 # TODO(dmikurube): Many modes now. Split them into separete functions. | 729 |
758 if action == '--stacktrace': | 730 def load_single_dump(dump_path, buckets, appeared_addresses): |
759 logs[0].dump_stacktrace(buckets) | 731 new_dump = Dump(dump_path) |
760 | 732 try: |
761 elif action == '--csv': | 733 new_dump.parse_dump(buckets, appeared_addresses) |
762 sys.stdout.write(','.join(components)) | 734 except EmptyDumpException: |
763 sys.stdout.write('\n') | 735 sys.stderr.write('... ignored an empty dump') |
764 | 736 except ParsingException, e: |
765 for log in logs: | 737 sys.stderr.write('... error in parsing: %s' % e) |
766 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) | 738 sys.exit(1) |
| 739 else: |
| 740 sys.stderr.write(' (version: %s)' % new_dump.dump_version) |
| 741 |
| 742 return new_dump |
| 743 |
| 744 |
| 745 def load_dump(dump_path, buckets): |
| 746 sys.stderr.write('Loading a heap dump file: "%s"' % dump_path) |
| 747 appeared_addresses = set() |
| 748 dump = load_single_dump(dump_path, buckets, appeared_addresses) |
| 749 sys.stderr.write('.\n') |
| 750 return dump, appeared_addresses |
| 751 |
| 752 |
| 753 def load_dumps(dump_path_list, buckets): |
| 754 sys.stderr.write('Loading heap dump files.\n') |
| 755 appeared_addresses = set() |
| 756 dumps = [] |
| 757 for path in dump_path_list: |
| 758 sys.stderr.write(' %s' % path) |
| 759 dumps.append(load_single_dump(path, buckets, appeared_addresses)) |
| 760 sys.stderr.write('\n') |
| 761 return dumps, appeared_addresses |
| 762 |
| 763 |
| 764 def load_and_update_symbol_cache(prefix, appeared_addresses): |
| 765 maps_path = prefix + '.maps' |
| 766 symbol_path = prefix + '.symbols' |
| 767 sys.stderr.write('Loading and updating symbol cache: "%s".\n' % symbol_path) |
| 768 symbols = {} |
| 769 update_symbols(symbol_path, maps_path, appeared_addresses, symbols) |
| 770 return symbols |
| 771 |
| 772 |
| 773 def load_default_policies(): |
| 774 with open(POLICIES_JSON_PATH, mode='r') as policies_f: |
| 775 default_policies = json.load(policies_f) |
| 776 return default_policies |
| 777 |
| 778 |
| 779 def load_policy(policies_dict, policy_label): |
| 780 policy_file = policies_dict[policy_label]['file'] |
| 781 policy_path = os.path.join(os.path.dirname(__file__), policy_file) |
| 782 rule_list, policy_version, components = parse_policy(policy_path) |
| 783 sys.stderr.write(' %s: %s (version: %s)\n' % |
| 784 (policy_label, policy_path, policy_version)) |
| 785 return Policy(rule_list, policy_version, components) |
| 786 |
| 787 |
| 788 def load_policies_dict(policies_dict): |
| 789 sys.stderr.write('Loading policy files.\n') |
| 790 policies = {} |
| 791 for policy_label in policies_dict: |
| 792 policies[policy_label] = load_policy(policies_dict, policy_label) |
| 793 return policies |
| 794 |
| 795 |
| 796 def load_policies(options_policy): |
| 797 default_policies = load_default_policies() |
| 798 if options_policy: |
| 799 policy_labels = options_policy.split(',') |
| 800 specified_policies = {} |
| 801 for specified_policy in policy_labels: |
| 802 if specified_policy in default_policies: |
| 803 specified_policies[specified_policy] = ( |
| 804 default_policies[specified_policy]) |
| 805 policies = load_policies_dict(specified_policies) |
| 806 else: |
| 807 policies = load_policies_dict(default_policies) |
| 808 return policies |
| 809 |
| 810 |
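A hedged sketch of the policies.json structure that load_policy expects, inferred from its use of the 'file' key; labels and file names are hypothetical:

    {
      "l0": { "file": "policy.l0.txt" },
      "t0": { "file": "policy.t0.txt" }
    }

    policies = load_policies('l0')   # loads only the "l0" policy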
| 811 def do_stacktrace(sys_argv): |
| 812 parser = optparse.OptionParser(usage='Usage: %prog stacktrace <dump>') |
| 813 options, args = parser.parse_args(sys_argv) |
| 814 |
| 815 if len(args) != 2: |
| 816 parser.error('needs 1 argument.') |
| 817 return 1 |
| 818 |
| 819 dump_path = args[1] |
| 820 |
| 821 prefix = find_prefix(dump_path) |
| 822 buckets = load_buckets(prefix) |
| 823 dump, appeared_addresses = load_dump(dump_path, buckets) |
| 824 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
| 825 |
| 826 dump.print_stacktrace(buckets, symbols) |
| 827 |
| 828 return 0 |
| 829 |
| 830 |
| 831 def do_csv(sys_argv): |
| 832 parser = optparse.OptionParser('Usage: %prog csv [-p POLICY] <first-dump>') |
| 833 parser.add_option('-p', '--policy', type='string', dest='policy', |
| 834 help='profile with POLICY', metavar='POLICY') |
| 835 options, args = parser.parse_args(sys_argv) |
| 836 |
| 837 if len(args) != 2: |
| 838 parser.error('needs 1 argument.') |
| 839 return 1 |
| 840 |
| 841 dump_path = args[1] |
| 842 |
| 843 prefix = find_prefix(dump_path) |
| 844 buckets = load_buckets(prefix) |
| 845 dumps, appeared_addresses = load_dumps( |
| 846 determine_dump_path_list(dump_path, prefix), buckets) |
| 847 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
| 848 policies = load_policies(options.policy) |
| 849 |
| 850 max_components = 0 |
| 851 for policy in policies: |
| 852 max_components = max(max_components, len(policies[policy].components)) |
| 853 |
| 854 for policy in sorted(policies): |
| 855 rule_list = policies[policy].rules |
| 856 components = policies[policy].components |
| 857 |
| 858 if len(policies) > 1: |
| 859 sys.stdout.write('%s%s\n' % (policy, ',' * (max_components - 1))) |
| 860 sys.stdout.write('%s%s\n' % ( |
| 861 ','.join(components), ',' * (max_components - len(components)))) |
| 862 |
| 863 for dump in dumps: |
| 864 component_sizes = dump.apply_policy( |
| 865 rule_list, buckets, dumps[0].dump_time, components, symbols) |
767 s = [] | 866 s = [] |
768 for c in components: | 867 for c in components: |
769 if c in ('hour', 'minute', 'second'): | 868 if c in ('hour', 'minute', 'second'): |
770 s.append('%05.5f' % (component_sizes[c])) | 869 s.append('%05.5f' % (component_sizes[c])) |
771 else: | 870 else: |
772 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) | 871 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) |
773 sys.stdout.write(','.join(s)) | 872 sys.stdout.write('%s%s\n' % ( |
774 sys.stdout.write('\n') | 873 ','.join(s), ',' * (max_components - len(components)))) |
775 | 874 |
776 elif action == '--json': | 875 for bucket in buckets.itervalues(): |
777 json_base = { | 876 bucket.clear_component_cache() |
778 'version': 'JSON_DEEP_1', | 877 |
| 878 return 0 |
| 879 |
| 880 |
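A hedged sketch of the CSV shape for two policies whose legends have three and two components; rows are padded with trailing commas to max_components fields, and all names and numbers below are hypothetical:

    policy_a,,
    tc-webkit,mmap-v8,other
    12.50000,3.25000,0.75000
    policy_b,,
    total,unprofiled,
    16.50000,2.00000,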
| 881 def do_json(sys_argv): |
| 882 parser = optparse.OptionParser('Usage: %prog json [-p POLICY] <first-dump>') |
| 883 parser.add_option('-p', '--policy', type='string', dest='policy', |
| 884 help='profile with POLICY', metavar='POLICY') |
| 885 options, args = parser.parse_args(sys_argv) |
| 886 |
| 887 if len(args) != 2: |
| 888 parser.error('needs 1 argument.') |
| 889 return 1 |
| 890 |
| 891 dump_path = args[1] |
| 892 |
| 893 prefix = find_prefix(dump_path) |
| 894 buckets = load_buckets(prefix) |
| 895 dumps, appeared_addresses = load_dumps( |
| 896 determine_dump_path_list(dump_path, prefix), buckets) |
| 897 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
| 898 policies = load_policies(options.policy) |
| 899 |
| 900 json_base = { |
| 901 'version': 'JSON_DEEP_2', |
| 902 'policies': {}, |
| 903 } |
| 904 |
| 905 for policy in sorted(policies): |
| 906 rule_list = policies[policy].rules |
| 907 components = policies[policy].components |
| 908 |
| 909 json_base['policies'][policy] = { |
779 'legends': components, | 910 'legends': components, |
780 'snapshots': [], | 911 'snapshots': [], |
781 } | 912 } |
782 for log in logs: | 913 |
783 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) | 914 for dump in dumps: |
784 component_sizes['log_path'] = log.log_path | 915 component_sizes = dump.apply_policy( |
785 component_sizes['log_time'] = datetime.fromtimestamp( | 916 rule_list, buckets, dumps[0].dump_time, components, symbols) |
786 log.log_time).strftime('%Y-%m-%d %H:%M:%S') | 917 component_sizes['dump_path'] = dump.dump_path |
787 json_base['snapshots'].append(component_sizes) | 918 component_sizes['dump_time'] = datetime.fromtimestamp( |
788 json.dump(json_base, sys.stdout, indent=2, sort_keys=True) | 919 dump.dump_time).strftime('%Y-%m-%d %H:%M:%S') |
789 | 920 json_base['policies'][policy]['snapshots'].append(component_sizes) |
790 elif action == '--list': | 921 |
791 component_sizes = logs[0].apply_policy( | 922 for bucket in buckets.itervalues(): |
792 policy_list, buckets, logs[0].log_time) | 923 bucket.clear_component_cache() |
| 924 |
| 925 json.dump(json_base, sys.stdout, indent=2, sort_keys=True) |
| 926 |
| 927 return 0 |
| 928 |
| 929 |
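A hedged sketch of the JSON_DEEP_2 shape emitted above; labels, components, and values are hypothetical, and each snapshot also carries the other size counters computed by apply_policy:

    {
      "version": "JSON_DEEP_2",
      "policies": {
        "l0": {
          "legends": ["tc-webkit", "mmap-v8"],
          "snapshots": [
            { "tc-webkit": 13107200, "mmap-v8": 3407872,
              "dump_path": "hprof.0002.heap",
              "dump_time": "2012-06-15 12:34:56" }
          ]
        }
      }
    }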
| 930 def do_list(sys_argv): |
| 931 parser = optparse.OptionParser('Usage: %prog list [-p POLICY] <first-dump>') |
| 932 parser.add_option('-p', '--policy', type='string', dest='policy', |
| 933 help='profile with POLICY', metavar='POLICY') |
| 934 options, args = parser.parse_args(sys_argv) |
| 935 |
| 936 if len(args) != 2: |
| 937 parser.error('needs 1 argument.') |
| 938 return 1 |
| 939 |
| 940 dump_path = args[1] |
| 941 |
| 942 prefix = find_prefix(dump_path) |
| 943 buckets = load_buckets(prefix) |
| 944 dumps, appeared_addresses = load_dumps( |
| 945 determine_dump_path_list(dump_path, prefix), buckets) |
| 946 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
| 947 policies = load_policies(options.policy) |
| 948 |
| 949 for policy in sorted(policies): |
| 950 rule_list = policies[policy].rules |
| 951 components = policies[policy].components |
| 952 |
| 953 component_sizes = dumps[0].apply_policy( |
| 954 rule_list, buckets, dumps[0].dump_time, components, symbols) |
| 955 sys.stdout.write('%s:\n' % policy) |
793 for c in components: | 956 for c in components: |
794 if c in ['hour', 'minute', 'second']: | 957 if c in ['hour', 'minute', 'second']: |
795 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c])) | 958 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c])) |
796 else: | 959 else: |
797 sys.stdout.write('%30s %10.3f\n' % ( | 960 sys.stdout.write('%30s %10.3f\n' % ( |
798 c, component_sizes[c] / 1024.0 / 1024.0)) | 961 c, component_sizes[c] / 1024.0 / 1024.0)) |
799 | 962 |
800 elif action == '--expand': | 963 for bucket in buckets.itervalues(): |
801 component_name = sys.argv[5] | 964 bucket.clear_component_cache() |
802 depth = sys.argv[6] | |
803 logs[0].expand(policy_list, buckets, component_name, int(depth)) | |
804 | 965 |
805 elif action == '--pprof': | 966 return 0 |
806 if len(sys.argv) > 5: | 967 |
807 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5]) | 968 |
808 else: | 969 def do_expand(sys_argv): |
809 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None) | 970 parser = optparse.OptionParser( |
| 971 'Usage: %prog expand <dump> <policy> <component> <depth>') |
| 972 options, args = parser.parse_args(sys_argv) |
| 973 |
| 974 if len(args) != 5: |
| 975 parser.error('needs 4 arguments.') |
| 976 return 1 |
| 977 |
| 978 dump_path = args[1] |
| 979 target_policy = args[2] |
| 980 component_name = args[3] |
| 981 depth = args[4] |
| 982 |
| 983 prefix = find_prefix(dump_path) |
| 984 buckets = load_buckets(prefix) |
| 985 dump, appeared_addresses = load_dump(dump_path, buckets) |
| 986 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
| 987 policies = load_policies(target_policy) |
| 988 |
| 989 rule_list = policies[target_policy].rules |
| 990 |
| 991 dump.expand(rule_list, buckets, component_name, int(depth), symbols) |
| 992 |
| 993 return 0 |
| 994 |
| 995 |
| 996 def do_pprof(sys_argv): |
| 997 parser = optparse.OptionParser( |
| 998 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') |
| 999 parser.add_option('-c', '--component', type='string', dest='component', |
| 1000 help='restrict to COMPONENT', metavar='COMPONENT') |
| 1001 options, args = parser.parse_args(sys_argv) |
| 1002 |
| 1003 if len(args) != 3: |
| 1004 parser.error('needs 2 arguments.') |
| 1005 return 1 |
| 1006 |
| 1007 dump_path = args[1] |
| 1008 target_policy = args[2] |
| 1009 component = options.component |
| 1010 |
| 1011 prefix = find_prefix(dump_path) |
| 1012 buckets = load_buckets(prefix) |
| 1013 dump, appeared_addresses = load_dump(dump_path, buckets) |
| 1014 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
| 1015 policies = load_policies(target_policy) |
| 1016 |
| 1017 rule_list = policies[target_policy].rules |
| 1018 |
| 1019 with open(prefix + '.maps', 'r') as maps_f: |
| 1020 maps_lines = maps_f.readlines() |
| 1021 dump.print_for_pprof(rule_list, buckets, maps_lines, component, symbols) |
| 1022 |
| 1023 return 0 |
| 1024 |
| 1025 |
| 1026 def main(): |
| 1027 COMMANDS = { |
| 1028 'csv': do_csv, |
| 1029 'expand': do_expand, |
| 1030 'json': do_json, |
| 1031 'list': do_list, |
| 1032 'pprof': do_pprof, |
| 1033 'stacktrace': do_stacktrace, |
| 1034 } |
| 1035 |
| 1036 # TODO(dmikurube): Remove this message after a while. |
| 1037 if len(sys.argv) >= 2 and sys.argv[1].startswith('--'): |
| 1038 sys.stderr.write(""" |
| 1039 **************** NOTICE!! **************** |
| 1040 The command line format has changed. |
| 1041 Please look at the description below. |
| 1042 ****************************************** |
| 1043 |
| 1044 """) |
| 1045 |
| 1046 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS): |
| 1047 sys.stderr.write("""Usage: %s <command> [options] [<args>] |
| 1048 |
| 1049 Commands: |
| 1050 csv Classify memory usage in CSV |
| 1051 expand Show all stacktraces contained in the specified component |
| 1052 json Classify memory usage in JSON |
| 1053 list Classify memory usage in simple listing format |
| 1054 pprof Format the profile dump so that it can be processed by pprof |
| 1055 stacktrace Convert runtime addresses to symbol names |
| 1056 |
| 1057 Quick Reference: |
| 1058 dmprof csv [-p POLICY] <first-dump> |
| 1059 dmprof expand <dump> <policy> <component> <depth> |
| 1060 dmprof json [-p POLICY] <first-dump> |
| 1061 dmprof list [-p POLICY] <first-dump> |
| 1062 dmprof pprof [-c COMPONENT] <dump> <policy> |
| 1063 dmprof stacktrace <dump> |
| 1064 """ % (sys.argv[0])) |
| 1065 sys.exit(1) |
| 1066 action = sys.argv.pop(1) |
| 1067 |
| 1068 return COMMANDS[action](sys.argv) |
810 | 1069 |
811 | 1070 |
812 if __name__ == '__main__': | 1071 if __name__ == '__main__': |
813 sys.exit(main()) | 1072 sys.exit(main()) |