tools/deep_memory_profiler/dmprof - Issue 10802049: Change dmprof commandline format, and clean up start-up routines.

Side by Side Diff: tools/deep_memory_profiler/dmprof

Issue 10802049: Change dmprof commandline format, and clean up start-up routines. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: rebased Created 8 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 #!/usr/bin/env python	1 #!/usr/bin/env python

2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.	2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be	3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	4 # found in the LICENSE file.

5	5

6 """The deep heap profiler script for Chrome."""	6 """The deep heap profiler script for Chrome."""

7	7

8 from datetime import datetime	8 from datetime import datetime

9 import json	9 import json

10 import os	10 import os

11 import re	11 import re

	12 from optparse import OptionParser

12 import shutil	13 import shutil

13 import subprocess	14 import subprocess

14 import sys	15 import sys

15 import tempfile	16 import tempfile

16	17

17 FIND_RUNTIME_SYMBOLS_PATH = os.path.join(	18 FIND_RUNTIME_SYMBOLS_PATH = os.path.join(

18 os.path.dirname(os.path.abspath(__file__)),	19 os.path.dirname(os.path.abspath(__file__)),

19 os.pardir,	20 os.pardir,

20 'find_runtime_symbols')	21 'find_runtime_symbols')

21 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH)	22 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH)

22	23

23 from prepare_symbol_info import prepare_symbol_info	24 from prepare_symbol_info import prepare_symbol_info

24 from find_runtime_symbols import find_runtime_symbols_list	25 from find_runtime_symbols import find_runtime_symbols_list

25	26

26 BUCKET_ID = 5	27 BUCKET_ID = 5

27 VIRTUAL = 0	28 VIRTUAL = 0

28 COMMITTED = 1	29 COMMITTED = 1

29 ALLOC_COUNT = 2	30 ALLOC_COUNT = 2

30 FREE_COUNT = 3	31 FREE_COUNT = 3

31 NULL_REGEX = re.compile('')	32 NULL_REGEX = re.compile('')

32	33

	34 POLICIES_JSON_PATH = os.path.join(os.path.dirname(__file__), 'policies.json')
	M-A Ruel 2012/07/24 14:57:59: os.path.dirname(os.path.abspath(__file__)) for the same reason I mentioned earlier. Dai Mikurube (NOT FULLTIME) 2012/07/24 16:19:24 (in reply to Marc-Antoine Ruel's comment of 2012/07/24 14:57:59): Ah, I forgot it. Done.
	35

33 # Heap Profile Dump versions	36 # Heap Profile Dump versions

34	37

35 # DUMP_DEEP_1 is OBSOLETE.	38 # DUMP_DEEP_1 is OBSOLETE.

36 # DUMP_DEEP_1 DOES NOT distinct mmap regions and malloc chunks.	39 # DUMP_DEEP_1 DOES NOT distinct mmap regions and malloc chunks.

37 # Their stacktraces DO contain mmap* or tc-* at their tops.	40 # Their stacktraces DO contain mmap* or tc-* at their tops.

38 # They should be processed by POLICY_DEEP_1.	41 # They should be processed by POLICY_DEEP_1.

39 DUMP_DEEP_1 = 'DUMP_DEEP_1'	42 DUMP_DEEP_1 = 'DUMP_DEEP_1'

40	43

41 # DUMP_DEEP_2 is OBSOLETE.	44 # DUMP_DEEP_2 is OBSOLETE.

42 # DUMP_DEEP_2 DOES distinct mmap regions and malloc chunks.	45 # DUMP_DEEP_2 DOES distinct mmap regions and malloc chunks.

(...skipping 22 matching lines...)
65 # Heap Profile Policy versions	68 # Heap Profile Policy versions

66	69

67 # POLICY_DEEP_1 DOES NOT include allocation_type columns.	70 # POLICY_DEEP_1 DOES NOT include allocation_type columns.

68 # mmap regions are distincted w/ mmap frames in the pattern column.	71 # mmap regions are distincted w/ mmap frames in the pattern column.

69 POLICY_DEEP_1 = 'POLICY_DEEP_1'	72 POLICY_DEEP_1 = 'POLICY_DEEP_1'

70	73

71 # POLICY_DEEP_2 DOES include allocation_type columns.	74 # POLICY_DEEP_2 DOES include allocation_type columns.

72 # mmap regions are distincted w/ the allocation_type column.	75 # mmap regions are distincted w/ the allocation_type column.

73 POLICY_DEEP_2 = 'POLICY_DEEP_2'	76 POLICY_DEEP_2 = 'POLICY_DEEP_2'

74	77

75 # TODO(dmikurube): Avoid global variables.

76 address_symbol_dict = {}

77 appeared_addresses = set()

78 components = []

79

80	78

81 class EmptyDumpException(Exception):	79 class EmptyDumpException(Exception):

82 def __init__(self, value):	80 def __init__(self, value):

83 self.value = value	81 self.value = value

84 def __str__(self):	82 def __str__(self):

85 return repr(self.value)	83 return repr(self.value)

86	84

87	85

88 class ParsingException(Exception):	86 class ParsingException(Exception):

89 def __init__(self, value):	87 def __init__(self, value):

90 self.value = value	88 self.value = value

91 def __str__(self):	89 def __str__(self):

92 return repr(self.value)	90 return repr(self.value)

93	91

94	92

95 class InvalidDumpException(ParsingException):	93 class InvalidDumpException(ParsingException):

96 def __init__(self, value):	94 def __init__(self, value):

97 self.value = value	95 self.value = value

98 def __str__(self):	96 def __str__(self):

99 return "invalid heap profile dump: %s" % repr(self.value)	97 return "invalid heap profile dump: %s" % repr(self.value)

100	98

101	99

102 class ObsoleteDumpVersionException(ParsingException):	100 class ObsoleteDumpVersionException(ParsingException):

103 def __init__(self, value):	101 def __init__(self, value):

104 self.value = value	102 self.value = value

105 def __str__(self):	103 def __str__(self):

106 return "obsolete heap profile dump version: %s" % repr(self.value)	104 return "obsolete heap profile dump version: %s" % repr(self.value)

107	105

108	106

109 class Policy(object):	107 class Rule(object):

	108 """Represents one matching rule in a policy file."""

110	109

111 def __init__(self, name, mmap, pattern):	110 def __init__(self, name, mmap, pattern):

112 self.name = name	111 self.name = name

113 self.mmap = mmap	112 self.mmap = mmap

114 self.condition = re.compile(pattern + r'\Z')	113 self.condition = re.compile(pattern + r'\Z')

115	114

116	115

117 def get_component(policy_list, bucket):	116 class Policy(object):

	117 """Represents a policy, a content of a policy file."""

	118

	119 def __init__(self, rules, version, components):

	120 self.rules = rules

	121 self.version = version

	122 self.components = components

	123

	124 def append_rule(self, rule):

	125 self.rules.append(rule)

	126

	127

	128 def get_component(rule_list, bucket, symbols):

118 """Returns a component name which a given bucket belongs to.	129 """Returns a component name which a given bucket belongs to.

119	130

120 Args:	131 Args:

121 policy_list: A list containing Policy objects. (Parsed policy data by	132 rule_list: A list of Rule objects.

122 parse_policy.)

123 bucket: A Bucket object to be searched for.	133 bucket: A Bucket object to be searched for.

	134 symbols: A dict mapping runtime addresses to symbol names.

124	135

125 Returns:	136 Returns:

126 A string representing a component name.	137 A string representing a component name.

127 """	138 """

128 if not bucket:	139 if not bucket:

129 return 'no-bucket'	140 return 'no-bucket'

130 if bucket.component:	141 if bucket.component_cache:

131 return bucket.component	142 return bucket.component_cache

132	143

133 stacktrace = ''.join(	144 stacktrace = ''.join(symbols[a] + ' ' for a in bucket.stacktrace).strip()

134 address_symbol_dict[a] + ' ' for a in bucket.stacktrace).strip()

135	145

136 for policy in policy_list:	146 for rule in rule_list:

137 if bucket.mmap == policy.mmap and policy.condition.match(stacktrace):	147 if bucket.mmap == rule.mmap and rule.condition.match(stacktrace):

138 bucket.component = policy.name	148 bucket.component_cache = rule.name

139 return policy.name	149 return rule.name

140	150

141 assert False	151 assert False

142	152

143	153

144 class Bucket(object):	154 class Bucket(object):

	155 """Represents a bucket, which is a unit of memory classification."""

145	156

146 def __init__(self, stacktrace, mmap):	157 def __init__(self, stacktrace, mmap):

147 self.stacktrace = stacktrace	158 self.stacktrace = stacktrace

148 self.mmap = mmap	159 self.mmap = mmap

149 self.component = ''	160 self.component_cache = ''

	161

	162 def clear_component_cache(self):

	163 self.component_cache = ''

150	164

151	165

152 class Log(object):	166 class Dump(object):

	167 """Represents one heap profile dump."""

153	168

154 """A class representing one dumped log data."""	169 def __init__(self, dump_path):

155 def __init__(self, log_path):	170 self.dump_path = dump_path

156 self.log_path = log_path	171 self.dump_lines = [

157 self.log_lines = [	172 l for l in open(self.dump_path, 'r') if l and not l.startswith('#')]

158 l for l in open(self.log_path, 'r') if l and not l.startswith('#')]	173 self.dump_version = ''

159 self.log_version = ''

160 sys.stderr.write('Loading a dump: %s\n' % log_path)

161 self.stacktrace_lines = []	174 self.stacktrace_lines = []

162 self.counters = {}	175 self.counters = {}

163 self.log_time = os.stat(self.log_path).st_mtime	176 self.dump_time = os.stat(self.dump_path).st_mtime

164	177

165 def dump_stacktrace(buckets):	178 def print_stacktrace(self, buckets, symbols):

166 """Prints a given stacktrace.	179 """Prints a given stacktrace.

167	180

168 Args:	181 Args:

169 buckets: A dict mapping bucket ids and their corresponding Bucket	182 buckets: A dict mapping bucket ids to Bucket objects.

170 objects.	183 symbols: A dict mapping runtime addresses to symbol names.

171 """	184 """

172 for line in self.stacktrace_lines:	185 for line in self.stacktrace_lines:

173 words = line.split()	186 words = line.split()

174 bucket = buckets.get(int(words[BUCKET_ID]))	187 bucket = buckets.get(int(words[BUCKET_ID]))

175 if not bucket:	188 if not bucket:

176 continue	189 continue

177 for i in range(0, BUCKET_ID - 1):	190 for i in range(0, BUCKET_ID - 1):

178 sys.stdout.write(words[i] + ' ')	191 sys.stdout.write(words[i] + ' ')

179 for address in bucket.stacktrace:	192 for address in bucket.stacktrace:

180 sys.stdout.write((address_symbol_dict.get(address) or address) + ' ')	193 sys.stdout.write((symbols.get(address) or address) + ' ')

181 sys.stdout.write('\n')	194 sys.stdout.write('\n')

182	195

183 @staticmethod	196 @staticmethod

184 def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets,	197 def accumulate_size_for_pprof(stacktrace_lines, rule_list, buckets,

185 component_name):	198 component_name, symbols):

186 """Accumulates size of committed chunks and the number of allocated chunks.	199 """Accumulates size of committed chunks and the number of allocated chunks.

187	200

188 Args:	201 Args:

189 stacktrace_lines: A list of strings which are valid as stacktraces.	202 stacktrace_lines: A list of strings which are valid as stacktraces.

190 policy_list: A list containing Policy objects. (Parsed policy data by	203 rule_list: A list of Rule objects.

191 parse_policy.)	204 buckets: A dict mapping bucket ids to Bucket objects.

192 buckets: A dict mapping bucket ids and their corresponding Bucket

193 objects.

194 component_name: A name of component for filtering.	205 component_name: A name of component for filtering.

	206 symbols: A dict mapping runtime addresses to symbol names.

195	207

196 Returns:	208 Returns:

197 Two integers which are the accumulated size of committed regions and the	209 Two integers which are the accumulated size of committed regions and the

198 number of allocated chunks, respectively.	210 number of allocated chunks, respectively.

199 """	211 """

200 com_committed = 0	212 com_committed = 0

201 com_allocs = 0	213 com_allocs = 0

202 for line in stacktrace_lines:	214 for line in stacktrace_lines:

203 words = line.split()	215 words = line.split()

204 bucket = buckets.get(int(words[BUCKET_ID]))	216 bucket = buckets.get(int(words[BUCKET_ID]))

205 if (not bucket or	217 if (not bucket or

206 (component_name and	218 (component_name and

207 component_name != get_component(policy_list, bucket))):	219 component_name != get_component(rule_list, bucket, symbols))):

208 continue	220 continue

209	221

210 com_committed += int(words[COMMITTED])	222 com_committed += int(words[COMMITTED])

211 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])	223 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])

212	224

213 return com_committed, com_allocs	225 return com_committed, com_allocs

214	226

215 @staticmethod	227 @staticmethod

216 def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list,	228 def print_stacktrace_lines_for_pprof(stacktrace_lines, rule_list,

217 buckets, component_name):	229 buckets, component_name, symbols):

218 """Prints information of stacktrace lines for pprof.	230 """Prints information of stacktrace lines for pprof.

219	231

220 Args:	232 Args:

221 stacktrace_lines: A list of strings which are valid as stacktraces.	233 stacktrace_lines: A list of strings which are valid as stacktraces.

222 policy_list: A list containing Policy objects. (Parsed policy data by	234 rule_list: A list of Rule objects.

223 parse_policy.)	235 buckets: A dict mapping bucket ids to Bucket objects.

224 buckets: A dict mapping bucket ids and their corresponding Bucket

225 objects.

226 component_name: A name of component for filtering.	236 component_name: A name of component for filtering.

	237 symbols: A dict mapping runtime addresses to symbol names.

227 """	238 """

228 for line in stacktrace_lines:	239 for line in stacktrace_lines:

229 words = line.split()	240 words = line.split()

230 bucket = buckets.get(int(words[BUCKET_ID]))	241 bucket = buckets.get(int(words[BUCKET_ID]))

231 if (not bucket or	242 if (not bucket or

232 (component_name and	243 (component_name and

233 component_name != get_component(policy_list, bucket))):	244 component_name != get_component(rule_list, bucket, symbols))):

234 continue	245 continue

235	246

236 sys.stdout.write('%6d: %8s [%6d: %8s] @' % (	247 sys.stdout.write('%6d: %8s [%6d: %8s] @' % (

237 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),	248 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),

238 words[COMMITTED],	249 words[COMMITTED],

239 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),	250 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),

240 words[COMMITTED]))	251 words[COMMITTED]))

241 for address in bucket.stacktrace:	252 for address in bucket.stacktrace:

242 sys.stdout.write(' ' + address)	253 sys.stdout.write(' ' + address)

243 sys.stdout.write('\n')	254 sys.stdout.write('\n')

244	255

245 def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name):	256 def print_for_pprof(

246 """Converts the log file so it can be processed by pprof.	257 self, rule_list, buckets, maps_lines, component_name, symbols):

	258 """Converts the heap profile dump so it can be processed by pprof.

247	259

248 Args:	260 Args:

249 policy_list: A list containing Policy objects. (Parsed policy data by	261 rule_list: A list of Rule objects.

250 parse_policy.)	262 buckets: A dict mapping bucket ids to Bucket objects.

251 buckets: A dict mapping bucket ids and their corresponding Bucket	263 maps_lines: A list of strings containing /proc/.../maps.

252 objects.

253 mapping_lines: A list of strings containing /proc/.../maps.

254 component_name: A name of component for filtering.	264 component_name: A name of component for filtering.

	265 symbols: A dict mapping runtime addresses to symbol names.

255 """	266 """

256 sys.stdout.write('heap profile: ')	267 sys.stdout.write('heap profile: ')

257 com_committed, com_allocs = self.accumulate_size_for_pprof(	268 com_committed, com_allocs = self.accumulate_size_for_pprof(

258 self.stacktrace_lines, policy_list, buckets, component_name)	269 self.stacktrace_lines, rule_list, buckets, component_name, symbols)

259	270

260 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (	271 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (

261 com_allocs, com_committed, com_allocs, com_committed))	272 com_allocs, com_committed, com_allocs, com_committed))

262	273

263 self.dump_stacktrace_lines_for_pprof(	274 self.print_stacktrace_lines_for_pprof(

264 self.stacktrace_lines, policy_list, buckets, component_name)	275 self.stacktrace_lines, rule_list, buckets, component_name, symbols)

265	276

266 sys.stdout.write('MAPPED_LIBRARIES:\n')	277 sys.stdout.write('MAPPED_LIBRARIES:\n')

267 for line in mapping_lines:	278 for line in maps_lines:

268 sys.stdout.write(line)	279 sys.stdout.write(line)

269	280

270 @staticmethod	281 @staticmethod

271 def check_stacktrace_line(stacktrace_line, buckets):	282 def check_stacktrace_line(stacktrace_line, buckets, appeared_addresses):

272 """Checks if a given stacktrace_line is valid as stacktrace.	283 """Checks if a given stacktrace_line is valid as stacktrace.

273	284

274 Args:	285 Args:

275 stacktrace_line: A string to be checked.	286 stacktrace_line: A string to be checked.

276 buckets: A dict mapping bucket ids and their corresponding Bucket	287 buckets: A dict mapping bucket ids to Bucket objects.

277 objects.	288 appeared_addresses: A list where appeared addresses will be stored.

278	289

279 Returns:	290 Returns:

280 True if the given stacktrace_line is valid.	291 True if the given stacktrace_line is valid.

281 """	292 """

282 words = stacktrace_line.split()	293 words = stacktrace_line.split()

283 if len(words) < BUCKET_ID + 1:	294 if len(words) < BUCKET_ID + 1:

284 return False	295 return False

285 if words[BUCKET_ID - 1] != '@':	296 if words[BUCKET_ID - 1] != '@':

286 return False	297 return False

287 bucket = buckets.get(int(words[BUCKET_ID]))	298 bucket = buckets.get(int(words[BUCKET_ID]))

(...skipping 10 matching lines...)
298 A pair of an integer indicating a line number after skipped, and a	309 A pair of an integer indicating a line number after skipped, and a

299 boolean value which is True if found a line which skipping_condition	310 boolean value which is True if found a line which skipping_condition

300 is False for.	311 is False for.

301 """	312 """

302 while skipping_condition(line_number):	313 while skipping_condition(line_number):

303 line_number += 1	314 line_number += 1

304 if line_number >= max_line_number:	315 if line_number >= max_line_number:

305 return line_number, False	316 return line_number, False

306 return line_number, True	317 return line_number, True

307	318

308 def parse_stacktraces_while_valid(self, buckets, log_lines, line_number):	319 def parse_stacktraces_while_valid(

	320 self, buckets, dump_lines, line_number, appeared_addresses):

309 """Parses stacktrace lines while the lines are valid.	321 """Parses stacktrace lines while the lines are valid.

310	322

311 Args:	323 Args:

312 buckets: A dict mapping bucket ids and their corresponding Bucket	324 buckets: A dict mapping bucket ids to Bucket objects.

313 objects.	325 dump_lines: A list of lines to be parsed.

314 log_lines: A list of lines to be parsed.	326 line_number: A line number to start parsing in dump_lines.

315 line_number: An integer representing the starting line number in	327 appeared_addresses: A list where appeared addresses will be stored.

316 log_lines.

317	328

318 Returns:	329 Returns:

319 A pair of a list of valid lines and an integer representing the last	330 A pair of a list of valid lines and an integer representing the last

320 line number in log_lines.	331 line number in dump_lines.

321 """	332 """

322 (line_number, _) = self.skip_lines_while(	333 (line_number, _) = self.skip_lines_while(

323 line_number, len(log_lines),	334 line_number, len(dump_lines),

324 lambda n: not log_lines[n].split()[0].isdigit())	335 lambda n: not dump_lines[n].split()[0].isdigit())

325 stacktrace_lines_start = line_number	336 stacktrace_lines_start = line_number

326 (line_number, _) = self.skip_lines_while(	337 (line_number, _) = self.skip_lines_while(

327 line_number, len(log_lines),	338 line_number, len(dump_lines),

328 lambda n: self.check_stacktrace_line(log_lines[n], buckets))	339 lambda n: self.check_stacktrace_line(

329 return (log_lines[stacktrace_lines_start:line_number], line_number)	340 dump_lines[n], buckets, appeared_addresses))

	341 return (dump_lines[stacktrace_lines_start:line_number], line_number)

330	342

331 def parse_stacktraces(self, buckets, line_number):	343 def parse_stacktraces(self, buckets, line_number, appeared_addresses):

332 """Parses lines in self.log_lines as stacktrace.	344 """Parses lines in self.dump_lines as stacktrace.

333	345

334 Valid stacktrace lines are stored into self.stacktrace_lines.	346 Valid stacktrace lines are stored into self.stacktrace_lines.

335	347

336 Args:	348 Args:

337 buckets: A dict mapping bucket ids and their corresponding Bucket	349 buckets: A dict mapping bucket ids to Bucket objects.

338 objects.	350 line_number: A line number to start parsing in dump_lines.

339 line_number: An integer representing the starting line number in	351 appeared_addresses: A list where appeared addresses will be stored.

340 log_lines.

341	352

342 Raises:	353 Raises:

343 ParsingException for invalid dump versions.	354 ParsingException for invalid dump versions.

344 """	355 """

345 sys.stderr.write(' Version: %s\n' % self.log_version)	356 if self.dump_version == DUMP_DEEP_5:

346

347 if self.log_version == DUMP_DEEP_5:

348 (self.stacktrace_lines, line_number) = (	357 (self.stacktrace_lines, line_number) = (

349 self.parse_stacktraces_while_valid(	358 self.parse_stacktraces_while_valid(

350 buckets, self.log_lines, line_number))	359 buckets, self.dump_lines, line_number, appeared_addresses))

351	360

352 elif self.log_version in DUMP_DEEP_OBSOLETE:	361 elif self.dump_version in DUMP_DEEP_OBSOLETE:

353 raise ObsoleteDumpVersionException(self.log_version)	362 raise ObsoleteDumpVersionException(self.dump_version)

354	363

355 else:	364 else:

356 raise InvalidDumpException('Invalid version: %s' % self.log_version)	365 raise InvalidDumpException('Invalid version: %s' % self.dump_version)

357	366

358 def parse_global_stats(self):	367 def parse_global_stats(self):

359 """Parses lines in self.log_lines as global stats."""	368 """Parses lines in self.dump_lines as global stats."""

360 (ln, _) = self.skip_lines_while(	369 (ln, _) = self.skip_lines_while(

361 0, len(self.log_lines),	370 0, len(self.dump_lines),

362 lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n')	371 lambda n: self.dump_lines[n] != 'GLOBAL_STATS:\n')

363	372

364 global_stat_names = [	373 global_stat_names = [

365 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',	374 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',

366 'nonprofiled-absent', 'nonprofiled-anonymous',	375 'nonprofiled-absent', 'nonprofiled-anonymous',

367 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',	376 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',

368 'nonprofiled-stack', 'nonprofiled-other',	377 'nonprofiled-stack', 'nonprofiled-other',

369 'profiled-mmap', 'profiled-malloc']	378 'profiled-mmap', 'profiled-malloc']

370	379

371 for prefix in global_stat_names:	380 for prefix in global_stat_names:

372 (ln, _) = self.skip_lines_while(	381 (ln, _) = self.skip_lines_while(

373 ln, len(self.log_lines),	382 ln, len(self.dump_lines),

374 lambda n: self.log_lines[n].split()[0] != prefix)	383 lambda n: self.dump_lines[n].split()[0] != prefix)

375 words = self.log_lines[ln].split()	384 words = self.dump_lines[ln].split()

376 self.counters[prefix + '_virtual'] = int(words[-2])	385 self.counters[prefix + '_virtual'] = int(words[-2])

377 self.counters[prefix + '_committed'] = int(words[-1])	386 self.counters[prefix + '_committed'] = int(words[-1])

378	387

379 def parse_version(self):	388 def parse_version(self):

380 """Parses a version string in self.log_lines.	389 """Parses a version string in self.dump_lines.

381	390

382 Returns:	391 Returns:

383 A pair of (a string representing a version of the stacktrace dump,	392 A pair of (a string representing a version of the stacktrace dump,

384 and an integer indicating a line number next to the version string).	393 and an integer indicating a line number next to the version string).

385	394

386 Raises:	395 Raises:

387 ParsingException for invalid dump versions.	396 ParsingException for invalid dump versions.

388 """	397 """

389 version = ''	398 version = ''

390	399

391 # Skip until an identifiable line.	400 # Skip until an identifiable line.

392 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')	401 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')

393 if not self.log_lines:	402 if not self.dump_lines:

394 raise EmptyDumpException('Empty heap dump file.')	403 raise EmptyDumpException('Empty heap dump file.')

395 (ln, found) = self.skip_lines_while(	404 (ln, found) = self.skip_lines_while(

396 0, len(self.log_lines),	405 0, len(self.dump_lines),

397 lambda n: not self.log_lines[n].startswith(headers))	406 lambda n: not self.dump_lines[n].startswith(headers))

398 if not found:	407 if not found:

399 raise InvalidDumpException('No version header.')	408 raise InvalidDumpException('No version header.')

400	409

401 # Identify a version.	410 # Identify a version.

402 if self.log_lines[ln].startswith('heap profile: '):	411 if self.dump_lines[ln].startswith('heap profile: '):

403 version = self.log_lines[ln][13:].strip()	412 version = self.dump_lines[ln][13:].strip()

404 if version == DUMP_DEEP_5:	413 if version == DUMP_DEEP_5:

405 (ln, _) = self.skip_lines_while(	414 (ln, _) = self.skip_lines_while(

406 ln, len(self.log_lines),	415 ln, len(self.dump_lines),

407 lambda n: self.log_lines[n] != 'STACKTRACES:\n')	416 lambda n: self.dump_lines[n] != 'STACKTRACES:\n')

408 elif version in DUMP_DEEP_OBSOLETE:	417 elif version in DUMP_DEEP_OBSOLETE:

409 raise ObsoleteDumpVersionException(version)	418 raise ObsoleteDumpVersionException(version)

410 else:	419 else:

411 raise InvalidDumpException('Invalid version: %s' % version)	420 raise InvalidDumpException('Invalid version: %s' % version)

412 elif self.log_lines[ln] == 'STACKTRACES:\n':	421 elif self.dump_lines[ln] == 'STACKTRACES:\n':

413 raise ObsoleteDumpVersionException(DUMP_DEEP_1)	422 raise ObsoleteDumpVersionException(DUMP_DEEP_1)

414 elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n':	423 elif self.dump_lines[ln] == 'MMAP_STACKTRACES:\n':

415 raise ObsoleteDumpVersionException(DUMP_DEEP_2)	424 raise ObsoleteDumpVersionException(DUMP_DEEP_2)

416	425

417 return (version, ln)	426 return (version, ln)

418	427

419 def parse_log(self, buckets):	428 def parse_dump(self, buckets, appeared_addresses):

420 self.log_version, ln = self.parse_version()	429 self.dump_version, ln = self.parse_version()

421 self.parse_global_stats()	430 self.parse_global_stats()

422 self.parse_stacktraces(buckets, ln)	431 self.parse_stacktraces(buckets, ln, appeared_addresses)

423	432

424 @staticmethod	433 @staticmethod

425 def accumulate_size_for_policy(stacktrace_lines,	434 def accumulate_size_for_policy(stacktrace_lines,

426 policy_list, buckets, sizes):	435 rule_list, buckets, sizes, symbols):

427 for line in stacktrace_lines:	436 for line in stacktrace_lines:

428 words = line.split()	437 words = line.split()

429 bucket = buckets.get(int(words[BUCKET_ID]))	438 bucket = buckets.get(int(words[BUCKET_ID]))

430 component_match = get_component(policy_list, bucket)	439 component_match = get_component(rule_list, bucket, symbols)

	440

431 sizes[component_match] += int(words[COMMITTED])	441 sizes[component_match] += int(words[COMMITTED])

432	442

433 if component_match.startswith('tc-'):	443 if component_match.startswith('tc-'):

434 sizes['tc-total-log'] += int(words[COMMITTED])	444 sizes['tc-total-log'] += int(words[COMMITTED])

435 elif component_match.startswith('mmap-'):	445 elif component_match.startswith('mmap-'):

436 sizes['mmap-total-log'] += int(words[COMMITTED])	446 sizes['mmap-total-log'] += int(words[COMMITTED])

437 else:	447 else:

438 sizes['other-total-log'] += int(words[COMMITTED])	448 sizes['other-total-log'] += int(words[COMMITTED])

439	449

440 def apply_policy(self, policy_list, buckets, first_log_time):	450 def apply_policy(

	451 self, rule_list, buckets, first_dump_time, components, symbols):

441 """Aggregates the total memory size of each component.	452 """Aggregates the total memory size of each component.

442	453

443 Iterate through all stacktraces and attribute them to one of the components	454 Iterate through all stacktraces and attribute them to one of the components

444 based on the policy. It is important to apply policy in right order.	455 based on the policy. It is important to apply policy in right order.

445	456

446 Args:	457 Args:

447 policy_list: A list containing Policy objects. (Parsed policy data by	458 rule_list: A list of Rule objects.

448 parse_policy.)	459 buckets: A dict mapping bucket ids to Bucket objects.

449 buckets: A dict mapping bucket ids and their corresponding Bucket	460 first_dump_time: An integer representing time when the first dump is

450 objects.

451 first_log_time: An integer representing time when the first log is

452 dumped.	461 dumped.

	462 components: A list of strings of component names.

	463 symbols: A dict mapping runtime addresses to symbol names.

453	464

454 Returns:	465 Returns:

455 A dict mapping components and their corresponding sizes.	466 A dict mapping components and their corresponding sizes.

456 """	467 """

457	468

458 sys.stderr.write('apply policy:%s\n' % (self.log_path))	469 sys.stderr.write('Applying policy: "%s".\n' % self.dump_path)

459 sizes = dict((c, 0) for c in components)	470 sizes = dict((c, 0) for c in components)

460	471

461 self.accumulate_size_for_policy(self.stacktrace_lines,	472 self.accumulate_size_for_policy(self.stacktrace_lines,

462 policy_list, buckets, sizes)	473 rule_list, buckets, sizes, symbols)

463	474

464 mmap_prefix = 'profiled-mmap'	475 mmap_prefix = 'profiled-mmap'

465 malloc_prefix = 'profiled-malloc'	476 malloc_prefix = 'profiled-malloc'

466	477

467 sizes['mmap-no-log'] = (	478 sizes['mmap-no-log'] = (

468 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log'])	479 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log'])

469 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix]	480 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix]

470 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix]	481 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix]

471	482

472 sizes['tc-no-log'] = (	483 sizes['tc-no-log'] = (

(...skipping 34 matching lines...)
507 'nonprofiled-stack_committed',	518 'nonprofiled-stack_committed',

508 'nonprofiled-other_committed')	519 'nonprofiled-other_committed')

509 sizes['mustbezero'] = (	520 sizes['mustbezero'] = (

510 self.counters['total_committed'] -	521 self.counters['total_committed'] -

511 sum(self.counters[i] for i in removed))	522 sum(self.counters[i] for i in removed))

512 if 'total-exclude-profiler' in sizes:	523 if 'total-exclude-profiler' in sizes:

513 sizes['total-exclude-profiler'] = (	524 sizes['total-exclude-profiler'] = (

514 self.counters['total_committed'] -	525 self.counters['total_committed'] -

515 (sizes['mmap-profiler'] + sizes['mmap-allocated-type']))	526 (sizes['mmap-profiler'] + sizes['mmap-allocated-type']))

516 if 'hour' in sizes:	527 if 'hour' in sizes:

517 sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0	528 sizes['hour'] = (self.dump_time - first_dump_time) / 60.0 / 60.0

518 if 'minute' in sizes:	529 if 'minute' in sizes:

519 sizes['minute'] = (self.log_time - first_log_time) / 60.0	530 sizes['minute'] = (self.dump_time - first_dump_time) / 60.0

520 if 'second' in sizes:	531 if 'second' in sizes:

521 sizes['second'] = self.log_time - first_log_time	532 sizes['second'] = self.dump_time - first_dump_time

522	533

523 return sizes	534 return sizes

524	535

525 @staticmethod	536 @staticmethod

526 def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets,	537 def accumulate_size_for_expand(stacktrace_lines, rule_list, buckets,

527 component_name, depth, sizes):	538 component_name, depth, sizes, symbols):

528 for line in stacktrace_lines:	539 for line in stacktrace_lines:

529 words = line.split()	540 words = line.split()

530 bucket = buckets.get(int(words[BUCKET_ID]))	541 bucket = buckets.get(int(words[BUCKET_ID]))

531 component_match = get_component(policy_list, bucket)	542 component_match = get_component(rule_list, bucket, symbols)

532 if component_match == component_name:	543 if component_match == component_name:

533 stacktrace_sequence = ''	544 stacktrace_sequence = ''

534 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace),	545 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace),

535 1 + depth)]:	546 1 + depth)]:

536 stacktrace_sequence += address_symbol_dict[address] + ' '	547 stacktrace_sequence += symbols[address] + ' '

537 if not stacktrace_sequence in sizes:	548 if not stacktrace_sequence in sizes:

538 sizes[stacktrace_sequence] = 0	549 sizes[stacktrace_sequence] = 0

539 sizes[stacktrace_sequence] += int(words[COMMITTED])	550 sizes[stacktrace_sequence] += int(words[COMMITTED])

540	551

541 def expand(self, policy_list, buckets, component_name, depth):	552 def expand(self, rule_list, buckets, component_name, depth, symbols):

542 """Prints all stacktraces in a given component of given depth.	553 """Prints all stacktraces in a given component of given depth.

543	554

544 Args:	555 Args:

545 policy_list: A list containing Policy objects. (Parsed policy data by	556 rule_list: A list of Rule objects.

546 parse_policy.)	557 buckets: A dict mapping bucket ids to Bucket objects.

547 buckets: A dict mapping bucket ids and their corresponding Bucket

548 objects.

549 component_name: A name of component for filtering.	558 component_name: A name of component for filtering.

550 depth: An integer representing depth to be printed.	559 depth: An integer representing depth to be printed.

	560 symbols: A dict mapping runtime addresses to symbol names.

551 """	561 """

552 sizes = {}	562 sizes = {}

553	563

554 self.accumulate_size_for_expand(	564 self.accumulate_size_for_expand(

555 self.stacktrace_lines, policy_list, buckets, component_name,	565 self.stacktrace_lines, rule_list, buckets, component_name,

556 depth, sizes)	566 depth, sizes, symbols)

557	567

558 sorted_sizes_list = sorted(	568 sorted_sizes_list = sorted(

559 sizes.iteritems(), key=(lambda x: x[1]), reverse=True)	569 sizes.iteritems(), key=(lambda x: x[1]), reverse=True)

560 total = 0	570 total = 0

561 for size_pair in sorted_sizes_list:	571 for size_pair in sorted_sizes_list:

562 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0]))	572 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0]))

563 total += size_pair[1]	573 total += size_pair[1]

564 sys.stderr.write('total: %d\n' % (total))	574 sys.stderr.write('total: %d\n' % (total))

565	575

566	576

567 def update_symbols(symbol_path, mapping_lines, maps_path):	577 def update_symbols(

	578 symbol_path, maps_path, appeared_addresses, symbols):

568 """Updates address/symbol mapping on memory and in a .symbol cache file.	579 """Updates address/symbol mapping on memory and in a .symbol cache file.

569	580

570 It reads cached address/symbol mapping from a .symbol file if it exists.	581 It reads cached address/symbol mapping from a .symbol file if it exists.

571 Then, it resolves unresolved addresses from a Chrome binary with pprof.	582 Then, it resolves unresolved addresses from a Chrome binary with pprof.

572 Both mappings on memory and in a .symbol cache file are updated.	583 Both mappings on memory and in a .symbol cache file are updated.

573	584

574 Symbol files are formatted as follows:	585 Symbol files are formatted as follows:

575 <Address> <Symbol>	586 <Address> <Symbol>

576 <Address> <Symbol>	587 <Address> <Symbol>

577 <Address> <Symbol>	588 <Address> <Symbol>

578 ...	589 ...

579	590

580 Args:	591 Args:

581 symbol_path: A string representing a path for a .symbol file.	592 symbol_path: A string representing a path for a .symbol file.

582 mapping_lines: A list of strings containing /proc/.../maps.

583 maps_path: A string of the path of /proc/.../maps.	593 maps_path: A string of the path of /proc/.../maps.

	594 appeared_addresses: A list of known addresses.

	595 symbols: A dict mapping runtime addresses to symbol names.

584 """	596 """

585 with open(symbol_path, mode='a+') as symbol_f:	597 with open(symbol_path, mode='a+') as symbol_f:

586 symbol_lines = symbol_f.readlines()	598 symbol_lines = symbol_f.readlines()

587 if symbol_lines:	599 if symbol_lines:

588 for line in symbol_lines:	600 for line in symbol_lines:

589 items = line.split(None, 1)	601 items = line.split(None, 1)

590 address_symbol_dict[items[0]] = items[1].rstrip()	602 if len(items) == 1:

	603 items.append('??')

	604 symbols[items[0]] = items[1].rstrip()

	605 if symbols:

	606 sys.stderr.write(' Found %d symbols in cache.\n' % len(symbols))

	607 else:

	608 sys.stderr.write(' No symbols found in cache.\n')

591	609

592 unresolved_addresses = sorted(	610 unresolved_addresses = sorted(

593 a for a in appeared_addresses if a not in address_symbol_dict)	611 a for a in appeared_addresses if a not in symbols)

594	612

595 if unresolved_addresses:	613 if not unresolved_addresses:

	614 sys.stderr.write(' No need to resolve any more addresses.\n')

	615 else:

	616 sys.stderr.write(' %d addresses are unresolved.\n' %

	617 len(unresolved_addresses))

596 prepared_data_dir = tempfile.mkdtemp()	618 prepared_data_dir = tempfile.mkdtemp()

597 try:	619 try:

598 prepare_symbol_info(maps_path, prepared_data_dir)	620 prepare_symbol_info(maps_path, prepared_data_dir)

599	621

600 symbols = find_runtime_symbols_list(	622 symbol_list = find_runtime_symbols_list(

601 prepared_data_dir, unresolved_addresses)	623 prepared_data_dir, unresolved_addresses)

602	624

603 for address, symbol in zip(unresolved_addresses, symbols):	625 for address, symbol in zip(unresolved_addresses, symbol_list):

	626 if not symbol:

	627 symbol = '??'

604 stripped_symbol = symbol.strip()	628 stripped_symbol = symbol.strip()

605 address_symbol_dict[address] = stripped_symbol	629 symbols[address] = stripped_symbol

606 symbol_f.write('%s %s\n' % (address, stripped_symbol))	630 symbol_f.write('%s %s\n' % (address, stripped_symbol))

607 finally:	631 finally:

608 shutil.rmtree(prepared_data_dir)	632 shutil.rmtree(prepared_data_dir)

609	633

610	634

611 def parse_policy(policy_path):	635 def parse_policy(policy_path):

612 """Parses policy file.	636 """Parses policy file.

613	637

614 A policy file contains component's names and their	638 A policy file contains component's names and their

615 stacktrace pattern written in regular expression.	639 stacktrace pattern written in regular expression.

616 Those patterns are matched against each symbols of	640 Those patterns are matched against each symbols of

617 each stacktraces in the order written in the policy file	641 each stacktraces in the order written in the policy file

618	642

619 Args:	643 Args:

620 policy_path: A path for a policy file.	644 policy_path: A path for a policy file.

621 Returns:	645 Returns:

622 A list containing component's name and its regex object	646 A list containing component's name and its regex object

623 """	647 """

624 with open(policy_path, mode='r') as policy_f:	648 with open(policy_path, mode='r') as policy_f:

625 policy_lines = policy_f.readlines()	649 policy_lines = policy_f.readlines()

626	650

627 policy_version = POLICY_DEEP_1	651 policy_version = POLICY_DEEP_1

628 if policy_lines[0].startswith('heap profile policy: '):	652 if policy_lines[0].startswith('heap profile policy: '):

629 policy_version = policy_lines[0][21:].strip()	653 policy_version = policy_lines[0][21:].strip()

630 policy_lines.pop(0)	654 policy_lines.pop(0)

631 policy_list = []	655 rule_list = []

	656 components = []

632	657

633 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1:	658 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1:

634 sys.stderr.write(' heap profile policy version: %s\n' % policy_version)

635 for line in policy_lines:	659 for line in policy_lines:

636 if line[0] == '#':	660 if line[0] == '#':

637 continue	661 continue

638	662

639 if policy_version == POLICY_DEEP_2:	663 if policy_version == POLICY_DEEP_2:

640 (name, allocation_type, pattern) = line.strip().split(None, 2)	664 (name, allocation_type, pattern) = line.strip().split(None, 2)

641 mmap = False	665 mmap = False

642 if allocation_type == 'mmap':	666 if allocation_type == 'mmap':

643 mmap = True	667 mmap = True

644 elif policy_version == POLICY_DEEP_1:	668 elif policy_version == POLICY_DEEP_1:

645 name = line.split()[0]	669 name = line.split()[0]

646 pattern = line[len(name) : len(line)].strip()	670 pattern = line[len(name) : len(line)].strip()

647 mmap = False	671 mmap = False

648	672

649 if pattern != 'default':	673 if pattern != 'default':

650 policy_list.append(Policy(name, mmap, pattern))	674 rule_list.append(Rule(name, mmap, pattern))

651 if components.count(name) == 0:	675 if components.count(name) == 0:

652 components.append(name)	676 components.append(name)

653	677

654 else:	678 else:

655 sys.stderr.write(' invalid heap profile policy version: %s\n' % (	679 sys.stderr.write(' invalid heap profile policy version: %s\n' % (

656 policy_version))	680 policy_version))

657	681

658 return policy_list	682 return rule_list, policy_version, components

659	683

660	684

661 def main():	685 def find_prefix(path):

662 if (len(sys.argv) < 4) or (not (sys.argv[1] in ['--csv',	686 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path)

663 '--json',

664 '--expand',

665 '--list',

666 '--stacktrace',

667 '--pprof'])):

668 sys.stderr.write("""Usage:

669 %s [options] <chrome-binary> <policy> <profile> [component-name] [depth]

670	687

671 Options:

672 --csv Output result in csv format

673 --json Output result in json format

674 --stacktrace Convert raw address to symbol names

675 --list Lists components and their sizes

676 --expand Show all stacktraces in the specified component

677 of given depth with their sizes

678 --pprof Format the profile file so it can be processed

679 by pprof

680	688

681 Examples:	689 def load_buckets(prefix):

682 dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv

683 dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json

684 dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap

685 dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4

686 dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt

687 """ % (sys.argv[0]))

688 sys.exit(1)

689

690 action = sys.argv[1]

691 chrome_path = sys.argv[2]

692 policy_path = sys.argv[3]

693 log_path = sys.argv[4]

694

695 sys.stderr.write('parsing a policy file\n')

696 policy_list = parse_policy(policy_path)

697

698 p = re.compile('\.[0-9][0-9][0-9][0-9]\.heap')

699 prefix = p.sub('', log_path)

700 symbol_path = prefix + '.symbols'

701

702 sys.stderr.write('parsing the maps file\n')

703 maps_path = prefix + '.maps'

704 with open(maps_path, 'r') as maps_f:

705 maps_lines = maps_f.readlines()

706

707 # Reading buckets	690 # Reading buckets

708 sys.stderr.write('parsing the bucket file\n')	691 sys.stderr.write('Loading bucket files.\n')

709 buckets = {}	692 buckets = {}

710 bucket_count = 0	693 bucket_count = 0

711 n = 0	694 n = 0

712 while True:	695 while True:

713 buckets_path = '%s.%04d.buckets' % (prefix, n)	696 buckets_path = '%s.%04d.buckets' % (prefix, n)

714 if not os.path.exists(buckets_path):	697 if not os.path.exists(buckets_path):

715 if n > 10:	698 if n > 10:

716 break	699 break

717 n += 1	700 n += 1

718 continue	701 continue

719 sys.stderr.write('reading buckets from %s\n' % (buckets_path))	702 sys.stderr.write(' %s\n' % buckets_path)

720 with open(buckets_path, 'r') as buckets_f:	703 with open(buckets_path, 'r') as buckets_f:

721 for line in buckets_f:	704 for line in buckets_f:

722 words = line.split()	705 words = line.split()

723 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap')	706 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap')

724 n += 1	707 n += 1

725	708

726 log_path_list = [log_path]	709 return buckets

727	710

728 if action in ('--csv', '--json'):	711

729 # search for the sequence of files	712 def determine_dump_path_list(dump_path, prefix):

730 n = int(log_path[len(log_path) - 9 : len(log_path) - 5])	713 dump_path_list = [dump_path]

731 n += 1 # skip current file	714

732 while True:	715 # search for the sequence of files

733 p = '%s.%04d.heap' % (prefix, n)	716 n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5])

734 if os.path.exists(p):	717 n += 1 # skip current file

735 log_path_list.append(p)	718 while True:

736 else:	719 p = '%s.%04d.heap' % (prefix, n)

737 break	720 if os.path.exists(p):

738 n += 1	721 dump_path_list.append(p)

739

740 logs = []

741 for path in log_path_list:

742 new_log = Log(path)

743 sys.stderr.write('Parsing a dump: %s\n' % path)

744 try:

745 new_log.parse_log(buckets)

746 except EmptyDumpException:

747 sys.stderr.write(' WARNING: ignored an empty dump: %s\n' % path)

748 except ParsingException, e:

749 sys.stderr.write(' Error in parsing heap profile dump: %s\n' % e)

750 sys.exit(1)

751 else:	722 else:

752 logs.append(new_log)	723 break

753	724 n += 1

754 sys.stderr.write('getting symbols\n')	725

755 update_symbols(symbol_path, maps_lines, maps_path)	726 return dump_path_list

756	727

757 # TODO(dmikurube): Many modes now. Split them into separete functions.	728

758 if action == '--stacktrace':	729 def load_single_dump(dump_path, buckets, appeared_addresses):

759 logs[0].dump_stacktrace(buckets)	730 new_dump = Dump(dump_path)

760	731 try:

761 elif action == '--csv':	732 new_dump.parse_dump(buckets, appeared_addresses)

762 sys.stdout.write(','.join(components))	733 except EmptyDumpException:

763 sys.stdout.write('\n')	734 sys.stderr.write('... ignored an empty dump')

764	735 except ParsingException, e:

765 for log in logs:	736 sys.stderr.write('... error in parsing: %s' % e)

766 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time)	737 sys.exit(1)

	738 else:

	739 sys.stderr.write(' (version: %s)' % new_dump.dump_version)

	740

	741 return new_dump

	742

	743

	744 def load_dump(dump_path, buckets):

	745 sys.stderr.write('Loading a heap dump file: "%s"' % dump_path)

	746 appeared_addresses = set()

	747 dump = load_single_dump(dump_path, buckets, appeared_addresses)

	748 sys.stderr.write('.\n')

	749 return dump, appeared_addresses

	750

	751

	752 def load_dumps(dump_path_list, buckets):

	753 sys.stderr.write('Loading heap dump files.\n')

	754 appeared_addresses = set()

	755 dumps = []

	756 for path in dump_path_list:

	757 sys.stderr.write(' %s' % path)

	758 dumps.append(load_single_dump(path, buckets, appeared_addresses))

	759 sys.stderr.write('\n')

	760 return dumps, appeared_addresses

	761

	762

	763 def load_and_update_symbol_cache(prefix, appeared_addresses):

	764 maps_path = prefix + '.maps'

	765 symbol_path = prefix + '.symbols'

	766 sys.stderr.write('Loading and updating symbol cache: "%s".\n' % symbol_path)

	767 symbols = {}

	768 update_symbols(symbol_path, maps_path, appeared_addresses, symbols)

	769 return symbols

	770

	771

	772 def load_default_policies():

	773 with open(POLICIES_JSON_PATH, mode='r') as policies_f:

	774 default_policies = json.load(policies_f)

	775 return default_policies

	776

	777

	778 def load_policy(policies_dict, policy_label):

	779 policy_file = policies_dict[policy_label]['file']

	780 policy_path = os.path.join(os.path.dirname(__file__), policy_file)

	781 rule_list, policy_version, components = parse_policy(policy_path)

	782 sys.stderr.write(' %s: %s (version: %s)\n' %

	783 (policy_label, policy_path, policy_version))

	784 return Policy(rule_list, policy_version, components)

	785

	786

	787 def load_policies_dict(policies_dict):

	788 sys.stderr.write('Loading policy files.\n')

	789 policies = {}

	790 for policy_label in policies_dict:

	791 policies[policy_label] = load_policy(policies_dict, policy_label)

	792 return policies

	793

	794

	795 def load_policies(options_policy):

	796 default_policies = load_default_policies()

	797 if options_policy:

	798 policy_labels = options_policy.split(',')

	799 specified_policies = {}

	800 for specified_policy in policy_labels:

	801 if specified_policy in default_policies:

	802 specified_policies[specified_policy] = (

	803 default_policies[specified_policy])

	804 policies = load_policies_dict(specified_policies)

	805 else:

	806 policies = load_policies_dict(default_policies)

	807 return policies

	808

	809

	810 def do_stacktrace(sys_argv):

	811 parser = OptionParser(usage='Usage: %prog stacktrace <dump>')

	812 options, args = parser.parse_args(sys_argv)

	813

	814 if len(args) < 2:

	815 parser.error('needs 1 argument.')

	816

	817 dump_path = args[1]

	818

	819 prefix = find_prefix(dump_path)

	820 buckets = load_buckets(prefix)

	821 dump, appeared_addresses = load_dump(dump_path, buckets)

	822 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)

	823

	824 dump.print_stacktrace(buckets, symbols)

	825

	826 return 0

	827

	828

	829 def do_csv(sys_argv):

	830 parser = OptionParser('Usage: %prog csv [-p POLICY] <first-dump>')

	831 parser.add_option('-p', '--policy', type='string', dest='policy',

	832 help='profile with POLICY', metavar='POLICY')

	833 options, args = parser.parse_args(sys_argv)

	834

	835 if len(args) < 2:

	836 parser.error('needs 1 argument.')

	837

	838 dump_path = args[1]

	839

	840 prefix = find_prefix(dump_path)

	841 buckets = load_buckets(prefix)

	842 dumps, appeared_addresses = load_dumps(

	843 determine_dump_path_list(dump_path, prefix), buckets)

	844 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)

	845 policies = load_policies(options.policy)

	846

	847 max_components = 0

	848 for policy in policies:

	849 max_components = max(max_components, len(policies[policy].components))

	850

	851 for policy in sorted(policies):

	852 rule_list = policies[policy].rules

	853 components = policies[policy].components

	854

	855 if len(policies) > 1:

	856 sys.stdout.write('%s%s\n' % (policy, ',' * (max_components - 1)))

	857 sys.stdout.write('%s%s\n' % (

	858 ','.join(components), ',' * (max_components - len(components))))

	859

	860 for dump in dumps:

	861 component_sizes = dump.apply_policy(

	862 rule_list, buckets, dumps[0].dump_time, components, symbols)

767 s = []	863 s = []

768 for c in components:	864 for c in components:

769 if c in ('hour', 'minute', 'second'):	865 if c in ('hour', 'minute', 'second'):

770 s.append('%05.5f' % (component_sizes[c]))	866 s.append('%05.5f' % (component_sizes[c]))

771 else:	867 else:

772 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))	868 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))

773 sys.stdout.write(','.join(s))	869 sys.stdout.write('%s%s\n' % (

774 sys.stdout.write('\n')	870 ','.join(s), ',' * (max_components - len(components))))

775	871

776 elif action == '--json':	872 for bucket in buckets.itervalues():

777 json_base = {	873 bucket.clear_component_cache()

778 'version': 'JSON_DEEP_1',	874

	875 return 0

	876

	877

	878 def do_json(sys_argv):

	879 parser = OptionParser('Usage: %prog json [-p POLICY] <first-dump>')

	880 parser.add_option('-p', '--policy', type='string', dest='policy',

	881 help='profile with POLICY', metavar='POLICY')

	882 options, args = parser.parse_args(sys_argv)

	883

	884 if len(args) < 2:

	885 parser.error('needs 1 argument.')

	886

	887 dump_path = args[1]

	888

	889 prefix = find_prefix(dump_path)

	890 buckets = load_buckets(prefix)

	891 dumps, appeared_addresses = load_dumps(

	892 determine_dump_path_list(dump_path, prefix), buckets)

	893 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)

	894 policies = load_policies(options.policy)

	895

	896 json_base = {

	897 'version': 'JSON_DEEP_2',

	898 'policies': {},

	899 }

	900

	901 for policy in sorted(policies):

	902 rule_list = policies[policy].rules

	903 components = policies[policy].components

	904

	905 json_base['policies'][policy] = {

779 'legends': components,	906 'legends': components,

780 'snapshots': [],	907 'snapshots': [],

781 }	908 }

782 for log in logs:	909

783 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time)	910 for dump in dumps:

784 component_sizes['log_path'] = log.log_path	911 component_sizes = dump.apply_policy(

785 component_sizes['log_time'] = datetime.fromtimestamp(	912 rule_list, buckets, dumps[0].dump_time, components, symbols)

786 log.log_time).strftime('%Y-%m-%d %H:%M:%S')	913 component_sizes['dump_path'] = dump.dump_path

787 json_base['snapshots'].append(component_sizes)	914 component_sizes['dump_time'] = datetime.fromtimestamp(

788 json.dump(json_base, sys.stdout, indent=2, sort_keys=True)	915 dump.dump_time).strftime('%Y-%m-%d %H:%M:%S')

789	916 json_base['policies'][policy]['snapshots'].append(component_sizes)

790 elif action == '--list':	917

791 component_sizes = logs[0].apply_policy(	918 for bucket in buckets.itervalues():

792 policy_list, buckets, logs[0].log_time)	919 bucket.clear_component_cache()

	920

	921 json.dump(json_base, sys.stdout, indent=2, sort_keys=True)

	922

	923 return 0

	924

	925

	926 def do_list(sys_argv):

	927 parser = OptionParser('Usage: %prog [-p POLICY] list <first-dump>')

	928 parser.add_option('-p', '--policy', type='string', dest='policy',

	929 help='profile with POLICY', metavar='POLICY')

	930 options, args = parser.parse_args(sys_argv)

	931

	932 if len(args) < 2:

	933 parser.error('needs 1 argument.')

	934

	935 dump_path = args[1]

	936

	937 prefix = find_prefix(dump_path)

	938 buckets = load_buckets(prefix)

	939 dumps, appeared_addresses = load_dumps(

	940 determine_dump_path_list(dump_path, prefix), buckets)

	941 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)

	942 policies = load_policies(options.policy)

	943

	944 for policy in sorted(policies):

	945 rule_list = policies[policy].rules

	946 components = policies[policy].components

	947

	948 component_sizes = dumps[0].apply_policy(

	949 rule_list, buckets, dumps[0].dump_time, components, symbols)

	950 sys.stdout.write('%s:\n' % policy)

793 for c in components:	951 for c in components:

794 if c in ['hour', 'minute', 'second']:	952 if c in ['hour', 'minute', 'second']:

795 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))	953 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))

796 else:	954 else:

797 sys.stdout.write('%30s %10.3f\n' % (	955 sys.stdout.write('%30s %10.3f\n' % (

798 c, component_sizes[c] / 1024.0 / 1024.0))	956 c, component_sizes[c] / 1024.0 / 1024.0))

799	957

800 elif action == '--expand':	958 for bucket in buckets.itervalues():

801 component_name = sys.argv[5]	959 bucket.clear_component_cache()

802 depth = sys.argv[6]

803 logs[0].expand(policy_list, buckets, component_name, int(depth))

804	960

805 elif action == '--pprof':	961 return 0

806 if len(sys.argv) > 5:	962

807 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5])	963

808 else:	964 def do_expand(sys_argv):

809 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None)	965 parser = OptionParser(

	966 'Usage: %prog expand <dump> <policy> <component> <depth>')

	967 options, args = parser.parse_args(sys_argv)

	968

	969 if len(args) < 5:

	970 parser.error('needs 4 arguments.')

	971

	972 dump_path = args[1]

	973 target_policy = args[2]

	974 component_name = args[3]

	975 depth = args[4]

	976

	977 prefix = find_prefix(dump_path)

	978 buckets = load_buckets(prefix)

	979 dump, appeared_addresses = load_dump(dump_path, buckets)

	980 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)

	981 policies = load_policies(target_policy)

	982

	983 rule_list = policies[target_policy].rules

	984

	985 dump.expand(rule_list, buckets, component_name, int(depth), symbols)

	986

	987 return 0

	988

	989

	990 def do_pprof(sys_argv):

	991 parser = OptionParser(

	992 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>')

	993 parser.add_option('-c', '--component', type='string', dest='component',

	994 help='restrict to COMPONENT', metavar='COMPONENT')

	995 options, args = parser.parse_args(sys_argv)

	996

	997 if len(args) < 3:

	998 parser.error('needs 2 arguments.')

	999

	1000 dump_path = args[1]

	1001 target_policy = args[2]

	1002 component = options.component

	1003

	1004 prefix = find_prefix(dump_path)

	1005 buckets = load_buckets(prefix)

	1006 dump, appeared_addresses = load_dump(dump_path, buckets)

	1007 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)

	1008 policies = load_policies(target_policy)

	1009

	1010 rule_list = policies[target_policy].rules

	1011

	1012 with open(prefix + '.maps', 'r') as maps_f:

	1013 maps_lines = maps_f.readlines()

	1014 dump.print_for_pprof(rule_list, buckets, maps_lines, component, symbols)

	1015

	1016 return 0

	1017

	1018

	1019 def main():

	1020 COMMANDS = {

	1021 'csv': do_csv,

	1022 'expand': do_expand,

	1023 'json': do_json,

	1024 'list': do_list,

	1025 'pprof': do_pprof,

	1026 'stacktrace': do_stacktrace,

	1027 }

	1028

	1029 # TODO(dmikurube): Remove this message after a while.

	1030 if len(sys.argv) >= 2 and sys.argv[1].startswith('--'):

	1031 sys.stderr.write("""

	1032 ************** NOTICE!! **************

	1033 The command line format has changed.

	1034 Please look at the description below.

	1035 ******************************************

	1036

	1037 """)

	1038

	1039 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS):

	1040 sys.stderr.write("""Usage: %s <command> [options] [<args>]

	1041

	1042 Commands:

	1043 csv Classify memory usage in CSV

	1044 expand Show all stacktraces contained in the specified component

	1045 json Classify memory usage in JSON

	1046 list Classify memory usage in simple listing format

	1047 pprof Format the profile dump so that it can be processed by pprof

	1048 stacktrace Convert runtime addresses to symbol names

	1049

	1050 Quick Reference:

	1051 dmprof csv [-p POLICY] <first-dump>

	1052 dmprof expand <dump> <policy> <component> <depth>

	1053 dmprof json [-p POLICY] <first-dump>

	1054 dmprof list [-p POLICY] <first-dump>

	1055 dmprof pprof [-c COMPONENT] <dump> <policy>

	1056 dmprof stacktrace <dump>

	1057 """ % (sys.argv[0]))

	1058 sys.exit(1)

	1059 action = sys.argv.pop(1)

	1060

	1061 return COMMANDS[action](sys.argv)

810	1062

811	1063

812 if __name__ == '__main__':	1064 if __name__ == '__main__':

813 sys.exit(main())	1065 sys.exit(main())

OLD	NEW

« no previous file with comments | « no previous file | tools/deep_memory_profiler/policies.json » ('j') | no next file with comments »