Chromium Code Reviews

Side by Side Diff: tools/deep_memory_profiler/dmprof

Issue 10802049: Change dmprof commandline format, and clean up start-up routines. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: policy setting. Created 8 years, 5 months ago
OLD | NEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """The deep heap profiler script for Chrome.""" 6 """The deep heap profiler script for Chrome."""
7 7
8 from datetime import datetime 8 from datetime import datetime
9 import json 9 import json
10 import os 10 import os
11 import re 11 import re
12 from optparse import OptionParser
M-A Ruel 2012/07/24 14:10:53 just import optparse
Dai Mikurube (NOT FULLTIME) 2012/07/24 16:19:24 Done.
12 import shutil 13 import shutil
13 import subprocess 14 import subprocess
14 import sys 15 import sys
15 import tempfile 16 import tempfile
16 17
17 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( 18 FIND_RUNTIME_SYMBOLS_PATH = os.path.join(
18 os.path.dirname(os.path.abspath(__file__)), 19 os.path.dirname(os.path.abspath(__file__)),
19 os.pardir, 20 os.pardir,
20 'find_runtime_symbols') 21 'find_runtime_symbols')
21 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) 22 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH)
22 23
23 from prepare_symbol_info import prepare_symbol_info 24 from prepare_symbol_info import prepare_symbol_info
24 from find_runtime_symbols import find_runtime_symbols_list 25 from find_runtime_symbols import find_runtime_symbols_list
25 26
26 BUCKET_ID = 5 27 BUCKET_ID = 5
27 VIRTUAL = 0 28 VIRTUAL = 0
28 COMMITTED = 1 29 COMMITTED = 1
29 ALLOC_COUNT = 2 30 ALLOC_COUNT = 2
30 FREE_COUNT = 3 31 FREE_COUNT = 3
31 NULL_REGEX = re.compile('') 32 NULL_REGEX = re.compile('')
32 33
34 POLICIES_JSON_PATH = os.path.join(os.path.dirname(__file__), 'policies.json')
M-A Ruel 2012/07/24 14:10:53 no
Dai Mikurube (NOT FULLTIME) 2012/07/24 14:53:45 Sorry, what do you mean by this?
35
33 # Heap Profile Dump versions 36 # Heap Profile Dump versions
34 37
35 # DUMP_DEEP_1 is OBSOLETE. 38 # DUMP_DEEP_1 is OBSOLETE.
36 # DUMP_DEEP_1 DOES NOT distinguish mmap regions and malloc chunks. 39 # DUMP_DEEP_1 DOES NOT distinguish mmap regions and malloc chunks.
37 # Their stacktraces DO contain mmap* or tc-* at their tops. 40 # Their stacktraces DO contain mmap* or tc-* at their tops.
38 # They should be processed by POLICY_DEEP_1. 41 # They should be processed by POLICY_DEEP_1.
39 DUMP_DEEP_1 = 'DUMP_DEEP_1' 42 DUMP_DEEP_1 = 'DUMP_DEEP_1'
40 43
41 # DUMP_DEEP_2 is OBSOLETE. 44 # DUMP_DEEP_2 is OBSOLETE.
42 # DUMP_DEEP_2 DOES distinguish mmap regions and malloc chunks. 45 # DUMP_DEEP_2 DOES distinguish mmap regions and malloc chunks.
(...skipping 22 matching lines...)
65 # Heap Profile Policy versions 68 # Heap Profile Policy versions
66 69
67 # POLICY_DEEP_1 DOES NOT include allocation_type columns. 70 # POLICY_DEEP_1 DOES NOT include allocation_type columns.
68 # mmap regions are distinguished with mmap frames in the pattern column. 71 # mmap regions are distinguished with mmap frames in the pattern column.
69 POLICY_DEEP_1 = 'POLICY_DEEP_1' 72 POLICY_DEEP_1 = 'POLICY_DEEP_1'
70 73
71 # POLICY_DEEP_2 DOES include allocation_type columns. 74 # POLICY_DEEP_2 DOES include allocation_type columns.
72 # mmap regions are distinguished with the allocation_type column. 75 # mmap regions are distinguished with the allocation_type column.
73 POLICY_DEEP_2 = 'POLICY_DEEP_2' 76 POLICY_DEEP_2 = 'POLICY_DEEP_2'
74 77
75 # TODO(dmikurube): Avoid global variables.
76 address_symbol_dict = {}
77 appeared_addresses = set()
78 components = []
79
80 78
81 class EmptyDumpException(Exception): 79 class EmptyDumpException(Exception):
82 def __init__(self, value): 80 def __init__(self, value):
83 self.value = value 81 self.value = value
84 def __str__(self): 82 def __str__(self):
85 return repr(self.value) 83 return repr(self.value)
86 84
87 85
88 class ParsingException(Exception): 86 class ParsingException(Exception):
89 def __init__(self, value): 87 def __init__(self, value):
90 self.value = value 88 self.value = value
91 def __str__(self): 89 def __str__(self):
92 return repr(self.value) 90 return repr(self.value)
93 91
94 92
95 class InvalidDumpException(ParsingException): 93 class InvalidDumpException(ParsingException):
96 def __init__(self, value): 94 def __init__(self, value):
97 self.value = value 95 self.value = value
98 def __str__(self): 96 def __str__(self):
99 return "invalid heap profile dump: %s" % repr(self.value) 97 return "invalid heap profile dump: %s" % repr(self.value)
100 98
101 99
102 class ObsoleteDumpVersionException(ParsingException): 100 class ObsoleteDumpVersionException(ParsingException):
103 def __init__(self, value): 101 def __init__(self, value):
104 self.value = value 102 self.value = value
105 def __str__(self): 103 def __str__(self):
106 return "obsolete heap profile dump version: %s" % repr(self.value) 104 return "obsolete heap profile dump version: %s" % repr(self.value)
107 105
108 106
109 class Policy(object): 107 class Rule(object):
108 """Represents one matching rule in a policy file."""
110 109
111 def __init__(self, name, mmap, pattern): 110 def __init__(self, name, mmap, pattern):
112 self.name = name 111 self.name = name
113 self.mmap = mmap 112 self.mmap = mmap
114 self.condition = re.compile(pattern + r'\Z') 113 self.condition = re.compile(pattern + r'\Z')
115 114
116 115
117 def get_component(policy_list, bucket): 116 class Policy(object):
117 """Represents a policy, a content of a policy file."""
118
119 def __init__(self, rules, version, components):
120 self.rules = rules
121 self.version = version
122 self.components = components
123
124 def append_rule(self, rule):
125 self.rules.append(rule)
126
127
128 def get_component(rule_list, bucket, symbols):
118 """Returns a component name which a given bucket belongs to. 129 """Returns a component name which a given bucket belongs to.
119 130
120 Args: 131 Args:
121 policy_list: A list containing Policy objects. (Parsed policy data by 132 rule_list: A list of Rule objects.
122 parse_policy.)
123 bucket: A Bucket object to be searched for. 133 bucket: A Bucket object to be searched for.
134 symbols: A dict mapping runtime addresses to symbol names.
124 135
125 Returns: 136 Returns:
126 A string representing a component name. 137 A string representing a component name.
127 """ 138 """
128 if not bucket: 139 if not bucket:
129 return 'no-bucket' 140 return 'no-bucket'
130 if bucket.component: 141 if bucket.component_cache:
131 return bucket.component 142 return bucket.component_cache
132 143
133 stacktrace = ''.join( 144 stacktrace = ''.join(symbols[a] + ' ' for a in bucket.stacktrace).strip()
134 address_symbol_dict[a] + ' ' for a in bucket.stacktrace).strip()
135 145
136 for policy in policy_list: 146 for rule in rule_list:
137 if bucket.mmap == policy.mmap and policy.condition.match(stacktrace): 147 if bucket.mmap == rule.mmap and rule.condition.match(stacktrace):
138 bucket.component = policy.name 148 bucket.component_cache = rule.name
139 return policy.name 149 return rule.name
140 150
141 assert False 151 assert False
142 152
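For reference, a minimal sketch of how a Rule drives get_component(), assuming the Rule, Bucket, and get_component definitions in this file; the rule name, pattern, and addresses are hypothetical. Note that Rule.condition is compiled with a trailing \Z, so the pattern must cover the entire joined stacktrace:

    rule = Rule('tc-webkit', False, r'.*\bWebCore::.*')  # hypothetical rule
    bucket = Bucket(['0x7f10', '0x7f20'], False)         # hypothetical addresses
    symbols = {'0x7f10': 'tc_malloc', '0x7f20': 'WebCore::Node::create'}

    # The joined stacktrace is 'tc_malloc WebCore::Node::create'.  The first
    # rule whose mmap flag and pattern both match names the component, and
    # the result is memoized on the bucket.
    assert get_component([rule], bucket, symbols) == 'tc-webkit'
    assert bucket.component_cache == 'tc-webkit'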
143 153
144 class Bucket(object): 154 class Bucket(object):
155 """Represents a bucket, which is a unit of memory classification."""
145 156
146 def __init__(self, stacktrace, mmap): 157 def __init__(self, stacktrace, mmap):
147 self.stacktrace = stacktrace 158 self.stacktrace = stacktrace
148 self.mmap = mmap 159 self.mmap = mmap
149 self.component = '' 160 self.component_cache = ''
161
162 def clear_component_cache(self):
163 self.component_cache = ''
150 164
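Continuing the sketch above: component_cache is a per-policy memo, which is why the multi-policy commands below (do_csv, do_json, do_list) clear it on every bucket before applying the next policy:

    assert get_component([rule], bucket, symbols) == 'tc-webkit'  # cache hit
    bucket.clear_component_cache()  # required before applying another policy
    assert bucket.component_cache == ''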
151 165
152 class Log(object): 166 class Dump(object):
167 """Represents one heap profile dump."""
153 168
154 """A class representing one dumped log data.""" 169 def __init__(self, dump_path):
155 def __init__(self, log_path): 170 self.dump_path = dump_path
156 self.log_path = log_path 171 self.dump_lines = [
157 self.log_lines = [ 172 l for l in open(self.dump_path, 'r') if l and not l.startswith('#')]
158 l for l in open(self.log_path, 'r') if l and not l.startswith('#')] 173 self.dump_version = ''
159 self.log_version = ''
160 sys.stderr.write('Loading a dump: %s\n' % log_path)
161 self.stacktrace_lines = [] 174 self.stacktrace_lines = []
162 self.counters = {} 175 self.counters = {}
163 self.log_time = os.stat(self.log_path).st_mtime 176 self.dump_time = os.stat(self.dump_path).st_mtime
164 177
165 def dump_stacktrace(buckets): 178 def print_stacktrace(self, buckets, symbols):
166 """Prints a given stacktrace. 179 """Prints a given stacktrace.
167 180
168 Args: 181 Args:
169 buckets: A dict mapping bucket ids and their corresponding Bucket 182 buckets: A dict mapping bucket ids to Bucket objects.
170 objects. 183 symbols: A dict mapping runtime addresses to symbol names.
171 """ 184 """
172 for line in self.stacktrace_lines: 185 for line in self.stacktrace_lines:
173 words = line.split() 186 words = line.split()
174 bucket = buckets.get(int(words[BUCKET_ID])) 187 bucket = buckets.get(int(words[BUCKET_ID]))
175 if not bucket: 188 if not bucket:
176 continue 189 continue
177 for i in range(0, BUCKET_ID - 1): 190 for i in range(0, BUCKET_ID - 1):
178 sys.stdout.write(words[i] + ' ') 191 sys.stdout.write(words[i] + ' ')
179 for address in bucket.stacktrace: 192 for address in bucket.stacktrace:
180 sys.stdout.write((address_symbol_dict.get(address) or address) + ' ') 193 sys.stdout.write((symbols.get(address) or address) + ' ')
181 sys.stdout.write('\n') 194 sys.stdout.write('\n')
182 195
183 @staticmethod 196 @staticmethod
184 def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets, 197 def accumulate_size_for_pprof(stacktrace_lines, rule_list, buckets,
185 component_name): 198 component_name, symbols):
186 """Accumulates size of committed chunks and the number of allocated chunks. 199 """Accumulates size of committed chunks and the number of allocated chunks.
187 200
188 Args: 201 Args:
189 stacktrace_lines: A list of strings which are valid as stacktraces. 202 stacktrace_lines: A list of strings which are valid as stacktraces.
190 policy_list: A list containing Policy objects. (Parsed policy data by 203 rule_list: A list of Rule objects.
191 parse_policy.) 204 buckets: A dict mapping bucket ids to Bucket objects.
192 buckets: A dict mapping bucket ids and their corresponding Bucket
193 objects.
194 component_name: A name of component for filtering. 205 component_name: A name of component for filtering.
206 symbols: A dict mapping runtime addresses to symbol names.
195 207
196 Returns: 208 Returns:
197 Two integers which are the accumulated size of committed regions and the 209 Two integers which are the accumulated size of committed regions and the
198 number of allocated chunks, respectively. 210 number of allocated chunks, respectively.
199 """ 211 """
200 com_committed = 0 212 com_committed = 0
201 com_allocs = 0 213 com_allocs = 0
202 for line in stacktrace_lines: 214 for line in stacktrace_lines:
203 words = line.split() 215 words = line.split()
204 bucket = buckets.get(int(words[BUCKET_ID])) 216 bucket = buckets.get(int(words[BUCKET_ID]))
205 if (not bucket or 217 if (not bucket or
206 (component_name and 218 (component_name and
207 component_name != get_component(policy_list, bucket))): 219 component_name != get_component(rule_list, bucket, symbols))):
208 continue 220 continue
209 221
210 com_committed += int(words[COMMITTED]) 222 com_committed += int(words[COMMITTED])
211 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) 223 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])
212 224
213 return com_committed, com_allocs 225 return com_committed, com_allocs
214 226
215 @staticmethod 227 @staticmethod
216 def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list, 228 def print_stacktrace_lines_for_pprof(stacktrace_lines, rule_list,
217 buckets, component_name): 229 buckets, component_name, symbols):
218 """Prints information of stacktrace lines for pprof. 230 """Prints information of stacktrace lines for pprof.
219 231
220 Args: 232 Args:
221 stacktrace_lines: A list of strings which are valid as stacktraces. 233 stacktrace_lines: A list of strings which are valid as stacktraces.
222 policy_list: A list containing Policy objects. (Parsed policy data by 234 rule_list: A list of Rule objects.
223 parse_policy.) 235 buckets: A dict mapping bucket ids to Bucket objects.
224 buckets: A dict mapping bucket ids and their corresponding Bucket
225 objects.
226 component_name: A name of component for filtering. 236 component_name: A name of component for filtering.
237 symbols: A dict mapping runtime addresses to symbol names.
227 """ 238 """
228 for line in stacktrace_lines: 239 for line in stacktrace_lines:
229 words = line.split() 240 words = line.split()
230 bucket = buckets.get(int(words[BUCKET_ID])) 241 bucket = buckets.get(int(words[BUCKET_ID]))
231 if (not bucket or 242 if (not bucket or
232 (component_name and 243 (component_name and
233 component_name != get_component(policy_list, bucket))): 244 component_name != get_component(rule_list, bucket, symbols))):
234 continue 245 continue
235 246
236 sys.stdout.write('%6d: %8s [%6d: %8s] @' % ( 247 sys.stdout.write('%6d: %8s [%6d: %8s] @' % (
237 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), 248 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
238 words[COMMITTED], 249 words[COMMITTED],
239 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), 250 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
240 words[COMMITTED])) 251 words[COMMITTED]))
241 for address in bucket.stacktrace: 252 for address in bucket.stacktrace:
242 sys.stdout.write(' ' + address) 253 sys.stdout.write(' ' + address)
243 sys.stdout.write('\n') 254 sys.stdout.write('\n')
244 255
245 def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name): 256 def print_for_pprof(
246 """Converts the log file so it can be processed by pprof. 257 self, rule_list, buckets, maps_lines, component_name, symbols):
258 """Converts the heap profile dump so it can be processed by pprof.
247 259
248 Args: 260 Args:
249 policy_list: A list containing Policy objects. (Parsed policy data by 261 rule_list: A list of Rule objects.
250 parse_policy.) 262 buckets: A dict mapping bucket ids to Bucket objects.
251 buckets: A dict mapping bucket ids and their corresponding Bucket 263 maps_lines: A list of strings containing /proc/.../maps.
252 objects.
253 mapping_lines: A list of strings containing /proc/.../maps.
254 component_name: A name of component for filtering. 264 component_name: A name of component for filtering.
265 symbols: A dict mapping runtime addresses to symbol names.
255 """ 266 """
256 sys.stdout.write('heap profile: ') 267 sys.stdout.write('heap profile: ')
257 com_committed, com_allocs = self.accumulate_size_for_pprof( 268 com_committed, com_allocs = self.accumulate_size_for_pprof(
258 self.stacktrace_lines, policy_list, buckets, component_name) 269 self.stacktrace_lines, rule_list, buckets, component_name, symbols)
259 270
260 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( 271 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
261 com_allocs, com_committed, com_allocs, com_committed)) 272 com_allocs, com_committed, com_allocs, com_committed))
262 273
263 self.dump_stacktrace_lines_for_pprof( 274 self.print_stacktrace_lines_for_pprof(
264 self.stacktrace_lines, policy_list, buckets, component_name) 275 self.stacktrace_lines, rule_list, buckets, component_name, symbols)
265 276
266 sys.stdout.write('MAPPED_LIBRARIES:\n') 277 sys.stdout.write('MAPPED_LIBRARIES:\n')
267 for line in mapping_lines: 278 for line in maps_lines:
268 sys.stdout.write(line) 279 sys.stdout.write(line)
269 280
270 @staticmethod 281 @staticmethod
271 def check_stacktrace_line(stacktrace_line, buckets): 282 def check_stacktrace_line(stacktrace_line, buckets, appeared_addresses):
272 """Checks if a given stacktrace_line is valid as stacktrace. 283 """Checks if a given stacktrace_line is valid as stacktrace.
273 284
274 Args: 285 Args:
275 stacktrace_line: A string to be checked. 286 stacktrace_line: A string to be checked.
276 buckets: A dict mapping bucket ids and their corresponding Bucket 287 buckets: A dict mapping bucket ids to Bucket objects.
277 objects. 288 appeared_addresses: A list where appeared addresses will be stored.
278 289
279 Returns: 290 Returns:
280 True if the given stacktrace_line is valid. 291 True if the given stacktrace_line is valid.
281 """ 292 """
282 words = stacktrace_line.split() 293 words = stacktrace_line.split()
283 if len(words) < BUCKET_ID + 1: 294 if len(words) < BUCKET_ID + 1:
284 return False 295 return False
285 if words[BUCKET_ID - 1] != '@': 296 if words[BUCKET_ID - 1] != '@':
286 return False 297 return False
287 bucket = buckets.get(int(words[BUCKET_ID])) 298 bucket = buckets.get(int(words[BUCKET_ID]))
(...skipping 10 matching lines...)
298 A pair of an integer indicating a line number after skipped, and a 309 A pair of an integer indicating a line number after skipped, and a
299 boolean value which is True if found a line which skipping_condition 310 boolean value which is True if found a line which skipping_condition
300 is False for. 311 is False for.
301 """ 312 """
302 while skipping_condition(line_number): 313 while skipping_condition(line_number):
303 line_number += 1 314 line_number += 1
304 if line_number >= max_line_number: 315 if line_number >= max_line_number:
305 return line_number, False 316 return line_number, False
306 return line_number, True 317 return line_number, True
307 318
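For reference, the stacktrace-line layout these checks assume, per the constants at the top of the file (the numeric values here are hypothetical):

    line = '4096 2048 10 2 @ 37'   # virtual committed allocs frees @ bucket_id
    words = line.split()
    assert len(words) >= BUCKET_ID + 1 and words[BUCKET_ID - 1] == '@'
    bucket_id = int(words[BUCKET_ID])                               # 37
    committed = int(words[COMMITTED])                               # 2048
    live_allocs = int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])  # 8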
308 def parse_stacktraces_while_valid(self, buckets, log_lines, line_number): 319 def parse_stacktraces_while_valid(
320 self, buckets, dump_lines, line_number, appeared_addresses):
309 """Parses stacktrace lines while the lines are valid. 321 """Parses stacktrace lines while the lines are valid.
310 322
311 Args: 323 Args:
312 buckets: A dict mapping bucket ids and their corresponding Bucket 324 buckets: A dict mapping bucket ids to Bucket objects.
313 objects. 325 dump_lines: A list of lines to be parsed.
314 log_lines: A list of lines to be parsed. 326 line_number: A line number to start parsing in dump_lines.
315 line_number: An integer representing the starting line number in 327 appeared_addresses: A list where appeared addresses will be stored.
316 log_lines.
317 328
318 Returns: 329 Returns:
319 A pair of a list of valid lines and an integer representing the last 330 A pair of a list of valid lines and an integer representing the last
320 line number in log_lines. 331 line number in dump_lines.
321 """ 332 """
322 (line_number, _) = self.skip_lines_while( 333 (line_number, _) = self.skip_lines_while(
323 line_number, len(log_lines), 334 line_number, len(dump_lines),
324 lambda n: not log_lines[n].split()[0].isdigit()) 335 lambda n: not dump_lines[n].split()[0].isdigit())
325 stacktrace_lines_start = line_number 336 stacktrace_lines_start = line_number
326 (line_number, _) = self.skip_lines_while( 337 (line_number, _) = self.skip_lines_while(
327 line_number, len(log_lines), 338 line_number, len(dump_lines),
328 lambda n: self.check_stacktrace_line(log_lines[n], buckets)) 339 lambda n: self.check_stacktrace_line(
329 return (log_lines[stacktrace_lines_start:line_number], line_number) 340 dump_lines[n], buckets, appeared_addresses))
341 return (dump_lines[stacktrace_lines_start:line_number], line_number)
330 342
331 def parse_stacktraces(self, buckets, line_number): 343 def parse_stacktraces(self, buckets, line_number, appeared_addresses):
332 """Parses lines in self.log_lines as stacktrace. 344 """Parses lines in self.dump_lines as stacktrace.
333 345
334 Valid stacktrace lines are stored into self.stacktrace_lines. 346 Valid stacktrace lines are stored into self.stacktrace_lines.
335 347
336 Args: 348 Args:
337 buckets: A dict mapping bucket ids and their corresponding Bucket 349 buckets: A dict mapping bucket ids to Bucket objects.
338 objects. 350 line_number: A line number to start parsing in dump_lines.
339 line_number: An integer representing the starting line number in 351 appeared_addresses: A list where appeared addresses will be stored.
340 log_lines.
341 352
342 Raises: 353 Raises:
343 ParsingException for invalid dump versions. 354 ParsingException for invalid dump versions.
344 """ 355 """
345 sys.stderr.write(' Version: %s\n' % self.log_version) 356 if self.dump_version == DUMP_DEEP_5:
346
347 if self.log_version == DUMP_DEEP_5:
348 (self.stacktrace_lines, line_number) = ( 357 (self.stacktrace_lines, line_number) = (
349 self.parse_stacktraces_while_valid( 358 self.parse_stacktraces_while_valid(
350 buckets, self.log_lines, line_number)) 359 buckets, self.dump_lines, line_number, appeared_addresses))
351 360
352 elif self.log_version in DUMP_DEEP_OBSOLETE: 361 elif self.dump_version in DUMP_DEEP_OBSOLETE:
353 raise ObsoleteDumpVersionException(self.log_version) 362 raise ObsoleteDumpVersionException(self.dump_version)
354 363
355 else: 364 else:
356 raise InvalidDumpException('Invalid version: %s' % self.log_version) 365 raise InvalidDumpException('Invalid version: %s' % self.dump_version)
357 366
358 def parse_global_stats(self): 367 def parse_global_stats(self):
359 """Parses lines in self.log_lines as global stats.""" 368 """Parses lines in self.dump_lines as global stats."""
360 (ln, _) = self.skip_lines_while( 369 (ln, _) = self.skip_lines_while(
361 0, len(self.log_lines), 370 0, len(self.dump_lines),
362 lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n') 371 lambda n: self.dump_lines[n] != 'GLOBAL_STATS:\n')
363 372
364 global_stat_names = [ 373 global_stat_names = [
365 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', 374 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',
366 'nonprofiled-absent', 'nonprofiled-anonymous', 375 'nonprofiled-absent', 'nonprofiled-anonymous',
367 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', 376 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
368 'nonprofiled-stack', 'nonprofiled-other', 377 'nonprofiled-stack', 'nonprofiled-other',
369 'profiled-mmap', 'profiled-malloc'] 378 'profiled-mmap', 'profiled-malloc']
370 379
371 for prefix in global_stat_names: 380 for prefix in global_stat_names:
372 (ln, _) = self.skip_lines_while( 381 (ln, _) = self.skip_lines_while(
373 ln, len(self.log_lines), 382 ln, len(self.dump_lines),
374 lambda n: self.log_lines[n].split()[0] != prefix) 383 lambda n: self.dump_lines[n].split()[0] != prefix)
375 words = self.log_lines[ln].split() 384 words = self.dump_lines[ln].split()
376 self.counters[prefix + '_virtual'] = int(words[-2]) 385 self.counters[prefix + '_virtual'] = int(words[-2])
377 self.counters[prefix + '_committed'] = int(words[-1]) 386 self.counters[prefix + '_committed'] = int(words[-1])
378 387
379 def parse_version(self): 388 def parse_version(self):
380 """Parses a version string in self.log_lines. 389 """Parses a version string in self.dump_lines.
381 390
382 Returns: 391 Returns:
383 A pair of (a string representing a version of the stacktrace dump, 392 A pair of (a string representing a version of the stacktrace dump,
384 and an integer indicating a line number next to the version string). 393 and an integer indicating a line number next to the version string).
385 394
386 Raises: 395 Raises:
387 ParsingException for invalid dump versions. 396 ParsingException for invalid dump versions.
388 """ 397 """
389 version = '' 398 version = ''
390 399
391 # Skip until an identifiable line. 400 # Skip until an identifiable line.
392 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') 401 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
393 if not self.log_lines: 402 if not self.dump_lines:
394 raise EmptyDumpException('Empty heap dump file.') 403 raise EmptyDumpException('Empty heap dump file.')
395 (ln, found) = self.skip_lines_while( 404 (ln, found) = self.skip_lines_while(
396 0, len(self.log_lines), 405 0, len(self.dump_lines),
397 lambda n: not self.log_lines[n].startswith(headers)) 406 lambda n: not self.dump_lines[n].startswith(headers))
398 if not found: 407 if not found:
399 raise InvalidDumpException('No version header.') 408 raise InvalidDumpException('No version header.')
400 409
401 # Identify a version. 410 # Identify a version.
402 if self.log_lines[ln].startswith('heap profile: '): 411 if self.dump_lines[ln].startswith('heap profile: '):
403 version = self.log_lines[ln][13:].strip() 412 version = self.dump_lines[ln][13:].strip()
404 if version == DUMP_DEEP_5: 413 if version == DUMP_DEEP_5:
405 (ln, _) = self.skip_lines_while( 414 (ln, _) = self.skip_lines_while(
406 ln, len(self.log_lines), 415 ln, len(self.dump_lines),
407 lambda n: self.log_lines[n] != 'STACKTRACES:\n') 416 lambda n: self.dump_lines[n] != 'STACKTRACES:\n')
408 elif version in DUMP_DEEP_OBSOLETE: 417 elif version in DUMP_DEEP_OBSOLETE:
409 raise ObsoleteDumpVersionException(version) 418 raise ObsoleteDumpVersionException(version)
410 else: 419 else:
411 raise InvalidDumpException('Invalid version: %s' % version) 420 raise InvalidDumpException('Invalid version: %s' % version)
412 elif self.log_lines[ln] == 'STACKTRACES:\n': 421 elif self.dump_lines[ln] == 'STACKTRACES:\n':
413 raise ObsoleteDumpVersionException(DUMP_DEEP_1) 422 raise ObsoleteDumpVersionException(DUMP_DEEP_1)
414 elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n': 423 elif self.dump_lines[ln] == 'MMAP_STACKTRACES:\n':
415 raise ObsoleteDumpVersionException(DUMP_DEEP_2) 424 raise ObsoleteDumpVersionException(DUMP_DEEP_2)
416 425
417 return (version, ln) 426 return (version, ln)
418 427
419 def parse_log(self, buckets): 428 def parse_dump(self, buckets, appeared_addresses):
420 self.log_version, ln = self.parse_version() 429 self.dump_version, ln = self.parse_version()
421 self.parse_global_stats() 430 self.parse_global_stats()
422 self.parse_stacktraces(buckets, ln) 431 self.parse_stacktraces(buckets, ln, appeared_addresses)
423 432
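Piecing together parse_version(), parse_global_stats(), and parse_stacktraces(), a DUMP_DEEP_5 dump has roughly this shape (all numbers hypothetical):

    heap profile: DUMP_DEEP_5
    GLOBAL_STATS:
         total  123456789  23456789    (last two words: virtual, committed)
     file-exec    1234567    234567
     ...                               (one line per name in global_stat_names)
    STACKTRACES:
     4096 2048 10 2 @ 37               (consumed by parse_stacktraces_while_valid)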
424 @staticmethod 433 @staticmethod
425 def accumulate_size_for_policy(stacktrace_lines, 434 def accumulate_size_for_policy(stacktrace_lines,
426 policy_list, buckets, sizes): 435 rule_list, buckets, sizes, symbols):
427 for line in stacktrace_lines: 436 for line in stacktrace_lines:
428 words = line.split() 437 words = line.split()
429 bucket = buckets.get(int(words[BUCKET_ID])) 438 bucket = buckets.get(int(words[BUCKET_ID]))
430 component_match = get_component(policy_list, bucket) 439 component_match = get_component(rule_list, bucket, symbols)
440
M-A Ruel 2012/07/24 14:10:53 This new line is gratuitous. Intended?
Dai Mikurube (NOT FULLTIME) 2012/07/24 16:19:24 It was unintended. Thanks.
431 sizes[component_match] += int(words[COMMITTED]) 441 sizes[component_match] += int(words[COMMITTED])
432 442
433 if component_match.startswith('tc-'): 443 if component_match.startswith('tc-'):
434 sizes['tc-total-log'] += int(words[COMMITTED]) 444 sizes['tc-total-log'] += int(words[COMMITTED])
435 elif component_match.startswith('mmap-'): 445 elif component_match.startswith('mmap-'):
436 sizes['mmap-total-log'] += int(words[COMMITTED]) 446 sizes['mmap-total-log'] += int(words[COMMITTED])
437 else: 447 else:
438 sizes['other-total-log'] += int(words[COMMITTED]) 448 sizes['other-total-log'] += int(words[COMMITTED])
439 449
440 def apply_policy(self, policy_list, buckets, first_log_time): 450 def apply_policy(
451 self, rule_list, buckets, first_dump_time, components, symbols):
441 """Aggregates the total memory size of each component. 452 """Aggregates the total memory size of each component.
442 453
443 Iterate through all stacktraces and attribute them to one of the components 454 Iterate through all stacktraces and attribute them to one of the components
444 based on the policy. It is important to apply policy in right order. 455 based on the policy. It is important to apply policy in right order.
445 456
446 Args: 457 Args:
447 policy_list: A list containing Policy objects. (Parsed policy data by 458 rule_list: A list of Rule objects.
448 parse_policy.) 459 buckets: A dict mapping bucket ids to Bucket objects.
449 buckets: A dict mapping bucket ids and their corresponding Bucket 460 first_dump_time: An integer representing time when the first dump is
450 objects.
451 first_log_time: An integer representing time when the first log is
452 dumped. 461 dumped.
462 components: A list of strings of component names.
463 symbols: A dict mapping runtime addresses to symbol names.
453 464
454 Returns: 465 Returns:
455 A dict mapping components and their corresponding sizes. 466 A dict mapping components and their corresponding sizes.
456 """ 467 """
457 468
458 sys.stderr.write('apply policy:%s\n' % (self.log_path)) 469 sys.stderr.write('Applying policy: "%s".\n' % self.dump_path)
459 sizes = dict((c, 0) for c in components) 470 sizes = dict((c, 0) for c in components)
460 471
461 self.accumulate_size_for_policy(self.stacktrace_lines, 472 self.accumulate_size_for_policy(self.stacktrace_lines,
462 policy_list, buckets, sizes) 473 rule_list, buckets, sizes, symbols)
463 474
464 mmap_prefix = 'profiled-mmap' 475 mmap_prefix = 'profiled-mmap'
465 malloc_prefix = 'profiled-malloc' 476 malloc_prefix = 'profiled-malloc'
466 477
467 sizes['mmap-no-log'] = ( 478 sizes['mmap-no-log'] = (
468 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log']) 479 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log'])
469 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix] 480 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix]
470 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix] 481 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix]
471 482
472 sizes['tc-no-log'] = ( 483 sizes['tc-no-log'] = (
(...skipping 33 matching lines...)
506 'nonprofiled-file-nonexec_committed', 517 'nonprofiled-file-nonexec_committed',
507 'nonprofiled-stack_committed', 518 'nonprofiled-stack_committed',
508 'nonprofiled-other_committed') 519 'nonprofiled-other_committed')
509 sizes['mustbezero'] = ( 520 sizes['mustbezero'] = (
510 self.counters['total_committed'] - 521 self.counters['total_committed'] -
511 sum(self.counters[i] for i in removed)) 522 sum(self.counters[i] for i in removed))
512 if 'total-exclude-profiler' in sizes: 523 if 'total-exclude-profiler' in sizes:
513 sizes['total-exclude-profiler'] = ( 524 sizes['total-exclude-profiler'] = (
514 self.counters['total_committed'] - sizes['mmap-profiler']) 525 self.counters['total_committed'] - sizes['mmap-profiler'])
515 if 'hour' in sizes: 526 if 'hour' in sizes:
516 sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0 527 sizes['hour'] = (self.dump_time - first_dump_time) / 60.0 / 60.0
517 if 'minute' in sizes: 528 if 'minute' in sizes:
518 sizes['minute'] = (self.log_time - first_log_time) / 60.0 529 sizes['minute'] = (self.dump_time - first_dump_time) / 60.0
519 if 'second' in sizes: 530 if 'second' in sizes:
520 sizes['second'] = self.log_time - first_log_time 531 sizes['second'] = self.dump_time - first_dump_time
521 532
522 return sizes 533 return sizes
523 534
524 @staticmethod 535 @staticmethod
525 def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets, 536 def accumulate_size_for_expand(stacktrace_lines, rule_list, buckets,
526 component_name, depth, sizes): 537 component_name, depth, sizes, symbols):
527 for line in stacktrace_lines: 538 for line in stacktrace_lines:
528 words = line.split() 539 words = line.split()
529 bucket = buckets.get(int(words[BUCKET_ID])) 540 bucket = buckets.get(int(words[BUCKET_ID]))
530 component_match = get_component(policy_list, bucket) 541 component_match = get_component(rule_list, bucket, symbols)
531 if component_match == component_name: 542 if component_match == component_name:
532 stacktrace_sequence = '' 543 stacktrace_sequence = ''
533 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace), 544 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace),
534 1 + depth)]: 545 1 + depth)]:
535 stacktrace_sequence += address_symbol_dict[address] + ' ' 546 stacktrace_sequence += symbols[address] + ' '
536 if not stacktrace_sequence in sizes: 547 if not stacktrace_sequence in sizes:
537 sizes[stacktrace_sequence] = 0 548 sizes[stacktrace_sequence] = 0
538 sizes[stacktrace_sequence] += int(words[COMMITTED]) 549 sizes[stacktrace_sequence] += int(words[COMMITTED])
539 550
540 def expand(self, policy_list, buckets, component_name, depth): 551 def expand(self, rule_list, buckets, component_name, depth, symbols):
541 """Prints all stacktraces in a given component of given depth. 552 """Prints all stacktraces in a given component of given depth.
542 553
543 Args: 554 Args:
544 policy_list: A list containing Policy objects. (Parsed policy data by 555 rule_list: A list of Rule objects.
545 parse_policy.) 556 buckets: A dict mapping bucket ids to Bucket objects.
546 buckets: A dict mapping bucket ids and their corresponding Bucket
547 objects.
548 component_name: A name of component for filtering. 557 component_name: A name of component for filtering.
549 depth: An integer representing depth to be printed. 558 depth: An integer representing depth to be printed.
559 symbols: A dict mapping runtime addresses to symbol names.
550 """ 560 """
551 sizes = {} 561 sizes = {}
552 562
553 self.accumulate_size_for_expand( 563 self.accumulate_size_for_expand(
554 self.stacktrace_lines, policy_list, buckets, component_name, 564 self.stacktrace_lines, rule_list, buckets, component_name,
555 depth, sizes) 565 depth, sizes, symbols)
556 566
557 sorted_sizes_list = sorted( 567 sorted_sizes_list = sorted(
558 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) 568 sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
559 total = 0 569 total = 0
560 for size_pair in sorted_sizes_list: 570 for size_pair in sorted_sizes_list:
561 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0])) 571 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0]))
562 total += size_pair[1] 572 total += size_pair[1]
563 sys.stderr.write('total: %d\n' % (total)) 573 sys.stderr.write('total: %d\n' % (total))
564 574
565 575
566 def update_symbols(symbol_path, mapping_lines, maps_path): 576 def update_symbols(
577 symbol_path, maps_path, appeared_addresses, symbols):
567 """Updates address/symbol mapping on memory and in a .symbol cache file. 578 """Updates address/symbol mapping on memory and in a .symbol cache file.
568 579
569 It reads cached address/symbol mapping from a .symbol file if it exists. 580 It reads cached address/symbol mapping from a .symbol file if it exists.
570 Then, it resolves unresolved addresses from a Chrome binary with pprof. 581 Then, it resolves unresolved addresses from a Chrome binary with pprof.
571 Both mappings on memory and in a .symbol cache file are updated. 582 Both mappings on memory and in a .symbol cache file are updated.
572 583
573 Symbol files are formatted as follows: 584 Symbol files are formatted as follows:
574 <Address> <Symbol> 585 <Address> <Symbol>
575 <Address> <Symbol> 586 <Address> <Symbol>
576 <Address> <Symbol> 587 <Address> <Symbol>
577 ... 588 ...
578 589
579 Args: 590 Args:
580 symbol_path: A string representing a path for a .symbol file. 591 symbol_path: A string representing a path for a .symbol file.
581 mapping_lines: A list of strings containing /proc/.../maps.
582 maps_path: A string of the path of /proc/.../maps. 592 maps_path: A string of the path of /proc/.../maps.
593 appeared_addresses: A list of known addresses.
594 symbols: A dict mapping runtime addresses to symbol names.
583 """ 595 """
584 with open(symbol_path, mode='a+') as symbol_f: 596 with open(symbol_path, mode='a+') as symbol_f:
585 symbol_lines = symbol_f.readlines() 597 symbol_lines = symbol_f.readlines()
586 if symbol_lines: 598 if symbol_lines:
587 for line in symbol_lines: 599 for line in symbol_lines:
588 items = line.split(None, 1) 600 items = line.split(None, 1)
589 address_symbol_dict[items[0]] = items[1].rstrip() 601 if len(items) == 1:
602 items.append('??')
603 symbols[items[0]] = items[1].rstrip()
604 if symbols:
605 sys.stderr.write(' Found %d symbols in cache.\n' % len(symbols))
606 else:
607 sys.stderr.write(' No symbols found in cache.\n')
590 608
591 unresolved_addresses = sorted( 609 unresolved_addresses = sorted(
592 a for a in appeared_addresses if a not in address_symbol_dict) 610 a for a in appeared_addresses if a not in symbols)
593 611
594 if unresolved_addresses: 612 if not unresolved_addresses:
613 sys.stderr.write(' No need to resolve any more addresses.\n')
614 else:
615 sys.stderr.write(' %d addresses are unresolved.\n' %
616 len(unresolved_addresses))
595 prepared_data_dir = tempfile.mkdtemp() 617 prepared_data_dir = tempfile.mkdtemp()
596 prepare_symbol_info(maps_path, prepared_data_dir) 618 prepare_symbol_info(maps_path, prepared_data_dir)
597 619
598 symbols = find_runtime_symbols_list( 620 symbol_list = find_runtime_symbols_list(
599 prepared_data_dir, unresolved_addresses) 621 prepared_data_dir, unresolved_addresses)
600 622
601 for address, symbol in zip(unresolved_addresses, symbols): 623 for address, symbol in zip(unresolved_addresses, symbol_list):
624 if not symbol:
625 symbol = '??'
602 stripped_symbol = symbol.strip() 626 stripped_symbol = symbol.strip()
603 address_symbol_dict[address] = stripped_symbol 627 symbols[address] = stripped_symbol
604 symbol_f.write('%s %s\n' % (address, stripped_symbol)) 628 symbol_f.write('%s %s\n' % (address, stripped_symbol))
605 629
606 shutil.rmtree(prepared_data_dir) 630 shutil.rmtree(prepared_data_dir)
607 631
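An illustrative .symbols cache after a run (addresses and symbol names hypothetical); note that unresolvable addresses are now written back as '??' so they are not re-queried on the next run:

    0x7f3a9c01 WebCore::Node::create
    0x7f3a9c42 WTF::fastMalloc
    0x7f3a9d00 ??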
608 632
609 def parse_policy(policy_path): 633 def parse_policy(policy_path):
610 """Parses policy file. 634 """Parses policy file.
611 635
612 A policy file contains component names and their 636 A policy file contains component names and their
613 stacktrace patterns written as regular expressions. 637 stacktrace patterns written as regular expressions.
614 The patterns are matched against the symbols of 638 The patterns are matched against the symbols of
615 each stacktrace in the order written in the policy file. 639 each stacktrace in the order written in the policy file.
616 640
617 Args: 641 Args:
618 policy_path: A path for a policy file. 642 policy_path: A path for a policy file.
619 Returns: 643 Returns:
620 A list containing component's name and its regex object 644 A tuple of (a list of Rule objects, the policy version, component names).
621 """ 645 """
622 with open(policy_path, mode='r') as policy_f: 646 with open(policy_path, mode='r') as policy_f:
623 policy_lines = policy_f.readlines() 647 policy_lines = policy_f.readlines()
624 648
625 policy_version = POLICY_DEEP_1 649 policy_version = POLICY_DEEP_1
626 if policy_lines[0].startswith('heap profile policy: '): 650 if policy_lines[0].startswith('heap profile policy: '):
627 policy_version = policy_lines[0][21:].strip() 651 policy_version = policy_lines[0][21:].strip()
628 policy_lines.pop(0) 652 policy_lines.pop(0)
629 policy_list = [] 653 rule_list = []
654 components = []
630 655
631 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1: 656 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1:
632 sys.stderr.write(' heap profile policy version: %s\n' % policy_version)
633 for line in policy_lines: 657 for line in policy_lines:
634 if line[0] == '#': 658 if line[0] == '#':
635 continue 659 continue
636 660
637 if policy_version == POLICY_DEEP_2: 661 if policy_version == POLICY_DEEP_2:
638 (name, allocation_type, pattern) = line.strip().split(None, 2) 662 (name, allocation_type, pattern) = line.strip().split(None, 2)
639 mmap = False 663 mmap = False
640 if allocation_type == 'mmap': 664 if allocation_type == 'mmap':
641 mmap = True 665 mmap = True
642 elif policy_version == POLICY_DEEP_1: 666 elif policy_version == POLICY_DEEP_1:
643 name = line.split()[0] 667 name = line.split()[0]
644 pattern = line[len(name) : len(line)].strip() 668 pattern = line[len(name) : len(line)].strip()
645 mmap = False 669 mmap = False
646 670
647 if pattern != 'default': 671 if pattern != 'default':
648 policy_list.append(Policy(name, mmap, pattern)) 672 rule_list.append(Rule(name, mmap, pattern))
649 if components.count(name) == 0: 673 if components.count(name) == 0:
650 components.append(name) 674 components.append(name)
651 675
652 else: 676 else:
653 sys.stderr.write(' invalid heap profile policy version: %s\n' % ( 677 sys.stderr.write(' invalid heap profile policy version: %s\n' % (
654 policy_version)) 678 policy_version))
655 679
656 return policy_list 680 return rule_list, policy_version, components
657 681
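Illustrative policy files for the two versions parse_policy() accepts (component names and patterns hypothetical). A POLICY_DEEP_2 file starts with a version header and carries a name, an allocation type ('mmap' or 'malloc'), and a pattern per line; a 'default' pattern registers the component without adding a matching rule:

    heap profile policy: POLICY_DEEP_2
    tc-webkit      malloc  .*\bWebCore::.*
    mmap-profiler  mmap    .*(ProfilerMalloc|MemoryRegionMap::).*
    other          malloc  default

A POLICY_DEEP_1 file has no header and no allocation-type column:

    tc-webkit .*\bWebCore::.*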
658 682
659 def main(): 683 def find_prefix(path):
660 if (len(sys.argv) < 4) or (not (sys.argv[1] in ['--csv', 684 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path)
661 '--json',
662 '--expand',
663 '--list',
664 '--stacktrace',
665 '--pprof'])):
666 sys.stderr.write("""Usage:
667 %s [options] <chrome-binary> <policy> <profile> [component-name] [depth]
668 685
669 Options:
670 --csv Output result in csv format
671 --json Output result in json format
672 --stacktrace Convert raw address to symbol names
673 --list Lists components and their sizes
674 --expand Show all stacktraces in the specified component
675 of given depth with their sizes
676 --pprof Format the profile file so it can be processed
677 by pprof
678 686
679 Examples: 687 def load_buckets(prefix):
680 dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv
681 dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json
682 dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap
683 dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4
684 dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt
685 """ % (sys.argv[0]))
686 sys.exit(1)
687
688 action = sys.argv[1]
689 chrome_path = sys.argv[2]
690 policy_path = sys.argv[3]
691 log_path = sys.argv[4]
692
693 sys.stderr.write('parsing a policy file\n')
694 policy_list = parse_policy(policy_path)
695
696 p = re.compile('\.[0-9][0-9][0-9][0-9]\.heap')
697 prefix = p.sub('', log_path)
698 symbol_path = prefix + '.symbols'
699
700 sys.stderr.write('parsing the maps file\n')
701 maps_path = prefix + '.maps'
702 with open(maps_path, 'r') as maps_f:
703 maps_lines = maps_f.readlines()
704
705 # Reading buckets 688 # Reading buckets
706 sys.stderr.write('parsing the bucket file\n') 689 sys.stderr.write('Loading bucket files.\n')
707 buckets = {} 690 buckets = {}
708 bucket_count = 0 691 bucket_count = 0
709 n = 0 692 n = 0
710 while True: 693 while True:
711 buckets_path = '%s.%04d.buckets' % (prefix, n) 694 buckets_path = '%s.%04d.buckets' % (prefix, n)
712 if not os.path.exists(buckets_path): 695 if not os.path.exists(buckets_path):
713 if n > 10: 696 if n > 10:
714 break 697 break
715 n += 1 698 n += 1
716 continue 699 continue
717 sys.stderr.write('reading buckets from %s\n' % (buckets_path)) 700 sys.stderr.write(' %s\n' % buckets_path)
718 with open(buckets_path, 'r') as buckets_f: 701 with open(buckets_path, 'r') as buckets_f:
719 for line in buckets_f: 702 for line in buckets_f:
720 words = line.split() 703 words = line.split()
721 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap') 704 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap')
722 n += 1 705 n += 1
723 706
724 log_path_list = [log_path] 707 return buckets
725 708
726 if action in ('--csv', '--json'): 709
727 # search for the sequence of files 710 def determine_dump_path_list(dump_path, prefix):
728 n = int(log_path[len(log_path) - 9 : len(log_path) - 5]) 711 dump_path_list = [dump_path]
729 n += 1 # skip current file 712
730 while True: 713 # search for the sequence of files
731 p = '%s.%04d.heap' % (prefix, n) 714 n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5])
732 if os.path.exists(p): 715 n += 1 # skip current file
733 log_path_list.append(p) 716 while True:
734 else: 717 p = '%s.%04d.heap' % (prefix, n)
735 break 718 if os.path.exists(p):
736 n += 1 719 dump_path_list.append(p)
737
738 logs = []
739 for path in log_path_list:
740 new_log = Log(path)
741 sys.stderr.write('Parsing a dump: %s\n' % path)
742 try:
743 new_log.parse_log(buckets)
744 except EmptyDumpException:
745 sys.stderr.write(' WARNING: ignored an empty dump: %s\n' % path)
746 except ParsingException, e:
747 sys.stderr.write(' Error in parsing heap profile dump: %s\n' % e)
748 sys.exit(1)
749 else: 720 else:
750 logs.append(new_log) 721 break
751 722 n += 1
752 sys.stderr.write('getting symbols\n') 723
753 update_symbols(symbol_path, maps_lines, maps_path) 724 return dump_path_list
754 725
755 # TODO(dmikurube): Many modes now. Split them into separate functions. 726
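How the dump sequence is discovered, as a minimal sketch (file names hypothetical): find_prefix() strips the '.NNNN.heap' suffix, and determine_dump_path_list() walks forward from the given dump while consecutively numbered files exist:

    prefix = find_prefix('hprof.12345.0004.heap')   # -> 'hprof.12345'
    dumps = determine_dump_path_list('hprof.12345.0004.heap', prefix)
    # With .0005 and .0006 on disk, dumps == ['hprof.12345.0004.heap',
    #   'hprof.12345.0005.heap', 'hprof.12345.0006.heap']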
756 if action == '--stacktrace': 727 def load_single_dump(dump_path, buckets, appeared_addresses):
757 logs[0].dump_stacktrace(buckets) 728 new_dump = Dump(dump_path)
758 729 try:
759 elif action == '--csv': 730 new_dump.parse_dump(buckets, appeared_addresses)
760 sys.stdout.write(','.join(components)) 731 except EmptyDumpException:
761 sys.stdout.write('\n') 732 sys.stderr.write('... ignored an empty dump')
762 733 except ParsingException, e:
763 for log in logs: 734 sys.stderr.write('... error in parsing: %s' % e)
764 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) 735 sys.exit(1)
736 else:
737 sys.stderr.write(' (version: %s)' % new_dump.dump_version)
738
739 return new_dump
740
741
742 def load_dump(dump_path, buckets):
743 sys.stderr.write('Loading a heap dump file: "%s"' % dump_path)
744 appeared_addresses = set()
745 dump = load_single_dump(dump_path, buckets, appeared_addresses)
746 sys.stderr.write('.\n')
747 return dump, appeared_addresses
748
749
750 def load_dumps(dump_path_list, buckets):
751 sys.stderr.write('Loading heap dump files.\n')
752 appeared_addresses = set()
753 dumps = []
754 for path in dump_path_list:
755 sys.stderr.write(' %s' % path)
756 dumps.append(load_single_dump(path, buckets, appeared_addresses))
757 sys.stderr.write('\n')
758 return dumps, appeared_addresses
759
760
761 def load_and_update_symbol_cache(prefix, appeared_addresses):
762 maps_path = prefix + '.maps'
763 symbol_path = prefix + '.symbols'
764 sys.stderr.write('Loading and updating symbol cache: "%s".\n' % symbol_path)
765 symbols = {}
766 update_symbols(symbol_path, maps_path, appeared_addresses, symbols)
767 return symbols
768
769
770 def load_default_policies():
771 with open(POLICIES_JSON_PATH, mode='r') as policies_f:
772 default_policies = json.load(policies_f)
773 return default_policies
774
775
776 def load_policy(policies_dict, policy_label):
777 policy_file = policies_dict[policy_label]['file']
778 policy_path = os.path.join(os.path.dirname(__file__), policy_file)
779 rule_list, policy_version, components = parse_policy(policy_path)
780 sys.stderr.write(' %s: %s (version: %s)\n' %
781 (policy_label, policy_path, policy_version))
782 return Policy(rule_list, policy_version, components)
783
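The shape of policies.json that load_default_policies() and load_policy() assume: a policy label mapped to an object with at least a 'file' key naming a policy file relative to this script. The labels and file names below are hypothetical:

    {
      "l0": { "file": "policy.l0.txt" },
      "android": { "file": "policy.android.txt" }
    }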
784
785 def load_policies_dict(policies_dict):
786 sys.stderr.write('Loading policy files.\n')
787 policies = {}
788 for policy_label in policies_dict:
789 policies[policy_label] = load_policy(policies_dict, policy_label)
790 return policies
791
792
793 def load_policies(options_policy):
794 default_policies = load_default_policies()
795 if options_policy:
796 policy_labels = options_policy.split(',')
797 specified_policies = {}
798 for specified_policy in policy_labels:
799 if specified_policy in default_policies:
800 specified_policies[specified_policy] = (
801 default_policies[specified_policy])
802 policies = load_policies_dict(specified_policies)
803 else:
804 policies = load_policies_dict(default_policies)
805 return policies
806
807
808 def do_stacktrace(sys_argv):
809 parser = OptionParser(usage='Usage: %prog stacktrace <dump>')
810 options, args = parser.parse_args(sys_argv)
811
812 if len(args) < 2:
813 parser.error('needs 1 argument.')
814
815 dump_path = args[1]
816
817 prefix = find_prefix(dump_path)
818 buckets = load_buckets(prefix)
819 dump, appeared_addresses = load_dump(dump_path, buckets)
820 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
821
822 dump.print_stacktrace(buckets, symbols)
823
824 return 0
825
826
827 def do_csv(sys_argv):
828 parser = OptionParser('Usage: %prog csv [-p POLICY] <first-dump>')
829 parser.add_option('-p', '--policy', type='string', dest='policy',
830 help='profile with POLICY', metavar='POLICY')
831 options, args = parser.parse_args(sys_argv)
832
833 if len(args) < 2:
M-A Ruel 2012/07/24 14:10:53 what with 10 args?
Dai Mikurube (NOT FULLTIME) 2012/07/24 14:53:45 It just ignores extra args. Should it warn or abort?
M-A Ruel 2012/07/24 14:57:59 Please abort. Unless it is necessary to ignore dur
Dai Mikurube (NOT FULLTIME) 2012/07/24 16:19:24 Done.
834 parser.error('needs 1 argument.')
835
836 dump_path = args[1]
837
838 prefix = find_prefix(dump_path)
839 buckets = load_buckets(prefix)
840 dumps, appeared_addresses = load_dumps(
841 determine_dump_path_list(dump_path, prefix), buckets)
842 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
843 policies = load_policies(options.policy)
844
845 max_components = 0
846 for policy in policies:
847 max_components = max(max_components, len(policies[policy].components))
848
849 for policy in sorted(policies):
850 rule_list = policies[policy].rules
851 components = policies[policy].components
852
853 if len(policies) > 1:
854 sys.stdout.write('%s%s\n' % (policy, ',' * (max_components - 1)))
855 sys.stdout.write('%s%s\n' % (
856 ','.join(components), ',' * (max_components - len(components))))
857
858 for dump in dumps:
859 component_sizes = dump.apply_policy(
860 rule_list, buckets, dumps[0].dump_time, components, symbols)
765 s = [] 861 s = []
766 for c in components: 862 for c in components:
767 if c in ('hour', 'minute', 'second'): 863 if c in ('hour', 'minute', 'second'):
768 s.append('%05.5f' % (component_sizes[c])) 864 s.append('%05.5f' % (component_sizes[c]))
769 else: 865 else:
770 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) 866 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
771 sys.stdout.write(','.join(s)) 867 sys.stdout.write('%s%s\n' % (
772 sys.stdout.write('\n') 868 ','.join(s), ',' * (max_components - len(components))))
773 869
774 elif action == '--json': 870 for bucket in buckets.itervalues():
775 json_base = { 871 bucket.clear_component_cache()
776 'version': 'JSON_DEEP_1', 872
873 return 0
874
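The CSV layout do_csv() produces when several policies are loaded: per policy, a label row, a component-header row, and one row per dump, each padded with commas to the widest policy's component count (labels, components, and sizes hypothetical):

    l0,,,
    second,mmap-profiler,tc-webkit,other
    0.00000,12.34567,45.67890,7.89012
    60.00000,12.50000,46.00000,8.00000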
875
876 def do_json(sys_argv):
877 parser = OptionParser('Usage: %prog json [-p POLICY] <first-dump>')
878 parser.add_option('-p', '--policy', type='string', dest='policy',
879 help='profile with POLICY', metavar='POLICY')
880 options, args = parser.parse_args(sys_argv)
881
882 if len(args) < 2:
883 parser.error('needs 1 argument.')
884
885 dump_path = args[1]
886
887 prefix = find_prefix(dump_path)
888 buckets = load_buckets(prefix)
889 dumps, appeared_addresses = load_dumps(
890 determine_dump_path_list(dump_path, prefix), buckets)
891 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
892 policies = load_policies(options.policy)
893
894 json_base = {
895 'version': 'JSON_DEEP_2',
896 'policies': {},
897 }
898
899 for policy in sorted(policies):
900 rule_list = policies[policy].rules
901 components = policies[policy].components
902
903 json_base['policies'][policy] = {
777 'legends': components, 904 'legends': components,
778 'snapshots': [], 905 'snapshots': [],
779 } 906 }
780 for log in logs: 907
781 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) 908 for dump in dumps:
782 component_sizes['log_path'] = log.log_path 909 component_sizes = dump.apply_policy(
783 component_sizes['log_time'] = datetime.fromtimestamp( 910 rule_list, buckets, dumps[0].dump_time, components, symbols)
784 log.log_time).strftime('%Y-%m-%d %H:%M:%S') 911 component_sizes['dump_path'] = dump.dump_path
785 json_base['snapshots'].append(component_sizes) 912 component_sizes['dump_time'] = datetime.fromtimestamp(
786 json.dump(json_base, sys.stdout, indent=2, sort_keys=True) 913 dump.dump_time).strftime('%Y-%m-%d %H:%M:%S')
787 914 json_base['policies'][policy]['snapshots'].append(component_sizes)
788 elif action == '--list': 915
789 component_sizes = logs[0].apply_policy( 916 for bucket in buckets.itervalues():
790 policy_list, buckets, logs[0].log_time) 917 bucket.clear_component_cache()
918
919 json.dump(json_base, sys.stdout, indent=2, sort_keys=True)
920
921 return 0
922
923
924 def do_list(sys_argv):
925 parser = OptionParser('Usage: %prog [-p POLICY] list <first-dump>')
926 parser.add_option('-p', '--policy', type='string', dest='policy',
927 help='profile with POLICY', metavar='POLICY')
928 options, args = parser.parse_args(sys_argv)
929
930 if len(args) < 2:
931 parser.error('needs 1 argument.')
932
933 dump_path = args[1]
934
935 prefix = find_prefix(dump_path)
936 buckets = load_buckets(prefix)
937 dumps, appeared_addresses = load_dumps(
938 determine_dump_path_list(dump_path, prefix), buckets)
939 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
940 policies = load_policies(options.policy)
941
942 for policy in sorted(policies):
943 rule_list = policies[policy].rules
944 components = policies[policy].components
945
946 component_sizes = dumps[0].apply_policy(
947 rule_list, buckets, dumps[0].dump_time, components, symbols)
948 sys.stdout.write('%s:\n' % policy)
791 for c in components: 949 for c in components:
792 if c in ['hour', 'minute', 'second']: 950 if c in ['hour', 'minute', 'second']:
793 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c])) 951 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))
794 else: 952 else:
795 sys.stdout.write('%30s %10.3f\n' % ( 953 sys.stdout.write('%30s %10.3f\n' % (
796 c, component_sizes[c] / 1024.0 / 1024.0)) 954 c, component_sizes[c] / 1024.0 / 1024.0))
797 955
798 elif action == '--expand': 956 for bucket in buckets.itervalues():
799 component_name = sys.argv[5] 957 bucket.clear_component_cache()
800 depth = sys.argv[6]
801 logs[0].expand(policy_list, buckets, component_name, int(depth))
802 958
803 elif action == '--pprof': 959 return 0
804 if len(sys.argv) > 5: 960
805 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5]) 961
806 else: 962 def do_expand(sys_argv):
807 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None) 963 parser = OptionParser(
964 'Usage: %prog expand <dump> <policy> <component> <depth>')
965 options, args = parser.parse_args(sys_argv)
966
967 if len(args) < 5:
968 parser.error('needs 4 arguments.')
969
970 dump_path = args[1]
971 target_policy = args[2]
972 component_name = args[3]
973 depth = args[4]
974
975 prefix = find_prefix(dump_path)
976 buckets = load_buckets(prefix)
977 dump, appeared_addresses = load_dump(dump_path, buckets)
978 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
979 policies = load_policies(target_policy)
980
981 rule_list = policies[target_policy].rules
982
983 dump.expand(rule_list, buckets, component_name, int(depth), symbols)
984
985 return 0
986
987
988 def do_pprof(sys_argv):
989 parser = OptionParser(
990 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>')
991 parser.add_option('-c', '--component', type='string', dest='component',
992 help='restrict to COMPONENT', metavar='COMPONENT')
993 options, args = parser.parse_args(sys_argv)
994
995 if len(args) < 3:
996 parser.error('needs 2 arguments.')
997
998 dump_path = args[1]
999 target_policy = args[2]
1000 component = options.component
1001
1002 prefix = find_prefix(dump_path)
1003 buckets = load_buckets(prefix)
1004 dump, appeared_addresses = load_dump(dump_path, buckets)
1005 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
1006 policies = load_policies(target_policy)
1007
1008 rule_list = policies[target_policy].rules
1009
1010 with open(prefix + '.maps', 'r') as maps_f:
1011 maps_lines = maps_f.readlines()
1012 dump.print_for_pprof(rule_list, buckets, maps_lines, component, symbols)
1013
1014 return 0
1015
1016
1017 def main():
1018 COMMANDS = {
1019 'csv': do_csv,
1020 'expand': do_expand,
1021 'json': do_json,
1022 'list': do_list,
1023 'pprof': do_pprof,
1024 'stacktrace': do_stacktrace,
1025 }
1026
1027 # TODO(dmikurube): Remove this message after a while.
1028 if len(sys.argv) >= 2 and sys.argv[1].startswith('--'):
1029 sys.stderr.write("""
1030 **************** NOTICE!! ****************
1031 The command line format has changed.
1032 Please look at the description below.
1033 ******************************************
1034
1035 """)
1036
1037 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS):
1038 sys.stderr.write("""Usage: %s <command> [options] [<args>]
1039
1040 Commands:
1041 csv Classify memory usage in CSV
1042 expand Show all stacktraces contained in the specified component
1043 json Classify memory usage in JSON
1044 list Classify memory usage in simple listing format
1045 pprof Format the profile dump so that it can be processed by pprof
1046 stacktrace Convert runtime addresses to symbol names
1047
1048 Quick Reference:
1049 dmprof csv [-p POLICY] <first-dump>
1050 dmprof expand <dump> <policy> <component> <depth>
1051 dmprof json [-p POLICY] <first-dump>
1052 dmprof list [-p POLICY] <first-dump>
1053 dmprof pprof [-c COMPONENT] <dump> <policy>
1054 dmprof stacktrace <dump>
1055 """ % (sys.argv[0]))
1056 sys.exit(1)
1057 action = sys.argv.pop(1)
1058
1059 return COMMANDS[action](sys.argv)
808 1060
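Under the new command-line format, the examples from the old usage text translate roughly as follows (the policy label 'l0' is hypothetical; it must be a key in policies.json). The Chrome binary and policy-file arguments are gone: symbols are resolved via the .maps file and policies come from policies.json:

    dmprof csv hprof.12345.0001.heap > result.csv
    dmprof json -p l0 hprof.12345.0001.heap > result.json
    dmprof list hprof.12345.0012.heap
    dmprof expand hprof.12345.0012.heap l0 tc-webkit 4
    dmprof pprof -c tc-webkit hprof.12345.0012.heap l0 > for_pprof.txt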
809 1061
810 if __name__ == '__main__': 1062 if __name__ == '__main__':
811 sys.exit(main()) 1063 sys.exit(main())