Chromium Code Reviews

Side by Side Diff: tools/deep_memory_profiler/dmprof

Issue 10802049: Change dmprof commandline format, and clean up start-up routines. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: policy setting. Created 8 years, 5 months ago
OLD | NEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """The deep heap profiler script for Chrome.""" 6 """The deep heap profiler script for Chrome."""
7 7
8 from datetime import datetime 8 from datetime import datetime
9 import json 9 import json
10 import os 10 import os
11 import re 11 import re
12 from optparse import OptionParser
M-A Ruel 2012/07/24 14:10:53 just import optparse
Dai Mikurube (NOT FULLTIME) 2012/07/24 16:19:24 Done.
12 import shutil 13 import shutil
13 import subprocess 14 import subprocess
14 import sys 15 import sys
15 import tempfile 16 import tempfile
16 17
17 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( 18 FIND_RUNTIME_SYMBOLS_PATH = os.path.join(
18 os.path.dirname(os.path.abspath(__file__)), 19 os.path.dirname(os.path.abspath(__file__)),
19 os.pardir, 20 os.pardir,
20 'find_runtime_symbols') 21 'find_runtime_symbols')
21 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) 22 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH)
22 23
23 from prepare_symbol_info import prepare_symbol_info 24 from prepare_symbol_info import prepare_symbol_info
24 from find_runtime_symbols import find_runtime_symbols_list 25 from find_runtime_symbols import find_runtime_symbols_list
25 26
26 BUCKET_ID = 5 27 BUCKET_ID = 5
27 VIRTUAL = 0 28 VIRTUAL = 0
28 COMMITTED = 1 29 COMMITTED = 1
29 ALLOC_COUNT = 2 30 ALLOC_COUNT = 2
30 FREE_COUNT = 3 31 FREE_COUNT = 3
31 NULL_REGEX = re.compile('') 32 NULL_REGEX = re.compile('')
32 33
34 POLICIES_JSON_PATH = os.path.join(os.path.dirname(__file__), 'policies.json')
M-A Ruel 2012/07/24 14:10:53 no
Dai Mikurube (NOT FULLTIME) 2012/07/24 14:53:45 Sorry, what do you mean by this?
35
33 # Heap Profile Dump versions 36 # Heap Profile Dump versions
34 37
35 # DUMP_DEEP_1 is OBSOLETE. 38 # DUMP_DEEP_1 is OBSOLETE.
36 # DUMP_DEEP_1 DOES NOT distinguish mmap regions and malloc chunks. 39 # DUMP_DEEP_1 DOES NOT distinguish mmap regions and malloc chunks.
37 # Their stacktraces DO contain mmap* or tc-* at their tops. 40 # Their stacktraces DO contain mmap* or tc-* at their tops.
38 # They should be processed by POLICY_DEEP_1. 41 # They should be processed by POLICY_DEEP_1.
39 DUMP_DEEP_1 = 'DUMP_DEEP_1' 42 DUMP_DEEP_1 = 'DUMP_DEEP_1'
40 43
41 # DUMP_DEEP_2 is OBSOLETE. 44 # DUMP_DEEP_2 is OBSOLETE.
42 # DUMP_DEEP_2 DOES distinguish mmap regions and malloc chunks. 45 # DUMP_DEEP_2 DOES distinguish mmap regions and malloc chunks.
(...skipping 22 matching lines...)
65 # Heap Profile Policy versions 68 # Heap Profile Policy versions
66 69
67 # POLICY_DEEP_1 DOES NOT include allocation_type columns. 70 # POLICY_DEEP_1 DOES NOT include allocation_type columns.
68 # mmap regions are distinguished with mmap frames in the pattern column. 71 # mmap regions are distinguished with mmap frames in the pattern column.
69 POLICY_DEEP_1 = 'POLICY_DEEP_1' 72 POLICY_DEEP_1 = 'POLICY_DEEP_1'
70 73
71 # POLICY_DEEP_2 DOES include allocation_type columns. 74 # POLICY_DEEP_2 DOES include allocation_type columns.
72 # mmap regions are distinguished with the allocation_type column. 75 # mmap regions are distinguished with the allocation_type column.
73 POLICY_DEEP_2 = 'POLICY_DEEP_2' 76 POLICY_DEEP_2 = 'POLICY_DEEP_2'
74 77
75 # TODO(dmikurube): Avoid global variables.
76 address_symbol_dict = {}
77 appeared_addresses = set()
78 components = []
79
80 78
81 class EmptyDumpException(Exception): 79 class EmptyDumpException(Exception):
82 def __init__(self, value): 80 def __init__(self, value):
83 self.value = value 81 self.value = value
84 def __str__(self): 82 def __str__(self):
85 return repr(self.value) 83 return repr(self.value)
86 84
87 85
88 class ParsingException(Exception): 86 class ParsingException(Exception):
89 def __init__(self, value): 87 def __init__(self, value):
90 self.value = value 88 self.value = value
91 def __str__(self): 89 def __str__(self):
92 return repr(self.value) 90 return repr(self.value)
93 91
94 92
95 class InvalidDumpException(ParsingException): 93 class InvalidDumpException(ParsingException):
96 def __init__(self, value): 94 def __init__(self, value):
97 self.value = value 95 self.value = value
98 def __str__(self): 96 def __str__(self):
99 return "invalid heap profile dump: %s" % repr(self.value) 97 return "invalid heap profile dump: %s" % repr(self.value)
100 98
101 99
102 class ObsoleteDumpVersionException(ParsingException): 100 class ObsoleteDumpVersionException(ParsingException):
103 def __init__(self, value): 101 def __init__(self, value):
104 self.value = value 102 self.value = value
105 def __str__(self): 103 def __str__(self):
106 return "obsolete heap profile dump version: %s" % repr(self.value) 104 return "obsolete heap profile dump version: %s" % repr(self.value)
107 105
108 106
109 class Policy(object): 107 class Rule(object):
108 """Represents one matching rule in a policy file."""
110 109
111 def __init__(self, name, mmap, pattern): 110 def __init__(self, name, mmap, pattern):
112 self.name = name 111 self.name = name
113 self.mmap = mmap 112 self.mmap = mmap
114 self.condition = re.compile(pattern + r'\Z') 113 self.condition = re.compile(pattern + r'\Z')
115 114
116 115
117 def get_component(policy_list, bucket): 116 class Policy(object):
117 """Represents a policy, a content of a policy file."""
118
119 def __init__(self, rules, version, components):
120 self.rules = rules
121 self.version = version
122 self.components = components
123
124 def append_rule(self, rule):
125 self.rules.append(rule)
126
127
128 def get_component(rule_list, bucket, symbols):
118 """Returns a component name which a given bucket belongs to. 129 """Returns a component name which a given bucket belongs to.
119 130
120 Args: 131 Args:
121 policy_list: A list containing Policy objects. (Parsed policy data by 132 rule_list: A list of Rule objects.
122 parse_policy.)
123 bucket: A Bucket object to be searched for. 133 bucket: A Bucket object to be searched for.
134 symbols: A dict mapping runtime addresses to symbol names.
124 135
125 Returns: 136 Returns:
126 A string representing a component name. 137 A string representing a component name.
127 """ 138 """
128 if not bucket: 139 if not bucket:
129 return 'no-bucket' 140 return 'no-bucket'
130 if bucket.component: 141 if bucket.component_cache:
131 return bucket.component 142 return bucket.component_cache
132 143
133 stacktrace = ''.join( 144 stacktrace = ''.join(symbols[a] + ' ' for a in bucket.stacktrace).strip()
134 address_symbol_dict[a] + ' ' for a in bucket.stacktrace).strip()
135 145
136 for policy in policy_list: 146 for rule in rule_list:
137 if bucket.mmap == policy.mmap and policy.condition.match(stacktrace): 147 if bucket.mmap == rule.mmap and rule.condition.match(stacktrace):
138 bucket.component = policy.name 148 bucket.component_cache = rule.name
139 return policy.name 149 return rule.name
140 150
141 assert False 151 assert False
142 152
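For reference, a minimal sketch of how a Rule drives get_component(), assuming the Rule, Bucket, and get_component definitions in this file; the rule name, pattern, and addresses are hypothetical. Note that Rule.condition is compiled with a trailing \Z, so the pattern must cover the entire joined stacktrace:

    rule = Rule('tc-webkit', False, r'.*\bWebCore::.*')  # hypothetical rule
    bucket = Bucket(['0x7f10', '0x7f20'], False)         # hypothetical addresses
    symbols = {'0x7f10': 'tc_malloc', '0x7f20': 'WebCore::Node::create'}

    # The joined stacktrace is 'tc_malloc WebCore::Node::create'.  The first
    # rule whose mmap flag and pattern both match names the component, and
    # the result is memoized on the bucket.
    assert get_component([rule], bucket, symbols) == 'tc-webkit'
    assert bucket.component_cache == 'tc-webkit'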
143 153
144 class Bucket(object): 154 class Bucket(object):
155 """Represents a bucket, which is a unit of memory classification."""
145 156
146 def __init__(self, stacktrace, mmap): 157 def __init__(self, stacktrace, mmap):
147 self.stacktrace = stacktrace 158 self.stacktrace = stacktrace
148 self.mmap = mmap 159 self.mmap = mmap
149 self.component = '' 160 self.component_cache = ''
161
162 def clear_component_cache(self):
163 self.component_cache = ''
150 164
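Continuing the sketch above: component_cache is a per-policy memo, which is why the multi-policy commands below (do_csv, do_json, do_list) clear it on every bucket before applying the next policy:

    assert get_component([rule], bucket, symbols) == 'tc-webkit'  # cache hit
    bucket.clear_component_cache()  # required before applying another policy
    assert bucket.component_cache == ''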
151 165
152 class Log(object): 166 class Dump(object):
167 """Represents one heap profile dump."""
153 168
154 """A class representing one dumped log data.""" 169 def __init__(self, dump_path):
155 def __init__(self, log_path): 170 self.dump_path = dump_path
156 self.log_path = log_path 171 self.dump_lines = [
157 self.log_lines = [ 172 l for l in open(self.dump_path, 'r') if l and not l.startswith('#')]
158 l for l in open(self.log_path, 'r') if l and not l.startswith('#')] 173 self.dump_version = ''
159 self.log_version = ''
160 sys.stderr.write('Loading a dump: %s\n' % log_path)
161 self.stacktrace_lines = [] 174 self.stacktrace_lines = []
162 self.counters = {} 175 self.counters = {}
163 self.log_time = os.stat(self.log_path).st_mtime 176 self.dump_time = os.stat(self.dump_path).st_mtime
164 177
165 def dump_stacktrace(buckets): 178 def print_stacktrace(self, buckets, symbols):
166 """Prints a given stacktrace. 179 """Prints a given stacktrace.
167 180
168 Args: 181 Args:
169 buckets: A dict mapping bucket ids and their corresponding Bucket 182 buckets: A dict mapping bucket ids to Bucket objects.
170 objects. 183 symbols: A dict mapping runtime addresses to symbol names.
171 """ 184 """
172 for line in self.stacktrace_lines: 185 for line in self.stacktrace_lines:
173 words = line.split() 186 words = line.split()
174 bucket = buckets.get(int(words[BUCKET_ID])) 187 bucket = buckets.get(int(words[BUCKET_ID]))
175 if not bucket: 188 if not bucket:
176 continue 189 continue
177 for i in range(0, BUCKET_ID - 1): 190 for i in range(0, BUCKET_ID - 1):
178 sys.stdout.write(words[i] + ' ') 191 sys.stdout.write(words[i] + ' ')
179 for address in bucket.stacktrace: 192 for address in bucket.stacktrace:
180 sys.stdout.write((address_symbol_dict.get(address) or address) + ' ') 193 sys.stdout.write((symbols.get(address) or address) + ' ')
181 sys.stdout.write('\n') 194 sys.stdout.write('\n')
182 195
183 @staticmethod 196 @staticmethod
184 def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets, 197 def accumulate_size_for_pprof(stacktrace_lines, rule_list, buckets,
185 component_name): 198 component_name, symbols):
186 """Accumulates size of committed chunks and the number of allocated chunks. 199 """Accumulates size of committed chunks and the number of allocated chunks.
187 200
188 Args: 201 Args:
189 stacktrace_lines: A list of strings which are valid as stacktraces. 202 stacktrace_lines: A list of strings which are valid as stacktraces.
190 policy_list: A list containing Policy objects. (Parsed policy data by 203 rule_list: A list of Rule objects.
191 parse_policy.) 204 buckets: A dict mapping bucket ids to Bucket objects.
192 buckets: A dict mapping bucket ids and their corresponding Bucket
193 objects.
194 component_name: A name of component for filtering. 205 component_name: A name of component for filtering.
206 symbols: A dict mapping runtime addresses to symbol names.
195 207
196 Returns: 208 Returns:
197 Two integers which are the accumulated size of committed regions and the 209 Two integers which are the accumulated size of committed regions and the
198 number of allocated chunks, respectively. 210 number of allocated chunks, respectively.
199 """ 211 """
200 com_committed = 0 212 com_committed = 0
201 com_allocs = 0 213 com_allocs = 0
202 for line in stacktrace_lines: 214 for line in stacktrace_lines:
203 words = line.split() 215 words = line.split()
204 bucket = buckets.get(int(words[BUCKET_ID])) 216 bucket = buckets.get(int(words[BUCKET_ID]))
205 if (not bucket or 217 if (not bucket or
206 (component_name and 218 (component_name and
207 component_name != get_component(policy_list, bucket))): 219 component_name != get_component(rule_list, bucket, symbols))):
208 continue 220 continue
209 221
210 com_committed += int(words[COMMITTED]) 222 com_committed += int(words[COMMITTED])
211 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) 223 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])
212 224
213 return com_committed, com_allocs 225 return com_committed, com_allocs
214 226
215 @staticmethod 227 @staticmethod
216 def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list, 228 def print_stacktrace_lines_for_pprof(stacktrace_lines, rule_list,
217 buckets, component_name): 229 buckets, component_name, symbols):
218 """Prints information of stacktrace lines for pprof. 230 """Prints information of stacktrace lines for pprof.
219 231
220 Args: 232 Args:
221 stacktrace_lines: A list of strings which are valid as stacktraces. 233 stacktrace_lines: A list of strings which are valid as stacktraces.
222 policy_list: A list containing Policy objects. (Parsed policy data by 234 rule_list: A list of Rule objects.
223 parse_policy.) 235 buckets: A dict mapping bucket ids to Bucket objects.
224 buckets: A dict mapping bucket ids and their corresponding Bucket
225 objects.
226 component_name: A name of component for filtering. 236 component_name: A name of component for filtering.
237 symbols: A dict mapping runtime addresses to symbol names.
227 """ 238 """
228 for line in stacktrace_lines: 239 for line in stacktrace_lines:
229 words = line.split() 240 words = line.split()
230 bucket = buckets.get(int(words[BUCKET_ID])) 241 bucket = buckets.get(int(words[BUCKET_ID]))
231 if (not bucket or 242 if (not bucket or
232 (component_name and 243 (component_name and
233 component_name != get_component(policy_list, bucket))): 244 component_name != get_component(rule_list, bucket, symbols))):
234 continue 245 continue
235 246
236 sys.stdout.write('%6d: %8s [%6d: %8s] @' % ( 247 sys.stdout.write('%6d: %8s [%6d: %8s] @' % (
237 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), 248 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
238 words[COMMITTED], 249 words[COMMITTED],
239 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), 250 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
240 words[COMMITTED])) 251 words[COMMITTED]))
241 for address in bucket.stacktrace: 252 for address in bucket.stacktrace:
242 sys.stdout.write(' ' + address) 253 sys.stdout.write(' ' + address)
243 sys.stdout.write('\n') 254 sys.stdout.write('\n')
244 255
245 def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name): 256 def print_for_pprof(
246 """Converts the log file so it can be processed by pprof. 257 self, rule_list, buckets, maps_lines, component_name, symbols):
258 """Converts the heap profile dump so it can be processed by pprof.
247 259
248 Args: 260 Args:
249 policy_list: A list containing Policy objects. (Parsed policy data by 261 rule_list: A list of Rule objects.
250 parse_policy.) 262 buckets: A dict mapping bucket ids to Bucket objects.
251 buckets: A dict mapping bucket ids and their corresponding Bucket 263 maps_lines: A list of strings containing /proc/.../maps.
252 objects.
253 mapping_lines: A list of strings containing /proc/.../maps.
254 component_name: A name of component for filtering. 264 component_name: A name of component for filtering.
265 symbols: A dict mapping runtime addresses to symbol names.
255 """ 266 """
256 sys.stdout.write('heap profile: ') 267 sys.stdout.write('heap profile: ')
257 com_committed, com_allocs = self.accumulate_size_for_pprof( 268 com_committed, com_allocs = self.accumulate_size_for_pprof(
258 self.stacktrace_lines, policy_list, buckets, component_name) 269 self.stacktrace_lines, rule_list, buckets, component_name, symbols)
259 270
260 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( 271 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
261 com_allocs, com_committed, com_allocs, com_committed)) 272 com_allocs, com_committed, com_allocs, com_committed))
262 273
263 self.dump_stacktrace_lines_for_pprof( 274 self.print_stacktrace_lines_for_pprof(
264 self.stacktrace_lines, policy_list, buckets, component_name) 275 self.stacktrace_lines, rule_list, buckets, component_name, symbols)
265 276
266 sys.stdout.write('MAPPED_LIBRARIES:\n') 277 sys.stdout.write('MAPPED_LIBRARIES:\n')
267 for line in mapping_lines: 278 for line in maps_lines:
268 sys.stdout.write(line) 279 sys.stdout.write(line)
269 280
270 @staticmethod 281 @staticmethod
271 def check_stacktrace_line(stacktrace_line, buckets): 282 def check_stacktrace_line(stacktrace_line, buckets, appeared_addresses):
272 """Checks if a given stacktrace_line is valid as stacktrace. 283 """Checks if a given stacktrace_line is valid as stacktrace.
273 284
274 Args: 285 Args:
275 stacktrace_line: A string to be checked. 286 stacktrace_line: A string to be checked.
276 buckets: A dict mapping bucket ids and their corresponding Bucket 287 buckets: A dict mapping bucket ids to Bucket objects.
277 objects. 288 appeared_addresses: A list where appeared addresses will be stored.
278 289
279 Returns: 290 Returns:
280 True if the given stacktrace_line is valid. 291 True if the given stacktrace_line is valid.
281 """ 292 """
282 words = stacktrace_line.split() 293 words = stacktrace_line.split()
283 if len(words) < BUCKET_ID + 1: 294 if len(words) < BUCKET_ID + 1:
284 return False 295 return False
285 if words[BUCKET_ID - 1] != '@': 296 if words[BUCKET_ID - 1] != '@':
286 return False 297 return False
287 bucket = buckets.get(int(words[BUCKET_ID])) 298 bucket = buckets.get(int(words[BUCKET_ID]))
(...skipping 10 matching lines...)
298 A pair of an integer indicating a line number after skipped, and a 309 A pair of an integer indicating a line number after skipped, and a
299 boolean value which is True if found a line which skipping_condition 310 boolean value which is True if found a line which skipping_condition
300 is False for. 311 is False for.
301 """ 312 """
302 while skipping_condition(line_number): 313 while skipping_condition(line_number):
303 line_number += 1 314 line_number += 1
304 if line_number >= max_line_number: 315 if line_number >= max_line_number:
305 return line_number, False 316 return line_number, False
306 return line_number, True 317 return line_number, True
307 318
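For reference, the stacktrace-line layout these checks assume, per the constants at the top of the file (the numeric values here are hypothetical):

    line = '4096 2048 10 2 @ 37'   # virtual committed allocs frees @ bucket_id
    words = line.split()
    assert len(words) >= BUCKET_ID + 1 and words[BUCKET_ID - 1] == '@'
    bucket_id = int(words[BUCKET_ID])                               # 37
    committed = int(words[COMMITTED])                               # 2048
    live_allocs = int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])  # 8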
308 def parse_stacktraces_while_valid(self, buckets, log_lines, line_number): 319 def parse_stacktraces_while_valid(
320 self, buckets, dump_lines, line_number, appeared_addresses):
309 """Parses stacktrace lines while the lines are valid. 321 """Parses stacktrace lines while the lines are valid.
310 322
311 Args: 323 Args:
312 buckets: A dict mapping bucket ids and their corresponding Bucket 324 buckets: A dict mapping bucket ids to Bucket objects.
313 objects. 325 dump_lines: A list of lines to be parsed.
314 log_lines: A list of lines to be parsed. 326 line_number: A line number to start parsing in dump_lines.
315 line_number: An integer representing the starting line number in 327 appeared_addresses: A list where appeared addresses will be stored.
316 log_lines.
317 328
318 Returns: 329 Returns:
319 A pair of a list of valid lines and an integer representing the last 330 A pair of a list of valid lines and an integer representing the last
320 line number in log_lines. 331 line number in dump_lines.
321 """ 332 """
322 (line_number, _) = self.skip_lines_while( 333 (line_number, _) = self.skip_lines_while(
323 line_number, len(log_lines), 334 line_number, len(dump_lines),
324 lambda n: not log_lines[n].split()[0].isdigit()) 335 lambda n: not dump_lines[n].split()[0].isdigit())
325 stacktrace_lines_start = line_number 336 stacktrace_lines_start = line_number
326 (line_number, _) = self.skip_lines_while( 337 (line_number, _) = self.skip_lines_while(
327 line_number, len(log_lines), 338 line_number, len(dump_lines),
328 lambda n: self.check_stacktrace_line(log_lines[n], buckets)) 339 lambda n: self.check_stacktrace_line(
329 return (log_lines[stacktrace_lines_start:line_number], line_number) 340 dump_lines[n], buckets, appeared_addresses))
341 return (dump_lines[stacktrace_lines_start:line_number], line_number)
330 342
331 def parse_stacktraces(self, buckets, line_number): 343 def parse_stacktraces(self, buckets, line_number, appeared_addresses):
332 """Parses lines in self.log_lines as stacktrace. 344 """Parses lines in self.dump_lines as stacktrace.
333 345
334 Valid stacktrace lines are stored into self.stacktrace_lines. 346 Valid stacktrace lines are stored into self.stacktrace_lines.
335 347
336 Args: 348 Args:
337 buckets: A dict mapping bucket ids and their corresponding Bucket 349 buckets: A dict mapping bucket ids to Bucket objects.
338 objects. 350 line_number: A line number to start parsing in dump_lines.
339 line_number: An integer representing the starting line number in 351 appeared_addresses: A list where appeared addresses will be stored.
340 log_lines.
341 352
342 Raises: 353 Raises:
343 ParsingException for invalid dump versions. 354 ParsingException for invalid dump versions.
344 """ 355 """
345 sys.stderr.write(' Version: %s\n' % self.log_version) 356 if self.dump_version == DUMP_DEEP_5:
346
347 if self.log_version == DUMP_DEEP_5:
348 (self.stacktrace_lines, line_number) = ( 357 (self.stacktrace_lines, line_number) = (
349 self.parse_stacktraces_while_valid( 358 self.parse_stacktraces_while_valid(
350 buckets, self.log_lines, line_number)) 359 buckets, self.dump_lines, line_number, appeared_addresses))
351 360
352 elif self.log_version in DUMP_DEEP_OBSOLETE: 361 elif self.dump_version in DUMP_DEEP_OBSOLETE:
353 raise ObsoleteDumpVersionException(self.log_version) 362 raise ObsoleteDumpVersionException(self.dump_version)
354 363
355 else: 364 else:
356 raise InvalidDumpException('Invalid version: %s' % self.log_version) 365 raise InvalidDumpException('Invalid version: %s' % self.dump_version)
357 366
358 def parse_global_stats(self): 367 def parse_global_stats(self):
359 """Parses lines in self.log_lines as global stats.""" 368 """Parses lines in self.dump_lines as global stats."""
360 (ln, _) = self.skip_lines_while( 369 (ln, _) = self.skip_lines_while(
361 0, len(self.log_lines), 370 0, len(self.dump_lines),
362 lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n') 371 lambda n: self.dump_lines[n] != 'GLOBAL_STATS:\n')
363 372
364 global_stat_names = [ 373 global_stat_names = [
365 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', 374 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',
366 'nonprofiled-absent', 'nonprofiled-anonymous', 375 'nonprofiled-absent', 'nonprofiled-anonymous',
367 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', 376 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
368 'nonprofiled-stack', 'nonprofiled-other', 377 'nonprofiled-stack', 'nonprofiled-other',
369 'profiled-mmap', 'profiled-malloc'] 378 'profiled-mmap', 'profiled-malloc']
370 379
371 for prefix in global_stat_names: 380 for prefix in global_stat_names:
372 (ln, _) = self.skip_lines_while( 381 (ln, _) = self.skip_lines_while(
373 ln, len(self.log_lines), 382 ln, len(self.dump_lines),
374 lambda n: self.log_lines[n].split()[0] != prefix) 383 lambda n: self.dump_lines[n].split()[0] != prefix)
375 words = self.log_lines[ln].split() 384 words = self.dump_lines[ln].split()
376 self.counters[prefix + '_virtual'] = int(words[-2]) 385 self.counters[prefix + '_virtual'] = int(words[-2])
377 self.counters[prefix + '_committed'] = int(words[-1]) 386 self.counters[prefix + '_committed'] = int(words[-1])
378 387
379 def parse_version(self): 388 def parse_version(self):
380 """Parses a version string in self.log_lines. 389 """Parses a version string in self.dump_lines.
381 390
382 Returns: 391 Returns:
383 A pair of (a string representing a version of the stacktrace dump, 392 A pair of (a string representing a version of the stacktrace dump,
384 and an integer indicating a line number next to the version string). 393 and an integer indicating a line number next to the version string).
385 394
386 Raises: 395 Raises:
387 ParsingException for invalid dump versions. 396 ParsingException for invalid dump versions.
388 """ 397 """
389 version = '' 398 version = ''
390 399
391 # Skip until an identifiable line. 400 # Skip until an identifiable line.
392 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') 401 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
393 if not self.log_lines: 402 if not self.dump_lines:
394 raise EmptyDumpException('Empty heap dump file.') 403 raise EmptyDumpException('Empty heap dump file.')
395 (ln, found) = self.skip_lines_while( 404 (ln, found) = self.skip_lines_while(
396 0, len(self.log_lines), 405 0, len(self.dump_lines),
397 lambda n: not self.log_lines[n].startswith(headers)) 406 lambda n: not self.dump_lines[n].startswith(headers))
398 if not found: 407 if not found:
399 raise InvalidDumpException('No version header.') 408 raise InvalidDumpException('No version header.')
400 409
401 # Identify a version. 410 # Identify a version.
402 if self.log_lines[ln].startswith('heap profile: '): 411 if self.dump_lines[ln].startswith('heap profile: '):
403 version = self.log_lines[ln][13:].strip() 412 version = self.dump_lines[ln][13:].strip()
404 if version == DUMP_DEEP_5: 413 if version == DUMP_DEEP_5:
405 (ln, _) = self.skip_lines_while( 414 (ln, _) = self.skip_lines_while(
406 ln, len(self.log_lines), 415 ln, len(self.dump_lines),
407 lambda n: self.log_lines[n] != 'STACKTRACES:\n') 416 lambda n: self.dump_lines[n] != 'STACKTRACES:\n')
408 elif version in DUMP_DEEP_OBSOLETE: 417 elif version in DUMP_DEEP_OBSOLETE:
409 raise ObsoleteDumpVersionException(version) 418 raise ObsoleteDumpVersionException(version)
410 else: 419 else:
411 raise InvalidDumpException('Invalid version: %s' % version) 420 raise InvalidDumpException('Invalid version: %s' % version)
412 elif self.log_lines[ln] == 'STACKTRACES:\n': 421 elif self.dump_lines[ln] == 'STACKTRACES:\n':
413 raise ObsoleteDumpVersionException(DUMP_DEEP_1) 422 raise ObsoleteDumpVersionException(DUMP_DEEP_1)
414 elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n': 423 elif self.dump_lines[ln] == 'MMAP_STACKTRACES:\n':
415 raise ObsoleteDumpVersionException(DUMP_DEEP_2) 424 raise ObsoleteDumpVersionException(DUMP_DEEP_2)
416 425
417 return (version, ln) 426 return (version, ln)
418 427
419 def parse_log(self, buckets): 428 def parse_dump(self, buckets, appeared_addresses):
420 self.log_version, ln = self.parse_version() 429 self.dump_version, ln = self.parse_version()
421 self.parse_global_stats() 430 self.parse_global_stats()
422 self.parse_stacktraces(buckets, ln) 431 self.parse_stacktraces(buckets, ln, appeared_addresses)
423 432
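Piecing together parse_version(), parse_global_stats(), and parse_stacktraces(), a DUMP_DEEP_5 dump has roughly this shape (all numbers hypothetical):

    heap profile: DUMP_DEEP_5
    GLOBAL_STATS:
         total  123456789  23456789    (last two words: virtual, committed)
     file-exec    1234567    234567
     ...                               (one line per name in global_stat_names)
    STACKTRACES:
     4096 2048 10 2 @ 37               (consumed by parse_stacktraces_while_valid)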
424 @staticmethod 433 @staticmethod
425 def accumulate_size_for_policy(stacktrace_lines, 434 def accumulate_size_for_policy(stacktrace_lines,
426 policy_list, buckets, sizes): 435 rule_list, buckets, sizes, symbols):
427 for line in stacktrace_lines: 436 for line in stacktrace_lines:
428 words = line.split() 437 words = line.split()
429 bucket = buckets.get(int(words[BUCKET_ID])) 438 bucket = buckets.get(int(words[BUCKET_ID]))
430 component_match = get_component(policy_list, bucket) 439 component_match = get_component(rule_list, bucket, symbols)
440
M-A Ruel 2012/07/24 14:10:53 This new line is gratuitous. Intended?
Dai Mikurube (NOT FULLTIME) 2012/07/24 16:19:24 It was unintended. Thanks.
431 sizes[component_match] += int(words[COMMITTED]) 441 sizes[component_match] += int(words[COMMITTED])
432 442
433 if component_match.startswith('tc-'): 443 if component_match.startswith('tc-'):
434 sizes['tc-total-log'] += int(words[COMMITTED]) 444 sizes['tc-total-log'] += int(words[COMMITTED])
435 elif component_match.startswith('mmap-'): 445 elif component_match.startswith('mmap-'):
436 sizes['mmap-total-log'] += int(words[COMMITTED]) 446 sizes['mmap-total-log'] += int(words[COMMITTED])
437 else: 447 else:
438 sizes['other-total-log'] += int(words[COMMITTED]) 448 sizes['other-total-log'] += int(words[COMMITTED])
439 449
440 def apply_policy(self, policy_list, buckets, first_log_time): 450 def apply_policy(
451 self, rule_list, buckets, first_dump_time, components, symbols):
441 """Aggregates the total memory size of each component. 452 """Aggregates the total memory size of each component.
442 453
443 Iterate through all stacktraces and attribute them to one of the components 454 Iterate through all stacktraces and attribute them to one of the components
444 based on the policy. It is important to apply policy in right order. 455 based on the policy. It is important to apply policy in right order.
445 456
446 Args: 457 Args:
447 policy_list: A list containing Policy objects. (Parsed policy data by 458 rule_list: A list of Rule objects.
448 parse_policy.) 459 buckets: A dict mapping bucket ids to Bucket objects.
449 buckets: A dict mapping bucket ids and their corresponding Bucket 460 first_dump_time: An integer representing time when the first dump is
450 objects.
451 first_log_time: An integer representing time when the first log is
452 dumped. 461 dumped.
462 components: A list of strings of component names.
463 symbols: A dict mapping runtime addresses to symbol names.
453 464
454 Returns: 465 Returns:
455 A dict mapping components and their corresponding sizes. 466 A dict mapping components and their corresponding sizes.
456 """ 467 """
457 468
458 sys.stderr.write('apply policy:%s\n' % (self.log_path)) 469 sys.stderr.write('Applying policy: "%s".\n' % self.dump_path)
459 sizes = dict((c, 0) for c in components) 470 sizes = dict((c, 0) for c in components)
460 471
461 self.accumulate_size_for_policy(self.stacktrace_lines, 472 self.accumulate_size_for_policy(self.stacktrace_lines,
462 policy_list, buckets, sizes) 473 rule_list, buckets, sizes, symbols)
463 474
464 mmap_prefix = 'profiled-mmap' 475 mmap_prefix = 'profiled-mmap'
465 malloc_prefix = 'profiled-malloc' 476 malloc_prefix = 'profiled-malloc'
466 477
467 sizes['mmap-no-log'] = ( 478 sizes['mmap-no-log'] = (
468 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log']) 479 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log'])
469 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix] 480 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix]
470 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix] 481 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix]
471 482
472 sizes['tc-no-log'] = ( 483 sizes['tc-no-log'] = (
(...skipping 33 matching lines...)
506 'nonprofiled-file-nonexec_committed', 517 'nonprofiled-file-nonexec_committed',
507 'nonprofiled-stack_committed', 518 'nonprofiled-stack_committed',
508 'nonprofiled-other_committed') 519 'nonprofiled-other_committed')
509 sizes['mustbezero'] = ( 520 sizes['mustbezero'] = (
510 self.counters['total_committed'] - 521 self.counters['total_committed'] -
511 sum(self.counters[i] for i in removed)) 522 sum(self.counters[i] for i in removed))
512 if 'total-exclude-profiler' in sizes: 523 if 'total-exclude-profiler' in sizes:
513 sizes['total-exclude-profiler'] = ( 524 sizes['total-exclude-profiler'] = (
514 self.counters['total_committed'] - sizes['mmap-profiler']) 525 self.counters['total_committed'] - sizes['mmap-profiler'])
515 if 'hour' in sizes: 526 if 'hour' in sizes:
516 sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0 527 sizes['hour'] = (self.dump_time - first_dump_time) / 60.0 / 60.0
517 if 'minute' in sizes: 528 if 'minute' in sizes:
518 sizes['minute'] = (self.log_time - first_log_time) / 60.0 529 sizes['minute'] = (self.dump_time - first_dump_time) / 60.0
519 if 'second' in sizes: 530 if 'second' in sizes:
520 sizes['second'] = self.log_time - first_log_time 531 sizes['second'] = self.dump_time - first_dump_time
521 532
522 return sizes 533 return sizes
523 534
524 @staticmethod 535 @staticmethod
525 def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets, 536 def accumulate_size_for_expand(stacktrace_lines, rule_list, buckets,
526 component_name, depth, sizes): 537 component_name, depth, sizes, symbols):
527 for line in stacktrace_lines: 538 for line in stacktrace_lines:
528 words = line.split() 539 words = line.split()
529 bucket = buckets.get(int(words[BUCKET_ID])) 540 bucket = buckets.get(int(words[BUCKET_ID]))
530 component_match = get_component(policy_list, bucket) 541 component_match = get_component(rule_list, bucket, symbols)
531 if component_match == component_name: 542 if component_match == component_name:
532 stacktrace_sequence = '' 543 stacktrace_sequence = ''
533 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace), 544 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace),
534 1 + depth)]: 545 1 + depth)]:
535 stacktrace_sequence += address_symbol_dict[address] + ' ' 546 stacktrace_sequence += symbols[address] + ' '
536 if not stacktrace_sequence in sizes: 547 if not stacktrace_sequence in sizes:
537 sizes[stacktrace_sequence] = 0 548 sizes[stacktrace_sequence] = 0
538 sizes[stacktrace_sequence] += int(words[COMMITTED]) 549 sizes[stacktrace_sequence] += int(words[COMMITTED])
539 550
540 def expand(self, policy_list, buckets, component_name, depth): 551 def expand(self, rule_list, buckets, component_name, depth, symbols):
541 """Prints all stacktraces in a given component of given depth. 552 """Prints all stacktraces in a given component of given depth.
542 553
543 Args: 554 Args:
544 policy_list: A list containing Policy objects. (Parsed policy data by 555 rule_list: A list of Rule objects.
545 parse_policy.) 556 buckets: A dict mapping bucket ids to Bucket objects.
546 buckets: A dict mapping bucket ids and their corresponding Bucket
547 objects.
548 component_name: A name of component for filtering. 557 component_name: A name of component for filtering.
549 depth: An integer representing depth to be printed. 558 depth: An integer representing depth to be printed.
559 symbols: A dict mapping runtime addresses to symbol names.
550 """ 560 """
551 sizes = {} 561 sizes = {}
552 562
553 self.accumulate_size_for_expand( 563 self.accumulate_size_for_expand(
554 self.stacktrace_lines, policy_list, buckets, component_name, 564 self.stacktrace_lines, rule_list, buckets, component_name,
555 depth, sizes) 565 depth, sizes, symbols)
556 566
557 sorted_sizes_list = sorted( 567 sorted_sizes_list = sorted(
558 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) 568 sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
559 total = 0 569 total = 0
560 for size_pair in sorted_sizes_list: 570 for size_pair in sorted_sizes_list:
561 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0])) 571 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0]))
562 total += size_pair[1] 572 total += size_pair[1]
563 sys.stderr.write('total: %d\n' % (total)) 573 sys.stderr.write('total: %d\n' % (total))
564 574
565 575
566 def update_symbols(symbol_path, mapping_lines, maps_path): 576 def update_symbols(
577 symbol_path, maps_path, appeared_addresses, symbols):
567 """Updates address/symbol mapping on memory and in a .symbol cache file. 578 """Updates address/symbol mapping on memory and in a .symbol cache file.
568 579
569 It reads cached address/symbol mapping from a .symbol file if it exists. 580 It reads cached address/symbol mapping from a .symbol file if it exists.
570 Then, it resolves unresolved addresses from a Chrome binary with pprof. 581 Then, it resolves unresolved addresses from a Chrome binary with pprof.
571 Both mappings on memory and in a .symbol cache file are updated. 582 Both mappings on memory and in a .symbol cache file are updated.
572 583
573 Symbol files are formatted as follows: 584 Symbol files are formatted as follows:
574 <Address> <Symbol> 585 <Address> <Symbol>
575 <Address> <Symbol> 586 <Address> <Symbol>
576 <Address> <Symbol> 587 <Address> <Symbol>
577 ... 588 ...
578 589
579 Args: 590 Args:
580 symbol_path: A string representing a path for a .symbol file. 591 symbol_path: A string representing a path for a .symbol file.
581 mapping_lines: A list of strings containing /proc/.../maps.
582 maps_path: A string of the path of /proc/.../maps. 592 maps_path: A string of the path of /proc/.../maps.
593 appeared_addresses: A list of known addresses.
594 symbols: A dict mapping runtime addresses to symbol names.
583 """ 595 """
584 with open(symbol_path, mode='a+') as symbol_f: 596 with open(symbol_path, mode='a+') as symbol_f:
585 symbol_lines = symbol_f.readlines() 597 symbol_lines = symbol_f.readlines()
586 if symbol_lines: 598 if symbol_lines:
587 for line in symbol_lines: 599 for line in symbol_lines:
588 items = line.split(None, 1) 600 items = line.split(None, 1)
589 address_symbol_dict[items[0]] = items[1].rstrip() 601 if len(items) == 1:
602 items.append('??')
603 symbols[items[0]] = items[1].rstrip()
604 if symbols:
605 sys.stderr.write(' Found %d symbols in cache.\n' % len(symbols))
606 else:
607 sys.stderr.write(' No symbols found in cache.\n')
590 608
591 unresolved_addresses = sorted( 609 unresolved_addresses = sorted(
592 a for a in appeared_addresses if a not in address_symbol_dict) 610 a for a in appeared_addresses if a not in symbols)
593 611
594 if unresolved_addresses: 612 if not unresolved_addresses:
613 sys.stderr.write(' No need to resolve any more addresses.\n')
614 else:
615 sys.stderr.write(' %d addresses are unresolved.\n' %
616 len(unresolved_addresses))
595 prepared_data_dir = tempfile.mkdtemp() 617 prepared_data_dir = tempfile.mkdtemp()
596 prepare_symbol_info(maps_path, prepared_data_dir) 618 prepare_symbol_info(maps_path, prepared_data_dir)
597 619
598 symbols = find_runtime_symbols_list( 620 symbol_list = find_runtime_symbols_list(
599 prepared_data_dir, unresolved_addresses) 621 prepared_data_dir, unresolved_addresses)
600 622
601 for address, symbol in zip(unresolved_addresses, symbols): 623 for address, symbol in zip(unresolved_addresses, symbol_list):
624 if not symbol:
625 symbol = '??'
602 stripped_symbol = symbol.strip() 626 stripped_symbol = symbol.strip()
603 address_symbol_dict[address] = stripped_symbol 627 symbols[address] = stripped_symbol
604 symbol_f.write('%s %s\n' % (address, stripped_symbol)) 628 symbol_f.write('%s %s\n' % (address, stripped_symbol))
605 629
606 shutil.rmtree(prepared_data_dir) 630 shutil.rmtree(prepared_data_dir)
607 631
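An illustrative .symbols cache after a run (addresses and symbol names hypothetical); note that unresolvable addresses are now written back as '??' so they are not re-queried on the next run:

    0x7f3a9c01 WebCore::Node::create
    0x7f3a9c42 WTF::fastMalloc
    0x7f3a9d00 ??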
608 632
609 def parse_policy(policy_path): 633 def parse_policy(policy_path):
610 """Parses policy file. 634 """Parses policy file.
611 635
612 A policy file contains component names and their 636 A policy file contains component names and their
613 stacktrace patterns written as regular expressions. 637 stacktrace patterns written as regular expressions.
614 The patterns are matched against the symbols of 638 The patterns are matched against the symbols of
615 each stacktrace in the order written in the policy file. 639 each stacktrace in the order written in the policy file.
616 640
617 Args: 641 Args:
618 policy_path: A path for a policy file. 642 policy_path: A path for a policy file.
619 Returns: 643 Returns:
620 A list containing component's name and its regex object 644 A tuple of (a list of Rule objects, the policy version, component names).
621 """ 645 """
622 with open(policy_path, mode='r') as policy_f: 646 with open(policy_path, mode='r') as policy_f:
623 policy_lines = policy_f.readlines() 647 policy_lines = policy_f.readlines()
624 648
625 policy_version = POLICY_DEEP_1 649 policy_version = POLICY_DEEP_1
626 if policy_lines[0].startswith('heap profile policy: '): 650 if policy_lines[0].startswith('heap profile policy: '):
627 policy_version = policy_lines[0][21:].strip() 651 policy_version = policy_lines[0][21:].strip()
628 policy_lines.pop(0) 652 policy_lines.pop(0)
629 policy_list = [] 653 rule_list = []
654 components = []
630 655
631 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1: 656 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1:
632 sys.stderr.write(' heap profile policy version: %s\n' % policy_version)
633 for line in policy_lines: 657 for line in policy_lines:
634 if line[0] == '#': 658 if line[0] == '#':
635 continue 659 continue
636 660
637 if policy_version == POLICY_DEEP_2: 661 if policy_version == POLICY_DEEP_2:
638 (name, allocation_type, pattern) = line.strip().split(None, 2) 662 (name, allocation_type, pattern) = line.strip().split(None, 2)
639 mmap = False 663 mmap = False
640 if allocation_type == 'mmap': 664 if allocation_type == 'mmap':
641 mmap = True 665 mmap = True
642 elif policy_version == POLICY_DEEP_1: 666 elif policy_version == POLICY_DEEP_1:
643 name = line.split()[0] 667 name = line.split()[0]
644 pattern = line[len(name) : len(line)].strip() 668 pattern = line[len(name) : len(line)].strip()
645 mmap = False 669 mmap = False
646 670
647 if pattern != 'default': 671 if pattern != 'default':
648 policy_list.append(Policy(name, mmap, pattern)) 672 rule_list.append(Rule(name, mmap, pattern))
649 if components.count(name) == 0: 673 if components.count(name) == 0:
650 components.append(name) 674 components.append(name)
651 675
652 else: 676 else:
653 sys.stderr.write(' invalid heap profile policy version: %s\n' % ( 677 sys.stderr.write(' invalid heap profile policy version: %s\n' % (
654 policy_version)) 678 policy_version))
655 679
656 return policy_list 680 return rule_list, policy_version, components
657 681
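Illustrative policy files for the two versions parse_policy() accepts (component names and patterns hypothetical). A POLICY_DEEP_2 file starts with a version header and carries a name, an allocation type ('mmap' or 'malloc'), and a pattern per line; a 'default' pattern registers the component without adding a matching rule:

    heap profile policy: POLICY_DEEP_2
    tc-webkit      malloc  .*\bWebCore::.*
    mmap-profiler  mmap    .*(ProfilerMalloc|MemoryRegionMap::).*
    other          malloc  default

A POLICY_DEEP_1 file has no header and no allocation-type column:

    tc-webkit .*\bWebCore::.*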
658 682
659 def main(): 683 def find_prefix(path):
660 if (len(sys.argv) < 4) or (not (sys.argv[1] in ['--csv', 684 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path)
661 '--json',
662 '--expand',
663 '--list',
664 '--stacktrace',
665 '--pprof'])):
666 sys.stderr.write("""Usage:
667 %s [options] <chrome-binary> <policy> <profile> [component-name] [depth]
668 685
669 Options:
670 --csv Output result in csv format
671 --json Output result in json format
672 --stacktrace Convert raw address to symbol names
673 --list Lists components and their sizes
674 --expand Show all stacktraces in the specified component
675 of given depth with their sizes
676 --pprof Format the profile file so it can be processed
677 by pprof
678 686
679 Examples: 687 def load_buckets(prefix):
680 dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv
681 dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json
682 dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap
683 dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4
684 dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt
685 """ % (sys.argv[0]))
686 sys.exit(1)
687
688 action = sys.argv[1]
689 chrome_path = sys.argv[2]
690 policy_path = sys.argv[3]
691 log_path = sys.argv[4]
692
693 sys.stderr.write('parsing a policy file\n')
694 policy_list = parse_policy(policy_path)
695
696 p = re.compile('\.[0-9][0-9][0-9][0-9]\.heap')
697 prefix = p.sub('', log_path)
698 symbol_path = prefix + '.symbols'
699
700 sys.stderr.write('parsing the maps file\n')
701 maps_path = prefix + '.maps'
702 with open(maps_path, 'r') as maps_f:
703 maps_lines = maps_f.readlines()
704
705 # Reading buckets 688 # Reading buckets
706 sys.stderr.write('parsing the bucket file\n') 689 sys.stderr.write('Loading bucket files.\n')
707 buckets = {} 690 buckets = {}
708 bucket_count = 0 691 bucket_count = 0
709 n = 0 692 n = 0
710 while True: 693 while True:
711 buckets_path = '%s.%04d.buckets' % (prefix, n) 694 buckets_path = '%s.%04d.buckets' % (prefix, n)
712 if not os.path.exists(buckets_path): 695 if not os.path.exists(buckets_path):
713 if n > 10: 696 if n > 10:
714 break 697 break
715 n += 1 698 n += 1
716 continue 699 continue
717 sys.stderr.write('reading buckets from %s\n' % (buckets_path)) 700 sys.stderr.write(' %s\n' % buckets_path)
718 with open(buckets_path, 'r') as buckets_f: 701 with open(buckets_path, 'r') as buckets_f:
719 for line in buckets_f: 702 for line in buckets_f:
720 words = line.split() 703 words = line.split()
721 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap') 704 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap')
722 n += 1 705 n += 1
723 706
724 log_path_list = [log_path] 707 return buckets
725 708
726 if action in ('--csv', '--json'): 709
727 # search for the sequence of files 710 def determine_dump_path_list(dump_path, prefix):
728 n = int(log_path[len(log_path) - 9 : len(log_path) - 5]) 711 dump_path_list = [dump_path]
729 n += 1 # skip current file 712
730 while True: 713 # search for the sequence of files
731 p = '%s.%04d.heap' % (prefix, n) 714 n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5])
732 if os.path.exists(p): 715 n += 1 # skip current file
733 log_path_list.append(p) 716 while True:
734 else: 717 p = '%s.%04d.heap' % (prefix, n)
735 break 718 if os.path.exists(p):
736 n += 1 719 dump_path_list.append(p)
737
738 logs = []
739 for path in log_path_list:
740 new_log = Log(path)
741 sys.stderr.write('Parsing a dump: %s\n' % path)
742 try:
743 new_log.parse_log(buckets)
744 except EmptyDumpException:
745 sys.stderr.write(' WARNING: ignored an empty dump: %s\n' % path)
746 except ParsingException, e:
747 sys.stderr.write(' Error in parsing heap profile dump: %s\n' % e)
748 sys.exit(1)
749 else: 720 else:
750 logs.append(new_log) 721 break
751 722 n += 1
752 sys.stderr.write('getting symbols\n') 723
753 update_symbols(symbol_path, maps_lines, maps_path) 724 return dump_path_list
754 725
755 # TODO(dmikurube): Many modes now. Split them into separate functions. 726
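How the dump sequence is discovered, as a minimal sketch (file names hypothetical): find_prefix() strips the '.NNNN.heap' suffix, and determine_dump_path_list() walks forward from the given dump while consecutively numbered files exist:

    prefix = find_prefix('hprof.12345.0004.heap')   # -> 'hprof.12345'
    dumps = determine_dump_path_list('hprof.12345.0004.heap', prefix)
    # With .0005 and .0006 on disk, dumps == ['hprof.12345.0004.heap',
    #   'hprof.12345.0005.heap', 'hprof.12345.0006.heap']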
756 if action == '--stacktrace': 727 def load_single_dump(dump_path, buckets, appeared_addresses):
757 logs[0].dump_stacktrace(buckets) 728 new_dump = Dump(dump_path)
758 729 try:
759 elif action == '--csv': 730 new_dump.parse_dump(buckets, appeared_addresses)
760 sys.stdout.write(','.join(components)) 731 except EmptyDumpException:
761 sys.stdout.write('\n') 732 sys.stderr.write('... ignored an empty dump')
762 733 except ParsingException, e:
763 for log in logs: 734 sys.stderr.write('... error in parsing: %s' % e)
764 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) 735 sys.exit(1)
736 else:
737 sys.stderr.write(' (version: %s)' % new_dump.dump_version)
738
739 return new_dump
740
741
742 def load_dump(dump_path, buckets):
743 sys.stderr.write('Loading a heap dump file: "%s"' % dump_path)
744 appeared_addresses = set()
745 dump = load_single_dump(dump_path, buckets, appeared_addresses)
746 sys.stderr.write('.\n')
747 return dump, appeared_addresses
748
749
750 def load_dumps(dump_path_list, buckets):
751 sys.stderr.write('Loading heap dump files.\n')
752 appeared_addresses = set()
753 dumps = []
754 for path in dump_path_list:
755 sys.stderr.write(' %s' % path)
756 dumps.append(load_single_dump(path, buckets, appeared_addresses))
757 sys.stderr.write('\n')
758 return dumps, appeared_addresses
759
760
761 def load_and_update_symbol_cache(prefix, appeared_addresses):
762 maps_path = prefix + '.maps'
763 symbol_path = prefix + '.symbols'
764 sys.stderr.write('Loading and updating symbol cache: "%s".\n' % symbol_path)
765 symbols = {}
766 update_symbols(symbol_path, maps_path, appeared_addresses, symbols)
767 return symbols
768
769
770 def load_default_policies():
771 with open(POLICIES_JSON_PATH, mode='r') as policies_f:
772 default_policies = json.load(policies_f)
773 return default_policies
774
775
776 def load_policy(policies_dict, policy_label):
777 policy_file = policies_dict[policy_label]['file']
778 policy_path = os.path.join(os.path.dirname(__file__), policy_file)
779 rule_list, policy_version, components = parse_policy(policy_path)
780 sys.stderr.write(' %s: %s (version: %s)\n' %
781 (policy_label, policy_path, policy_version))
782 return Policy(rule_list, policy_version, components)
783
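The shape of policies.json that load_default_policies() and load_policy() assume: a policy label mapped to an object with at least a 'file' key naming a policy file relative to this script. The labels and file names below are hypothetical:

    {
      "l0": { "file": "policy.l0.txt" },
      "android": { "file": "policy.android.txt" }
    }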
784
785 def load_policies_dict(policies_dict):
786 sys.stderr.write('Loading policy files.\n')
787 policies = {}
788 for policy_label in policies_dict:
789 policies[policy_label] = load_policy(policies_dict, policy_label)
790 return policies
791
792
793 def load_policies(options_policy):
794 default_policies = load_default_policies()
795 if options_policy:
796 policy_labels = options_policy.split(',')
797 specified_policies = {}
798 for specified_policy in policy_labels:
799 if specified_policy in default_policies:
800 specified_policies[specified_policy] = (
801 default_policies[specified_policy])
802 policies = load_policies_dict(specified_policies)
803 else:
804 policies = load_policies_dict(default_policies)
805 return policies
806
807
808 def do_stacktrace(sys_argv):
809 parser = OptionParser(usage='Usage: %prog stacktrace <dump>')
810 options, args = parser.parse_args(sys_argv)
811
812 if len(args) < 2:
813 parser.error('needs 1 argument.')
814
815 dump_path = args[1]
816
817 prefix = find_prefix(dump_path)
818 buckets = load_buckets(prefix)
819 dump, appeared_addresses = load_dump(dump_path, buckets)
820 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
821
822 dump.print_stacktrace(buckets, symbols)
823
824 return 0
825
826
827 def do_csv(sys_argv):
828 parser = OptionParser('Usage: %prog csv [-p POLICY] <first-dump>')
829 parser.add_option('-p', '--policy', type='string', dest='policy',
830 help='profile with POLICY', metavar='POLICY')
831 options, args = parser.parse_args(sys_argv)
832
833 if len(args) < 2:
M-A Ruel 2012/07/24 14:10:53 what with 10 args?
Dai Mikurube (NOT FULLTIME) 2012/07/24 14:53:45 It just ignores extra args. Should it warn or abort?
M-A Ruel 2012/07/24 14:57:59 Please abort. Unless it is necessary to ignore dur
Dai Mikurube (NOT FULLTIME) 2012/07/24 16:19:24 Done.
834 parser.error('needs 1 argument.')
835
836 dump_path = args[1]
837
838 prefix = find_prefix(dump_path)
839 buckets = load_buckets(prefix)
840 dumps, appeared_addresses = load_dumps(
841 determine_dump_path_list(dump_path, prefix), buckets)
842 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
843 policies = load_policies(options.policy)
844
845 max_components = 0
846 for policy in policies:
847 max_components = max(max_components, len(policies[policy].components))
848
849 for policy in sorted(policies):
850 rule_list = policies[policy].rules
851 components = policies[policy].components
852
853 if len(policies) > 1:
854 sys.stdout.write('%s%s\n' % (policy, ',' * (max_components - 1)))
855 sys.stdout.write('%s%s\n' % (
856 ','.join(components), ',' * (max_components - len(components))))
857
858 for dump in dumps:
859 component_sizes = dump.apply_policy(
860 rule_list, buckets, dumps[0].dump_time, components, symbols)
765 s = [] 861 s = []
766 for c in components: 862 for c in components:
767 if c in ('hour', 'minute', 'second'): 863 if c in ('hour', 'minute', 'second'):
768 s.append('%05.5f' % (component_sizes[c])) 864 s.append('%05.5f' % (component_sizes[c]))
769 else: 865 else:
770 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) 866 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
771 sys.stdout.write(','.join(s)) 867 sys.stdout.write('%s%s\n' % (
772 sys.stdout.write('\n') 868 ','.join(s), ',' * (max_components - len(components))))
773 869
774 elif action == '--json': 870 for bucket in buckets.itervalues():
775 json_base = { 871 bucket.clear_component_cache()
776 'version': 'JSON_DEEP_1', 872
873 return 0
874
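The CSV layout do_csv() produces when several policies are loaded: per policy, a label row, a component-header row, and one row per dump, each padded with commas to the widest policy's component count (labels, components, and sizes hypothetical):

    l0,,,
    second,mmap-profiler,tc-webkit,other
    0.00000,12.34567,45.67890,7.89012
    60.00000,12.50000,46.00000,8.00000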
875
876 def do_json(sys_argv):
877 parser = OptionParser('Usage: %prog json [-p POLICY] <first-dump>')
878 parser.add_option('-p', '--policy', type='string', dest='policy',
879 help='profile with POLICY', metavar='POLICY')
880 options, args = parser.parse_args(sys_argv)
881
882 if len(args) < 2:
883 parser.error('needs 1 argument.')
884
885 dump_path = args[1]
886
887 prefix = find_prefix(dump_path)
888 buckets = load_buckets(prefix)
889 dumps, appeared_addresses = load_dumps(
890 determine_dump_path_list(dump_path, prefix), buckets)
891 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
892 policies = load_policies(options.policy)
893
894 json_base = {
895 'version': 'JSON_DEEP_2',
896 'policies': {},
897 }
898
899 for policy in sorted(policies):
900 rule_list = policies[policy].rules
901 components = policies[policy].components
902
903 json_base['policies'][policy] = {
777 'legends': components, 904 'legends': components,
778 'snapshots': [], 905 'snapshots': [],
779 } 906 }
780 for log in logs: 907
781 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) 908 for dump in dumps:
782 component_sizes['log_path'] = log.log_path 909 component_sizes = dump.apply_policy(
783 component_sizes['log_time'] = datetime.fromtimestamp( 910 rule_list, buckets, dumps[0].dump_time, components, symbols)
784 log.log_time).strftime('%Y-%m-%d %H:%M:%S') 911 component_sizes['dump_path'] = dump.dump_path
785 json_base['snapshots'].append(component_sizes) 912 component_sizes['dump_time'] = datetime.fromtimestamp(
786 json.dump(json_base, sys.stdout, indent=2, sort_keys=True) 913 dump.dump_time).strftime('%Y-%m-%d %H:%M:%S')
787 914 json_base['policies'][policy]['snapshots'].append(component_sizes)
788 elif action == '--list': 915
789 component_sizes = logs[0].apply_policy( 916 for bucket in buckets.itervalues():
790 policy_list, buckets, logs[0].log_time) 917 bucket.clear_component_cache()
918
919 json.dump(json_base, sys.stdout, indent=2, sort_keys=True)
920
921 return 0
922
923
924 def do_list(sys_argv):
925 parser = OptionParser('Usage: %prog [-p POLICY] list <first-dump>')
926 parser.add_option('-p', '--policy', type='string', dest='policy',
927 help='profile with POLICY', metavar='POLICY')
928 options, args = parser.parse_args(sys_argv)
929
930 if len(args) < 2:
931 parser.error('needs 1 argument.')
932
933 dump_path = args[1]
934
935 prefix = find_prefix(dump_path)
936 buckets = load_buckets(prefix)
937 dumps, appeared_addresses = load_dumps(
938 determine_dump_path_list(dump_path, prefix), buckets)
939 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
940 policies = load_policies(options.policy)
941
942 for policy in sorted(policies):
943 rule_list = policies[policy].rules
944 components = policies[policy].components
945
946 component_sizes = dumps[0].apply_policy(
947 rule_list, buckets, dumps[0].dump_time, components, symbols)
948 sys.stdout.write('%s:\n' % policy)
791 for c in components: 949 for c in components:
792 if c in ['hour', 'minute', 'second']: 950 if c in ['hour', 'minute', 'second']:
793 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c])) 951 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))
794 else: 952 else:
795 sys.stdout.write('%30s %10.3f\n' % ( 953 sys.stdout.write('%30s %10.3f\n' % (
796 c, component_sizes[c] / 1024.0 / 1024.0)) 954 c, component_sizes[c] / 1024.0 / 1024.0))
797 955
798 elif action == '--expand': 956 for bucket in buckets.itervalues():
799 component_name = sys.argv[5] 957 bucket.clear_component_cache()
800 depth = sys.argv[6]
801 logs[0].expand(policy_list, buckets, component_name, int(depth))
802 958
803 elif action == '--pprof': 959 return 0
804 if len(sys.argv) > 5: 960
805 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5]) 961
806 else: 962 def do_expand(sys_argv):
807 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None) 963 parser = OptionParser(
964 'Usage: %prog expand <dump> <policy> <component> <depth>')
965 options, args = parser.parse_args(sys_argv)
966
967 if len(args) < 5:
968 parser.error('needs 4 arguments.')
969
970 dump_path = args[1]
971 target_policy = args[2]
972 component_name = args[3]
973 depth = args[4]
974
975 prefix = find_prefix(dump_path)
976 buckets = load_buckets(prefix)
977 dump, appeared_addresses = load_dump(dump_path, buckets)
978 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
979 policies = load_policies(target_policy)
980
981 rule_list = policies[target_policy].rules
982
983 dump.expand(rule_list, buckets, component_name, int(depth), symbols)
984
985 return 0
986
987
988 def do_pprof(sys_argv):
989 parser = OptionParser(
990 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>')
991 parser.add_option('-c', '--component', type='string', dest='component',
992 help='restrict to COMPONENT', metavar='COMPONENT')
993 options, args = parser.parse_args(sys_argv)
994
995 if len(args) < 3:
996 parser.error('needs 2 arguments.')
997
998 dump_path = args[1]
999 target_policy = args[2]
1000 component = options.component
1001
1002 prefix = find_prefix(dump_path)
1003 buckets = load_buckets(prefix)
1004 dump, appeared_addresses = load_dump(dump_path, buckets)
1005 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
1006 policies = load_policies(target_policy)
1007
1008 rule_list = policies[target_policy].rules
1009
1010 with open(prefix + '.maps', 'r') as maps_f:
1011 maps_lines = maps_f.readlines()
1012 dump.print_for_pprof(rule_list, buckets, maps_lines, component, symbols)
1013
1014 return 0
1015
1016
1017 def main():
1018 COMMANDS = {
1019 'csv': do_csv,
1020 'expand': do_expand,
1021 'json': do_json,
1022 'list': do_list,
1023 'pprof': do_pprof,
1024 'stacktrace': do_stacktrace,
1025 }
1026
1027 # TODO(dmikurube): Remove this message after a while.
1028 if len(sys.argv) >= 2 and sys.argv[1].startswith('--'):
1029 sys.stderr.write("""
1030 **************** NOTICE!! ****************
1031 The command line format has changed.
1032 Please look at the description below.
1033 ******************************************
1034
1035 """)
1036
1037 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS):
1038 sys.stderr.write("""Usage: %s <command> [options] [<args>]
1039
1040 Commands:
1041 csv Classify memory usage in CSV
1042 expand Show all stacktraces contained in the specified component
1043 json Classify memory usage in JSON
1044 list Classify memory usage in simple listing format
1045 pprof Format the profile dump so that it can be processed by pprof
1046 stacktrace Convert runtime addresses to symbol names
1047
1048 Quick Reference:
1049 dmprof csv [-p POLICY] <first-dump>
1050 dmprof expand <dump> <policy> <component> <depth>
1051 dmprof json [-p POLICY] <first-dump>
1052 dmprof list [-p POLICY] <first-dump>
1053 dmprof pprof [-c COMPONENT] <dump> <policy>
1054 dmprof stacktrace <dump>
1055 """ % (sys.argv[0]))
1056 sys.exit(1)
1057 action = sys.argv.pop(1)
1058
1059 return COMMANDS[action](sys.argv)
808 1060
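Under the new command-line format, the examples from the old usage text translate roughly as follows (the policy label 'l0' is hypothetical; it must be a key in policies.json). The Chrome binary and policy-file arguments are gone: symbols are resolved via the .maps file and policies come from policies.json:

    dmprof csv hprof.12345.0001.heap > result.csv
    dmprof json -p l0 hprof.12345.0001.heap > result.json
    dmprof list hprof.12345.0012.heap
    dmprof expand hprof.12345.0012.heap l0 tc-webkit 4
    dmprof pprof -c tc-webkit hprof.12345.0012.heap l0 > for_pprof.txt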
809 1061
810 if __name__ == '__main__': 1062 if __name__ == '__main__':
811 sys.exit(main()) 1063 sys.exit(main())