Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(82)

Side by Side Diff: tools/deep_memory_profiler/dmprof

Issue 10802049: Change dmprof commandline format, and clean up start-up routines. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebased Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tools/deep_memory_profiler/policies.json » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """The deep heap profiler script for Chrome.""" 6 """The deep heap profiler script for Chrome."""
7 7
8 from datetime import datetime 8 from datetime import datetime
9 import json 9 import json
10 import os 10 import os
11 import re 11 import re
12 from optparse import OptionParser
12 import shutil 13 import shutil
13 import subprocess 14 import subprocess
14 import sys 15 import sys
15 import tempfile 16 import tempfile
16 17
17 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( 18 FIND_RUNTIME_SYMBOLS_PATH = os.path.join(
18 os.path.dirname(os.path.abspath(__file__)), 19 os.path.dirname(os.path.abspath(__file__)),
19 os.pardir, 20 os.pardir,
20 'find_runtime_symbols') 21 'find_runtime_symbols')
21 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) 22 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH)
22 23
23 from prepare_symbol_info import prepare_symbol_info 24 from prepare_symbol_info import prepare_symbol_info
24 from find_runtime_symbols import find_runtime_symbols_list 25 from find_runtime_symbols import find_runtime_symbols_list
25 26
26 BUCKET_ID = 5 27 BUCKET_ID = 5
27 VIRTUAL = 0 28 VIRTUAL = 0
28 COMMITTED = 1 29 COMMITTED = 1
29 ALLOC_COUNT = 2 30 ALLOC_COUNT = 2
30 FREE_COUNT = 3 31 FREE_COUNT = 3
31 NULL_REGEX = re.compile('') 32 NULL_REGEX = re.compile('')
32 33
34 POLICIES_JSON_PATH = os.path.join(os.path.dirname(__file__), 'policies.json')
M-A Ruel 2012/07/24 14:57:59 os.path.dirname(os.path.abspath(__file__)) for the
Dai Mikurube (NOT FULLTIME) 2012/07/24 16:19:24 Ah, I forgot it. Done.
35
33 # Heap Profile Dump versions 36 # Heap Profile Dump versions
34 37
35 # DUMP_DEEP_1 is OBSOLETE. 38 # DUMP_DEEP_1 is OBSOLETE.
36 # DUMP_DEEP_1 DOES NOT distinguish mmap regions and malloc chunks. 39 # DUMP_DEEP_1 DOES NOT distinguish mmap regions and malloc chunks.
37 # Their stacktraces DO contain mmap* or tc-* at their tops. 40 # Their stacktraces DO contain mmap* or tc-* at their tops.
38 # They should be processed by POLICY_DEEP_1. 41 # They should be processed by POLICY_DEEP_1.
39 DUMP_DEEP_1 = 'DUMP_DEEP_1' 42 DUMP_DEEP_1 = 'DUMP_DEEP_1'
40 43
41 # DUMP_DEEP_2 is OBSOLETE. 44 # DUMP_DEEP_2 is OBSOLETE.
42 # DUMP_DEEP_2 DOES distinguish mmap regions and malloc chunks. 45 # DUMP_DEEP_2 DOES distinguish mmap regions and malloc chunks.
(...skipping 22 matching lines...) Expand all
65 # Heap Profile Policy versions 68 # Heap Profile Policy versions
66 69
67 # POLICY_DEEP_1 DOES NOT include allocation_type columns. 70 # POLICY_DEEP_1 DOES NOT include allocation_type columns.
68 # mmap regions are distinguished by mmap frames in the pattern column. 71 # mmap regions are distinguished by mmap frames in the pattern column.
69 POLICY_DEEP_1 = 'POLICY_DEEP_1' 72 POLICY_DEEP_1 = 'POLICY_DEEP_1'
70 73
71 # POLICY_DEEP_2 DOES include allocation_type columns. 74 # POLICY_DEEP_2 DOES include allocation_type columns.
72 # mmap regions are distinguished by the allocation_type column. 75 # mmap regions are distinguished by the allocation_type column.
73 POLICY_DEEP_2 = 'POLICY_DEEP_2' 76 POLICY_DEEP_2 = 'POLICY_DEEP_2'
74 77
75 # TODO(dmikurube): Avoid global variables.
76 address_symbol_dict = {}
77 appeared_addresses = set()
78 components = []
79
80 78
81 class EmptyDumpException(Exception): 79 class EmptyDumpException(Exception):
82 def __init__(self, value): 80 def __init__(self, value):
83 self.value = value 81 self.value = value
84 def __str__(self): 82 def __str__(self):
85 return repr(self.value) 83 return repr(self.value)
86 84
87 85
88 class ParsingException(Exception): 86 class ParsingException(Exception):
89 def __init__(self, value): 87 def __init__(self, value):
90 self.value = value 88 self.value = value
91 def __str__(self): 89 def __str__(self):
92 return repr(self.value) 90 return repr(self.value)
93 91
94 92
95 class InvalidDumpException(ParsingException): 93 class InvalidDumpException(ParsingException):
96 def __init__(self, value): 94 def __init__(self, value):
97 self.value = value 95 self.value = value
98 def __str__(self): 96 def __str__(self):
99 return "invalid heap profile dump: %s" % repr(self.value) 97 return "invalid heap profile dump: %s" % repr(self.value)
100 98
101 99
102 class ObsoleteDumpVersionException(ParsingException): 100 class ObsoleteDumpVersionException(ParsingException):
103 def __init__(self, value): 101 def __init__(self, value):
104 self.value = value 102 self.value = value
105 def __str__(self): 103 def __str__(self):
106 return "obsolete heap profile dump version: %s" % repr(self.value) 104 return "obsolete heap profile dump version: %s" % repr(self.value)
107 105
108 106
109 class Policy(object): 107 class Rule(object):
108 """Represents one matching rule in a policy file."""
110 109
111 def __init__(self, name, mmap, pattern): 110 def __init__(self, name, mmap, pattern):
112 self.name = name 111 self.name = name
113 self.mmap = mmap 112 self.mmap = mmap
114 self.condition = re.compile(pattern + r'\Z') 113 self.condition = re.compile(pattern + r'\Z')
115 114
116 115
117 def get_component(policy_list, bucket): 116 class Policy(object):
117 """Represents a policy, i.e. the contents of a policy file."""
118
119 def __init__(self, rules, version, components):
120 self.rules = rules
121 self.version = version
122 self.components = components
123
124 def append_rule(self, rule):
125 self.rules.append(rule)
126
127
128 def get_component(rule_list, bucket, symbols):
118 """Returns a component name which a given bucket belongs to. 129 """Returns a component name which a given bucket belongs to.
119 130
120 Args: 131 Args:
121 policy_list: A list containing Policy objects. (Parsed policy data by 132 rule_list: A list of Rule objects.
122 parse_policy.)
123 bucket: A Bucket object to be searched for. 133 bucket: A Bucket object to be searched for.
134 symbols: A dict mapping runtime addresses to symbol names.
124 135
125 Returns: 136 Returns:
126 A string representing a component name. 137 A string representing a component name.
127 """ 138 """
128 if not bucket: 139 if not bucket:
129 return 'no-bucket' 140 return 'no-bucket'
130 if bucket.component: 141 if bucket.component_cache:
131 return bucket.component 142 return bucket.component_cache
132 143
133 stacktrace = ''.join( 144 stacktrace = ''.join(symbols[a] + ' ' for a in bucket.stacktrace).strip()
134 address_symbol_dict[a] + ' ' for a in bucket.stacktrace).strip()
135 145
136 for policy in policy_list: 146 for rule in rule_list:
137 if bucket.mmap == policy.mmap and policy.condition.match(stacktrace): 147 if bucket.mmap == rule.mmap and rule.condition.match(stacktrace):
138 bucket.component = policy.name 148 bucket.component_cache = rule.name
139 return policy.name 149 return rule.name
140 150
141 assert False 151 assert False
142 152
143 153
144 class Bucket(object): 154 class Bucket(object):
155 """Represents a bucket, which is a unit of memory classification."""
145 156
146 def __init__(self, stacktrace, mmap): 157 def __init__(self, stacktrace, mmap):
147 self.stacktrace = stacktrace 158 self.stacktrace = stacktrace
148 self.mmap = mmap 159 self.mmap = mmap
149 self.component = '' 160 self.component_cache = ''
161
162 def clear_component_cache(self):
163 self.component_cache = ''
150 164
151 165
152 class Log(object): 166 class Dump(object):
167 """Represents one heap profile dump."""
153 168
154 """A class representing one dumped log data.""" 169 def __init__(self, dump_path):
155 def __init__(self, log_path): 170 self.dump_path = dump_path
156 self.log_path = log_path 171 self.dump_lines = [
157 self.log_lines = [ 172 l for l in open(self.dump_path, 'r') if l and not l.startswith('#')]
158 l for l in open(self.log_path, 'r') if l and not l.startswith('#')] 173 self.dump_version = ''
159 self.log_version = ''
160 sys.stderr.write('Loading a dump: %s\n' % log_path)
161 self.stacktrace_lines = [] 174 self.stacktrace_lines = []
162 self.counters = {} 175 self.counters = {}
163 self.log_time = os.stat(self.log_path).st_mtime 176 self.dump_time = os.stat(self.dump_path).st_mtime
164 177
165 def dump_stacktrace(buckets): 178 def print_stacktrace(self, buckets, symbols):
166 """Prints a given stacktrace. 179 """Prints a given stacktrace.
167 180
168 Args: 181 Args:
169 buckets: A dict mapping bucket ids and their corresponding Bucket 182 buckets: A dict mapping bucket ids to Bucket objects.
170 objects. 183 symbols: A dict mapping runtime addresses to symbol names.
171 """ 184 """
172 for line in self.stacktrace_lines: 185 for line in self.stacktrace_lines:
173 words = line.split() 186 words = line.split()
174 bucket = buckets.get(int(words[BUCKET_ID])) 187 bucket = buckets.get(int(words[BUCKET_ID]))
175 if not bucket: 188 if not bucket:
176 continue 189 continue
177 for i in range(0, BUCKET_ID - 1): 190 for i in range(0, BUCKET_ID - 1):
178 sys.stdout.write(words[i] + ' ') 191 sys.stdout.write(words[i] + ' ')
179 for address in bucket.stacktrace: 192 for address in bucket.stacktrace:
180 sys.stdout.write((address_symbol_dict.get(address) or address) + ' ') 193 sys.stdout.write((symbols.get(address) or address) + ' ')
181 sys.stdout.write('\n') 194 sys.stdout.write('\n')
182 195
183 @staticmethod 196 @staticmethod
184 def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets, 197 def accumulate_size_for_pprof(stacktrace_lines, rule_list, buckets,
185 component_name): 198 component_name, symbols):
186 """Accumulates size of committed chunks and the number of allocated chunks. 199 """Accumulates size of committed chunks and the number of allocated chunks.
187 200
188 Args: 201 Args:
189 stacktrace_lines: A list of strings which are valid as stacktraces. 202 stacktrace_lines: A list of strings which are valid as stacktraces.
190 policy_list: A list containing Policy objects. (Parsed policy data by 203 rule_list: A list of Rule objects.
191 parse_policy.) 204 buckets: A dict mapping bucket ids to Bucket objects.
192 buckets: A dict mapping bucket ids and their corresponding Bucket
193 objects.
194 component_name: A name of component for filtering. 205 component_name: A name of component for filtering.
206 symbols: A dict mapping runtime addresses to symbol names.
195 207
196 Returns: 208 Returns:
197 Two integers which are the accumulated size of committed regions and the 209 Two integers which are the accumulated size of committed regions and the
198 number of allocated chunks, respectively. 210 number of allocated chunks, respectively.
199 """ 211 """
200 com_committed = 0 212 com_committed = 0
201 com_allocs = 0 213 com_allocs = 0
202 for line in stacktrace_lines: 214 for line in stacktrace_lines:
203 words = line.split() 215 words = line.split()
204 bucket = buckets.get(int(words[BUCKET_ID])) 216 bucket = buckets.get(int(words[BUCKET_ID]))
205 if (not bucket or 217 if (not bucket or
206 (component_name and 218 (component_name and
207 component_name != get_component(policy_list, bucket))): 219 component_name != get_component(rule_list, bucket, symbols))):
208 continue 220 continue
209 221
210 com_committed += int(words[COMMITTED]) 222 com_committed += int(words[COMMITTED])
211 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) 223 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])
212 224
213 return com_committed, com_allocs 225 return com_committed, com_allocs
214 226
215 @staticmethod 227 @staticmethod
216 def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list, 228 def print_stacktrace_lines_for_pprof(stacktrace_lines, rule_list,
217 buckets, component_name): 229 buckets, component_name, symbols):
218 """Prints information of stacktrace lines for pprof. 230 """Prints information of stacktrace lines for pprof.
219 231
220 Args: 232 Args:
221 stacktrace_lines: A list of strings which are valid as stacktraces. 233 stacktrace_lines: A list of strings which are valid as stacktraces.
222 policy_list: A list containing Policy objects. (Parsed policy data by 234 rule_list: A list of Rule objects.
223 parse_policy.) 235 buckets: A dict mapping bucket ids to Bucket objects.
224 buckets: A dict mapping bucket ids and their corresponding Bucket
225 objects.
226 component_name: A name of component for filtering. 236 component_name: A name of component for filtering.
237 symbols: A dict mapping runtime addresses to symbol names.
227 """ 238 """
228 for line in stacktrace_lines: 239 for line in stacktrace_lines:
229 words = line.split() 240 words = line.split()
230 bucket = buckets.get(int(words[BUCKET_ID])) 241 bucket = buckets.get(int(words[BUCKET_ID]))
231 if (not bucket or 242 if (not bucket or
232 (component_name and 243 (component_name and
233 component_name != get_component(policy_list, bucket))): 244 component_name != get_component(rule_list, bucket, symbols))):
234 continue 245 continue
235 246
236 sys.stdout.write('%6d: %8s [%6d: %8s] @' % ( 247 sys.stdout.write('%6d: %8s [%6d: %8s] @' % (
237 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), 248 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
238 words[COMMITTED], 249 words[COMMITTED],
239 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), 250 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
240 words[COMMITTED])) 251 words[COMMITTED]))
241 for address in bucket.stacktrace: 252 for address in bucket.stacktrace:
242 sys.stdout.write(' ' + address) 253 sys.stdout.write(' ' + address)
243 sys.stdout.write('\n') 254 sys.stdout.write('\n')
244 255
245 def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name): 256 def print_for_pprof(
246 """Converts the log file so it can be processed by pprof. 257 self, rule_list, buckets, maps_lines, component_name, symbols):
258 """Converts the heap profile dump so it can be processed by pprof.
247 259
248 Args: 260 Args:
249 policy_list: A list containing Policy objects. (Parsed policy data by 261 rule_list: A list of Rule objects.
250 parse_policy.) 262 buckets: A dict mapping bucket ids to Bucket objects.
251 buckets: A dict mapping bucket ids and their corresponding Bucket 263 maps_lines: A list of strings containing /proc/.../maps.
252 objects.
253 mapping_lines: A list of strings containing /proc/.../maps.
254 component_name: A name of component for filtering. 264 component_name: A name of component for filtering.
265 symbols: A dict mapping runtime addresses to symbol names.
255 """ 266 """
256 sys.stdout.write('heap profile: ') 267 sys.stdout.write('heap profile: ')
257 com_committed, com_allocs = self.accumulate_size_for_pprof( 268 com_committed, com_allocs = self.accumulate_size_for_pprof(
258 self.stacktrace_lines, policy_list, buckets, component_name) 269 self.stacktrace_lines, rule_list, buckets, component_name, symbols)
259 270
260 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( 271 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
261 com_allocs, com_committed, com_allocs, com_committed)) 272 com_allocs, com_committed, com_allocs, com_committed))
262 273
263 self.dump_stacktrace_lines_for_pprof( 274 self.print_stacktrace_lines_for_pprof(
264 self.stacktrace_lines, policy_list, buckets, component_name) 275 self.stacktrace_lines, rule_list, buckets, component_name, symbols)
265 276
266 sys.stdout.write('MAPPED_LIBRARIES:\n') 277 sys.stdout.write('MAPPED_LIBRARIES:\n')
267 for line in mapping_lines: 278 for line in maps_lines:
268 sys.stdout.write(line) 279 sys.stdout.write(line)
269 280
270 @staticmethod 281 @staticmethod
271 def check_stacktrace_line(stacktrace_line, buckets): 282 def check_stacktrace_line(stacktrace_line, buckets, appeared_addresses):
272 """Checks if a given stacktrace_line is valid as stacktrace. 283 """Checks if a given stacktrace_line is valid as stacktrace.
273 284
274 Args: 285 Args:
275 stacktrace_line: A string to be checked. 286 stacktrace_line: A string to be checked.
276 buckets: A dict mapping bucket ids and their corresponding Bucket 287 buckets: A dict mapping bucket ids to Bucket objects.
277 objects. 288 appeared_addresses: A list where appeared addresses will be stored.
278 289
279 Returns: 290 Returns:
280 True if the given stacktrace_line is valid. 291 True if the given stacktrace_line is valid.
281 """ 292 """
282 words = stacktrace_line.split() 293 words = stacktrace_line.split()
283 if len(words) < BUCKET_ID + 1: 294 if len(words) < BUCKET_ID + 1:
284 return False 295 return False
285 if words[BUCKET_ID - 1] != '@': 296 if words[BUCKET_ID - 1] != '@':
286 return False 297 return False
287 bucket = buckets.get(int(words[BUCKET_ID])) 298 bucket = buckets.get(int(words[BUCKET_ID]))
(...skipping 10 matching lines...) Expand all
298 A pair of an integer indicating a line number after skipped, and a 309 A pair of an integer indicating a line number after skipped, and a
299 boolean value which is True if found a line which skipping_condition 310 boolean value which is True if found a line which skipping_condition
300 is False for. 311 is False for.
301 """ 312 """
302 while skipping_condition(line_number): 313 while skipping_condition(line_number):
303 line_number += 1 314 line_number += 1
304 if line_number >= max_line_number: 315 if line_number >= max_line_number:
305 return line_number, False 316 return line_number, False
306 return line_number, True 317 return line_number, True
307 318
308 def parse_stacktraces_while_valid(self, buckets, log_lines, line_number): 319 def parse_stacktraces_while_valid(
320 self, buckets, dump_lines, line_number, appeared_addresses):
309 """Parses stacktrace lines while the lines are valid. 321 """Parses stacktrace lines while the lines are valid.
310 322
311 Args: 323 Args:
312 buckets: A dict mapping bucket ids and their corresponding Bucket 324 buckets: A dict mapping bucket ids to Bucket objects.
313 objects. 325 dump_lines: A list of lines to be parsed.
314 log_lines: A list of lines to be parsed. 326 line_number: A line number to start parsing in dump_lines.
315 line_number: An integer representing the starting line number in 327 appeared_addresses: A list where appeared addresses will be stored.
316 log_lines.
317 328
318 Returns: 329 Returns:
319 A pair of a list of valid lines and an integer representing the last 330 A pair of a list of valid lines and an integer representing the last
320 line number in log_lines. 331 line number in dump_lines.
321 """ 332 """
322 (line_number, _) = self.skip_lines_while( 333 (line_number, _) = self.skip_lines_while(
323 line_number, len(log_lines), 334 line_number, len(dump_lines),
324 lambda n: not log_lines[n].split()[0].isdigit()) 335 lambda n: not dump_lines[n].split()[0].isdigit())
325 stacktrace_lines_start = line_number 336 stacktrace_lines_start = line_number
326 (line_number, _) = self.skip_lines_while( 337 (line_number, _) = self.skip_lines_while(
327 line_number, len(log_lines), 338 line_number, len(dump_lines),
328 lambda n: self.check_stacktrace_line(log_lines[n], buckets)) 339 lambda n: self.check_stacktrace_line(
329 return (log_lines[stacktrace_lines_start:line_number], line_number) 340 dump_lines[n], buckets, appeared_addresses))
341 return (dump_lines[stacktrace_lines_start:line_number], line_number)
330 342
331 def parse_stacktraces(self, buckets, line_number): 343 def parse_stacktraces(self, buckets, line_number, appeared_addresses):
332 """Parses lines in self.log_lines as stacktrace. 344 """Parses lines in self.dump_lines as stacktrace.
333 345
334 Valid stacktrace lines are stored into self.stacktrace_lines. 346 Valid stacktrace lines are stored into self.stacktrace_lines.
335 347
336 Args: 348 Args:
337 buckets: A dict mapping bucket ids and their corresponding Bucket 349 buckets: A dict mapping bucket ids to Bucket objects.
338 objects. 350 line_number: A line number to start parsing in dump_lines.
339 line_number: An integer representing the starting line number in 351 appeared_addresses: A list where appeared addresses will be stored.
340 log_lines.
341 352
342 Raises: 353 Raises:
343 ParsingException for invalid dump versions. 354 ParsingException for invalid dump versions.
344 """ 355 """
345 sys.stderr.write(' Version: %s\n' % self.log_version) 356 if self.dump_version == DUMP_DEEP_5:
346
347 if self.log_version == DUMP_DEEP_5:
348 (self.stacktrace_lines, line_number) = ( 357 (self.stacktrace_lines, line_number) = (
349 self.parse_stacktraces_while_valid( 358 self.parse_stacktraces_while_valid(
350 buckets, self.log_lines, line_number)) 359 buckets, self.dump_lines, line_number, appeared_addresses))
351 360
352 elif self.log_version in DUMP_DEEP_OBSOLETE: 361 elif self.dump_version in DUMP_DEEP_OBSOLETE:
353 raise ObsoleteDumpVersionException(self.log_version) 362 raise ObsoleteDumpVersionException(self.dump_version)
354 363
355 else: 364 else:
356 raise InvalidDumpException('Invalid version: %s' % self.log_version) 365 raise InvalidDumpException('Invalid version: %s' % self.dump_version)
357 366
358 def parse_global_stats(self): 367 def parse_global_stats(self):
359 """Parses lines in self.log_lines as global stats.""" 368 """Parses lines in self.dump_lines as global stats."""
360 (ln, _) = self.skip_lines_while( 369 (ln, _) = self.skip_lines_while(
361 0, len(self.log_lines), 370 0, len(self.dump_lines),
362 lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n') 371 lambda n: self.dump_lines[n] != 'GLOBAL_STATS:\n')
363 372
364 global_stat_names = [ 373 global_stat_names = [
365 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', 374 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',
366 'nonprofiled-absent', 'nonprofiled-anonymous', 375 'nonprofiled-absent', 'nonprofiled-anonymous',
367 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', 376 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
368 'nonprofiled-stack', 'nonprofiled-other', 377 'nonprofiled-stack', 'nonprofiled-other',
369 'profiled-mmap', 'profiled-malloc'] 378 'profiled-mmap', 'profiled-malloc']
370 379
371 for prefix in global_stat_names: 380 for prefix in global_stat_names:
372 (ln, _) = self.skip_lines_while( 381 (ln, _) = self.skip_lines_while(
373 ln, len(self.log_lines), 382 ln, len(self.dump_lines),
374 lambda n: self.log_lines[n].split()[0] != prefix) 383 lambda n: self.dump_lines[n].split()[0] != prefix)
375 words = self.log_lines[ln].split() 384 words = self.dump_lines[ln].split()
376 self.counters[prefix + '_virtual'] = int(words[-2]) 385 self.counters[prefix + '_virtual'] = int(words[-2])
377 self.counters[prefix + '_committed'] = int(words[-1]) 386 self.counters[prefix + '_committed'] = int(words[-1])
378 387
379 def parse_version(self): 388 def parse_version(self):
380 """Parses a version string in self.log_lines. 389 """Parses a version string in self.dump_lines.
381 390
382 Returns: 391 Returns:
383 A pair of (a string representing a version of the stacktrace dump, 392 A pair of (a string representing a version of the stacktrace dump,
384 and an integer indicating a line number next to the version string). 393 and an integer indicating a line number next to the version string).
385 394
386 Raises: 395 Raises:
387 ParsingException for invalid dump versions. 396 ParsingException for invalid dump versions.
388 """ 397 """
389 version = '' 398 version = ''
390 399
391 # Skip until an identifiable line. 400 # Skip until an identifiable line.
392 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') 401 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
393 if not self.log_lines: 402 if not self.dump_lines:
394 raise EmptyDumpException('Empty heap dump file.') 403 raise EmptyDumpException('Empty heap dump file.')
395 (ln, found) = self.skip_lines_while( 404 (ln, found) = self.skip_lines_while(
396 0, len(self.log_lines), 405 0, len(self.dump_lines),
397 lambda n: not self.log_lines[n].startswith(headers)) 406 lambda n: not self.dump_lines[n].startswith(headers))
398 if not found: 407 if not found:
399 raise InvalidDumpException('No version header.') 408 raise InvalidDumpException('No version header.')
400 409
401 # Identify a version. 410 # Identify a version.
402 if self.log_lines[ln].startswith('heap profile: '): 411 if self.dump_lines[ln].startswith('heap profile: '):
403 version = self.log_lines[ln][13:].strip() 412 version = self.dump_lines[ln][13:].strip()
404 if version == DUMP_DEEP_5: 413 if version == DUMP_DEEP_5:
405 (ln, _) = self.skip_lines_while( 414 (ln, _) = self.skip_lines_while(
406 ln, len(self.log_lines), 415 ln, len(self.dump_lines),
407 lambda n: self.log_lines[n] != 'STACKTRACES:\n') 416 lambda n: self.dump_lines[n] != 'STACKTRACES:\n')
408 elif version in DUMP_DEEP_OBSOLETE: 417 elif version in DUMP_DEEP_OBSOLETE:
409 raise ObsoleteDumpVersionException(version) 418 raise ObsoleteDumpVersionException(version)
410 else: 419 else:
411 raise InvalidDumpException('Invalid version: %s' % version) 420 raise InvalidDumpException('Invalid version: %s' % version)
412 elif self.log_lines[ln] == 'STACKTRACES:\n': 421 elif self.dump_lines[ln] == 'STACKTRACES:\n':
413 raise ObsoleteDumpVersionException(DUMP_DEEP_1) 422 raise ObsoleteDumpVersionException(DUMP_DEEP_1)
414 elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n': 423 elif self.dump_lines[ln] == 'MMAP_STACKTRACES:\n':
415 raise ObsoleteDumpVersionException(DUMP_DEEP_2) 424 raise ObsoleteDumpVersionException(DUMP_DEEP_2)
416 425
417 return (version, ln) 426 return (version, ln)
418 427
419 def parse_log(self, buckets): 428 def parse_dump(self, buckets, appeared_addresses):
420 self.log_version, ln = self.parse_version() 429 self.dump_version, ln = self.parse_version()
421 self.parse_global_stats() 430 self.parse_global_stats()
422 self.parse_stacktraces(buckets, ln) 431 self.parse_stacktraces(buckets, ln, appeared_addresses)
423 432
424 @staticmethod 433 @staticmethod
425 def accumulate_size_for_policy(stacktrace_lines, 434 def accumulate_size_for_policy(stacktrace_lines,
426 policy_list, buckets, sizes): 435 rule_list, buckets, sizes, symbols):
427 for line in stacktrace_lines: 436 for line in stacktrace_lines:
428 words = line.split() 437 words = line.split()
429 bucket = buckets.get(int(words[BUCKET_ID])) 438 bucket = buckets.get(int(words[BUCKET_ID]))
430 component_match = get_component(policy_list, bucket) 439 component_match = get_component(rule_list, bucket, symbols)
440
431 sizes[component_match] += int(words[COMMITTED]) 441 sizes[component_match] += int(words[COMMITTED])
432 442
433 if component_match.startswith('tc-'): 443 if component_match.startswith('tc-'):
434 sizes['tc-total-log'] += int(words[COMMITTED]) 444 sizes['tc-total-log'] += int(words[COMMITTED])
435 elif component_match.startswith('mmap-'): 445 elif component_match.startswith('mmap-'):
436 sizes['mmap-total-log'] += int(words[COMMITTED]) 446 sizes['mmap-total-log'] += int(words[COMMITTED])
437 else: 447 else:
438 sizes['other-total-log'] += int(words[COMMITTED]) 448 sizes['other-total-log'] += int(words[COMMITTED])
439 449
440 def apply_policy(self, policy_list, buckets, first_log_time): 450 def apply_policy(
451 self, rule_list, buckets, first_dump_time, components, symbols):
441 """Aggregates the total memory size of each component. 452 """Aggregates the total memory size of each component.
442 453
443 Iterate through all stacktraces and attribute them to one of the components 454 Iterate through all stacktraces and attribute them to one of the components
444 based on the policy. It is important to apply policy in right order. 455 based on the policy. It is important to apply policy in right order.
445 456
446 Args: 457 Args:
447 policy_list: A list containing Policy objects. (Parsed policy data by 458 rule_list: A list of Rule objects.
448 parse_policy.) 459 buckets: A dict mapping bucket ids to Bucket objects.
449 buckets: A dict mapping bucket ids and their corresponding Bucket 460 first_dump_time: An integer representing time when the first dump is
450 objects.
451 first_log_time: An integer representing time when the first log is
452 dumped. 461 dumped.
462 components: A list of strings of component names.
463 symbols: A dict mapping runtime addresses to symbol names.
453 464
454 Returns: 465 Returns:
455 A dict mapping components and their corresponding sizes. 466 A dict mapping components and their corresponding sizes.
456 """ 467 """
457 468
458 sys.stderr.write('apply policy:%s\n' % (self.log_path)) 469 sys.stderr.write('Applying policy: "%s".\n' % self.dump_path)
459 sizes = dict((c, 0) for c in components) 470 sizes = dict((c, 0) for c in components)
460 471
461 self.accumulate_size_for_policy(self.stacktrace_lines, 472 self.accumulate_size_for_policy(self.stacktrace_lines,
462 policy_list, buckets, sizes) 473 rule_list, buckets, sizes, symbols)
463 474
464 mmap_prefix = 'profiled-mmap' 475 mmap_prefix = 'profiled-mmap'
465 malloc_prefix = 'profiled-malloc' 476 malloc_prefix = 'profiled-malloc'
466 477
467 sizes['mmap-no-log'] = ( 478 sizes['mmap-no-log'] = (
468 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log']) 479 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log'])
469 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix] 480 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix]
470 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix] 481 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix]
471 482
472 sizes['tc-no-log'] = ( 483 sizes['tc-no-log'] = (
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
507 'nonprofiled-stack_committed', 518 'nonprofiled-stack_committed',
508 'nonprofiled-other_committed') 519 'nonprofiled-other_committed')
509 sizes['mustbezero'] = ( 520 sizes['mustbezero'] = (
510 self.counters['total_committed'] - 521 self.counters['total_committed'] -
511 sum(self.counters[i] for i in removed)) 522 sum(self.counters[i] for i in removed))
512 if 'total-exclude-profiler' in sizes: 523 if 'total-exclude-profiler' in sizes:
513 sizes['total-exclude-profiler'] = ( 524 sizes['total-exclude-profiler'] = (
514 self.counters['total_committed'] - 525 self.counters['total_committed'] -
515 (sizes['mmap-profiler'] + sizes['mmap-allocated-type'])) 526 (sizes['mmap-profiler'] + sizes['mmap-allocated-type']))
516 if 'hour' in sizes: 527 if 'hour' in sizes:
517 sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0 528 sizes['hour'] = (self.dump_time - first_dump_time) / 60.0 / 60.0
518 if 'minute' in sizes: 529 if 'minute' in sizes:
519 sizes['minute'] = (self.log_time - first_log_time) / 60.0 530 sizes['minute'] = (self.dump_time - first_dump_time) / 60.0
520 if 'second' in sizes: 531 if 'second' in sizes:
521 sizes['second'] = self.log_time - first_log_time 532 sizes['second'] = self.dump_time - first_dump_time
522 533
523 return sizes 534 return sizes
524 535
525 @staticmethod 536 @staticmethod
526 def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets, 537 def accumulate_size_for_expand(stacktrace_lines, rule_list, buckets,
527 component_name, depth, sizes): 538 component_name, depth, sizes, symbols):
528 for line in stacktrace_lines: 539 for line in stacktrace_lines:
529 words = line.split() 540 words = line.split()
530 bucket = buckets.get(int(words[BUCKET_ID])) 541 bucket = buckets.get(int(words[BUCKET_ID]))
531 component_match = get_component(policy_list, bucket) 542 component_match = get_component(rule_list, bucket, symbols)
532 if component_match == component_name: 543 if component_match == component_name:
533 stacktrace_sequence = '' 544 stacktrace_sequence = ''
534 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace), 545 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace),
535 1 + depth)]: 546 1 + depth)]:
536 stacktrace_sequence += address_symbol_dict[address] + ' ' 547 stacktrace_sequence += symbols[address] + ' '
537 if not stacktrace_sequence in sizes: 548 if not stacktrace_sequence in sizes:
538 sizes[stacktrace_sequence] = 0 549 sizes[stacktrace_sequence] = 0
539 sizes[stacktrace_sequence] += int(words[COMMITTED]) 550 sizes[stacktrace_sequence] += int(words[COMMITTED])
540 551
541 def expand(self, policy_list, buckets, component_name, depth): 552 def expand(self, rule_list, buckets, component_name, depth, symbols):
542 """Prints all stacktraces in a given component of given depth. 553 """Prints all stacktraces in a given component of given depth.
543 554
544 Args: 555 Args:
545 policy_list: A list containing Policy objects. (Parsed policy data by 556 rule_list: A list of Rule objects.
546 parse_policy.) 557 buckets: A dict mapping bucket ids to Bucket objects.
547 buckets: A dict mapping bucket ids and their corresponding Bucket
548 objects.
549 component_name: A name of component for filtering. 558 component_name: A name of component for filtering.
550 depth: An integer representing depth to be printed. 559 depth: An integer representing depth to be printed.
560 symbols: A dict mapping runtime addresses to symbol names.
551 """ 561 """
552 sizes = {} 562 sizes = {}
553 563
554 self.accumulate_size_for_expand( 564 self.accumulate_size_for_expand(
555 self.stacktrace_lines, policy_list, buckets, component_name, 565 self.stacktrace_lines, rule_list, buckets, component_name,
556 depth, sizes) 566 depth, sizes, symbols)
557 567
558 sorted_sizes_list = sorted( 568 sorted_sizes_list = sorted(
559 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) 569 sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
560 total = 0 570 total = 0
561 for size_pair in sorted_sizes_list: 571 for size_pair in sorted_sizes_list:
562 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0])) 572 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0]))
563 total += size_pair[1] 573 total += size_pair[1]
564 sys.stderr.write('total: %d\n' % (total)) 574 sys.stderr.write('total: %d\n' % (total))
565 575
566 576
567 def update_symbols(symbol_path, mapping_lines, maps_path): 577 def update_symbols(
578 symbol_path, maps_path, appeared_addresses, symbols):
568 """Updates address/symbol mapping on memory and in a .symbol cache file. 579 """Updates address/symbol mapping on memory and in a .symbol cache file.
569 580
570 It reads cached address/symbol mapping from a .symbol file if it exists. 581 It reads cached address/symbol mapping from a .symbol file if it exists.
571 Then, it resolves unresolved addresses from a Chrome binary with pprof. 582 Then, it resolves unresolved addresses from a Chrome binary with pprof.
572 Both mappings on memory and in a .symbol cache file are updated. 583 Both mappings on memory and in a .symbol cache file are updated.
573 584
574 Symbol files are formatted as follows: 585 Symbol files are formatted as follows:
575 <Address> <Symbol> 586 <Address> <Symbol>
576 <Address> <Symbol> 587 <Address> <Symbol>
577 <Address> <Symbol> 588 <Address> <Symbol>
578 ... 589 ...
579 590
580 Args: 591 Args:
581 symbol_path: A string representing a path for a .symbol file. 592 symbol_path: A string representing a path for a .symbol file.
582 mapping_lines: A list of strings containing /proc/.../maps.
583 maps_path: A string of the path of /proc/.../maps. 593 maps_path: A string of the path of /proc/.../maps.
594 appeared_addresses: A list of known addresses.
595 symbols: A dict mapping runtime addresses to symbol names.
584 """ 596 """
585 with open(symbol_path, mode='a+') as symbol_f: 597 with open(symbol_path, mode='a+') as symbol_f:
586 symbol_lines = symbol_f.readlines() 598 symbol_lines = symbol_f.readlines()
587 if symbol_lines: 599 if symbol_lines:
588 for line in symbol_lines: 600 for line in symbol_lines:
589 items = line.split(None, 1) 601 items = line.split(None, 1)
590 address_symbol_dict[items[0]] = items[1].rstrip() 602 if len(items) == 1:
603 items.append('??')
604 symbols[items[0]] = items[1].rstrip()
605 if symbols:
606 sys.stderr.write(' Found %d symbols in cache.\n' % len(symbols))
607 else:
608 sys.stderr.write(' No symbols found in cache.\n')
591 609
592 unresolved_addresses = sorted( 610 unresolved_addresses = sorted(
593 a for a in appeared_addresses if a not in address_symbol_dict) 611 a for a in appeared_addresses if a not in symbols)
594 612
595 if unresolved_addresses: 613 if not unresolved_addresses:
614 sys.stderr.write(' No need to resolve any more addresses.\n')
615 else:
616 sys.stderr.write(' %d addresses are unresolved.\n' %
617 len(unresolved_addresses))
596 prepared_data_dir = tempfile.mkdtemp() 618 prepared_data_dir = tempfile.mkdtemp()
597 try: 619 try:
598 prepare_symbol_info(maps_path, prepared_data_dir) 620 prepare_symbol_info(maps_path, prepared_data_dir)
599 621
600 symbols = find_runtime_symbols_list( 622 symbol_list = find_runtime_symbols_list(
601 prepared_data_dir, unresolved_addresses) 623 prepared_data_dir, unresolved_addresses)
602 624
603 for address, symbol in zip(unresolved_addresses, symbols): 625 for address, symbol in zip(unresolved_addresses, symbol_list):
626 if not symbol:
627 symbol = '??'
604 stripped_symbol = symbol.strip() 628 stripped_symbol = symbol.strip()
605 address_symbol_dict[address] = stripped_symbol 629 symbols[address] = stripped_symbol
606 symbol_f.write('%s %s\n' % (address, stripped_symbol)) 630 symbol_f.write('%s %s\n' % (address, stripped_symbol))
607 finally: 631 finally:
608 shutil.rmtree(prepared_data_dir) 632 shutil.rmtree(prepared_data_dir)
609 633
610 634
611 def parse_policy(policy_path): 635 def parse_policy(policy_path):
612 """Parses policy file. 636 """Parses policy file.
613 637
614 A policy file contains component's names and their 638 A policy file contains component's names and their
615 stacktrace pattern written in regular expression. 639 stacktrace pattern written in regular expression.
616 Those patterns are matched against each symbols of 640 Those patterns are matched against each symbols of
617 each stacktraces in the order written in the policy file 641 each stacktraces in the order written in the policy file
618 642
619 Args: 643 Args:
620 policy_path: A path for a policy file. 644 policy_path: A path for a policy file.
621 Returns: 645 Returns:
622 A list containing component's name and its regex object 646 A list containing component's name and its regex object
623 """ 647 """
624 with open(policy_path, mode='r') as policy_f: 648 with open(policy_path, mode='r') as policy_f:
625 policy_lines = policy_f.readlines() 649 policy_lines = policy_f.readlines()
626 650
627 policy_version = POLICY_DEEP_1 651 policy_version = POLICY_DEEP_1
628 if policy_lines[0].startswith('heap profile policy: '): 652 if policy_lines[0].startswith('heap profile policy: '):
629 policy_version = policy_lines[0][21:].strip() 653 policy_version = policy_lines[0][21:].strip()
630 policy_lines.pop(0) 654 policy_lines.pop(0)
631 policy_list = [] 655 rule_list = []
656 components = []
632 657
633 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1: 658 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1:
634 sys.stderr.write(' heap profile policy version: %s\n' % policy_version)
635 for line in policy_lines: 659 for line in policy_lines:
636 if line[0] == '#': 660 if line[0] == '#':
637 continue 661 continue
638 662
639 if policy_version == POLICY_DEEP_2: 663 if policy_version == POLICY_DEEP_2:
640 (name, allocation_type, pattern) = line.strip().split(None, 2) 664 (name, allocation_type, pattern) = line.strip().split(None, 2)
641 mmap = False 665 mmap = False
642 if allocation_type == 'mmap': 666 if allocation_type == 'mmap':
643 mmap = True 667 mmap = True
644 elif policy_version == POLICY_DEEP_1: 668 elif policy_version == POLICY_DEEP_1:
645 name = line.split()[0] 669 name = line.split()[0]
646 pattern = line[len(name) : len(line)].strip() 670 pattern = line[len(name) : len(line)].strip()
647 mmap = False 671 mmap = False
648 672
649 if pattern != 'default': 673 if pattern != 'default':
650 policy_list.append(Policy(name, mmap, pattern)) 674 rule_list.append(Rule(name, mmap, pattern))
651 if components.count(name) == 0: 675 if components.count(name) == 0:
652 components.append(name) 676 components.append(name)
653 677
654 else: 678 else:
655 sys.stderr.write(' invalid heap profile policy version: %s\n' % ( 679 sys.stderr.write(' invalid heap profile policy version: %s\n' % (
656 policy_version)) 680 policy_version))
657 681
658 return policy_list 682 return rule_list, policy_version, components
659 683
660 684
661 def main(): 685 def find_prefix(path):
662 if (len(sys.argv) < 4) or (not (sys.argv[1] in ['--csv', 686 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path)
663 '--json',
664 '--expand',
665 '--list',
666 '--stacktrace',
667 '--pprof'])):
668 sys.stderr.write("""Usage:
669 %s [options] <chrome-binary> <policy> <profile> [component-name] [depth]
670 687
671 Options:
672 --csv Output result in csv format
673 --json Output result in json format
674 --stacktrace Convert raw address to symbol names
675 --list Lists components and their sizes
676 --expand Show all stacktraces in the specified component
677 of given depth with their sizes
678 --pprof Format the profile file so it can be processed
679 by pprof
680 688
681 Examples: 689 def load_buckets(prefix):
682 dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv
683 dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json
684 dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap
685 dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4
686 dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt
687 """ % (sys.argv[0]))
688 sys.exit(1)
689
690 action = sys.argv[1]
691 chrome_path = sys.argv[2]
692 policy_path = sys.argv[3]
693 log_path = sys.argv[4]
694
695 sys.stderr.write('parsing a policy file\n')
696 policy_list = parse_policy(policy_path)
697
698 p = re.compile('\.[0-9][0-9][0-9][0-9]\.heap')
699 prefix = p.sub('', log_path)
700 symbol_path = prefix + '.symbols'
701
702 sys.stderr.write('parsing the maps file\n')
703 maps_path = prefix + '.maps'
704 with open(maps_path, 'r') as maps_f:
705 maps_lines = maps_f.readlines()
706
707 # Reading buckets 690 # Reading buckets
708 sys.stderr.write('parsing the bucket file\n') 691 sys.stderr.write('Loading bucket files.\n')
709 buckets = {} 692 buckets = {}
710 bucket_count = 0 693 bucket_count = 0
711 n = 0 694 n = 0
712 while True: 695 while True:
713 buckets_path = '%s.%04d.buckets' % (prefix, n) 696 buckets_path = '%s.%04d.buckets' % (prefix, n)
714 if not os.path.exists(buckets_path): 697 if not os.path.exists(buckets_path):
715 if n > 10: 698 if n > 10:
716 break 699 break
717 n += 1 700 n += 1
718 continue 701 continue
719 sys.stderr.write('reading buckets from %s\n' % (buckets_path)) 702 sys.stderr.write(' %s\n' % buckets_path)
720 with open(buckets_path, 'r') as buckets_f: 703 with open(buckets_path, 'r') as buckets_f:
721 for line in buckets_f: 704 for line in buckets_f:
722 words = line.split() 705 words = line.split()
723 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap') 706 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap')
724 n += 1 707 n += 1
725 708
726 log_path_list = [log_path] 709 return buckets
727 710
728 if action in ('--csv', '--json'): 711
729 # search for the sequence of files 712 def determine_dump_path_list(dump_path, prefix):
730 n = int(log_path[len(log_path) - 9 : len(log_path) - 5]) 713 dump_path_list = [dump_path]
731 n += 1 # skip current file 714
732 while True: 715 # search for the sequence of files
733 p = '%s.%04d.heap' % (prefix, n) 716 n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5])
734 if os.path.exists(p): 717 n += 1 # skip current file
735 log_path_list.append(p) 718 while True:
736 else: 719 p = '%s.%04d.heap' % (prefix, n)
737 break 720 if os.path.exists(p):
738 n += 1 721 dump_path_list.append(p)
739
740 logs = []
741 for path in log_path_list:
742 new_log = Log(path)
743 sys.stderr.write('Parsing a dump: %s\n' % path)
744 try:
745 new_log.parse_log(buckets)
746 except EmptyDumpException:
747 sys.stderr.write(' WARNING: ignored an empty dump: %s\n' % path)
748 except ParsingException, e:
749 sys.stderr.write(' Error in parsing heap profile dump: %s\n' % e)
750 sys.exit(1)
751 else: 722 else:
752 logs.append(new_log) 723 break
753 724 n += 1
754 sys.stderr.write('getting symbols\n') 725
755 update_symbols(symbol_path, maps_lines, maps_path) 726 return dump_path_list
756 727
757 # TODO(dmikurube): Many modes now. Split them into separete functions. 728
758 if action == '--stacktrace': 729 def load_single_dump(dump_path, buckets, appeared_addresses):
759 logs[0].dump_stacktrace(buckets) 730 new_dump = Dump(dump_path)
760 731 try:
761 elif action == '--csv': 732 new_dump.parse_dump(buckets, appeared_addresses)
762 sys.stdout.write(','.join(components)) 733 except EmptyDumpException:
763 sys.stdout.write('\n') 734 sys.stderr.write('... ignored an empty dump')
764 735 except ParsingException, e:
765 for log in logs: 736 sys.stderr.write('... error in parsing: %s' % e)
766 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) 737 sys.exit(1)
738 else:
739 sys.stderr.write(' (version: %s)' % new_dump.dump_version)
740
741 return new_dump
742
743
744 def load_dump(dump_path, buckets):
745 sys.stderr.write('Loading a heap dump file: "%s"' % dump_path)
746 appeared_addresses = set()
747 dump = load_single_dump(dump_path, buckets, appeared_addresses)
748 sys.stderr.write('.\n')
749 return dump, appeared_addresses
750
751
752 def load_dumps(dump_path_list, buckets):
753 sys.stderr.write('Loading heap dump files.\n')
754 appeared_addresses = set()
755 dumps = []
756 for path in dump_path_list:
757 sys.stderr.write(' %s' % path)
758 dumps.append(load_single_dump(path, buckets, appeared_addresses))
759 sys.stderr.write('\n')
760 return dumps, appeared_addresses
761
762
763 def load_and_update_symbol_cache(prefix, appeared_addresses):
764 maps_path = prefix + '.maps'
765 symbol_path = prefix + '.symbols'
766 sys.stderr.write('Loading and updating symbol cache: "%s".\n' % symbol_path)
767 symbols = {}
768 update_symbols(symbol_path, maps_path, appeared_addresses, symbols)
769 return symbols
770
771
772 def load_default_policies():
773 with open(POLICIES_JSON_PATH, mode='r') as policies_f:
774 default_policies = json.load(policies_f)
775 return default_policies
776
777
778 def load_policy(policies_dict, policy_label):
779 policy_file = policies_dict[policy_label]['file']
780 policy_path = os.path.join(os.path.dirname(__file__), policy_file)
781 rule_list, policy_version, components = parse_policy(policy_path)
782 sys.stderr.write(' %s: %s (version: %s)\n' %
783 (policy_label, policy_path, policy_version))
784 return Policy(rule_list, policy_version, components)
785
786
787 def load_policies_dict(policies_dict):
788 sys.stderr.write('Loading policy files.\n')
789 policies = {}
790 for policy_label in policies_dict:
791 policies[policy_label] = load_policy(policies_dict, policy_label)
792 return policies
793
794
795 def load_policies(options_policy):
796 default_policies = load_default_policies()
797 if options_policy:
798 policy_labels = options_policy.split(',')
799 specified_policies = {}
800 for specified_policy in policy_labels:
801 if specified_policy in default_policies:
802 specified_policies[specified_policy] = (
803 default_policies[specified_policy])
804 policies = load_policies_dict(specified_policies)
805 else:
806 policies = load_policies_dict(default_policies)
807 return policies
808
809
810 def do_stacktrace(sys_argv):
811 parser = OptionParser(usage='Usage: %prog stacktrace <dump>')
812 options, args = parser.parse_args(sys_argv)
813
814 if len(args) < 2:
815 parser.error('needs 1 argument.')
816
817 dump_path = args[1]
818
819 prefix = find_prefix(dump_path)
820 buckets = load_buckets(prefix)
821 dump, appeared_addresses = load_dump(dump_path, buckets)
822 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
823
824 dump.print_stacktrace(buckets, symbols)
825
826 return 0
827
828
829 def do_csv(sys_argv):
830 parser = OptionParser('Usage: %prog csv [-p POLICY] <first-dump>')
831 parser.add_option('-p', '--policy', type='string', dest='policy',
832 help='profile with POLICY', metavar='POLICY')
833 options, args = parser.parse_args(sys_argv)
834
835 if len(args) < 2:
836 parser.error('needs 1 argument.')
837
838 dump_path = args[1]
839
840 prefix = find_prefix(dump_path)
841 buckets = load_buckets(prefix)
842 dumps, appeared_addresses = load_dumps(
843 determine_dump_path_list(dump_path, prefix), buckets)
844 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
845 policies = load_policies(options.policy)
846
847 max_components = 0
848 for policy in policies:
849 max_components = max(max_components, len(policies[policy].components))
850
851 for policy in sorted(policies):
852 rule_list = policies[policy].rules
853 components = policies[policy].components
854
855 if len(policies) > 1:
856 sys.stdout.write('%s%s\n' % (policy, ',' * (max_components - 1)))
857 sys.stdout.write('%s%s\n' % (
858 ','.join(components), ',' * (max_components - len(components))))
859
860 for dump in dumps:
861 component_sizes = dump.apply_policy(
862 rule_list, buckets, dumps[0].dump_time, components, symbols)
767 s = [] 863 s = []
768 for c in components: 864 for c in components:
769 if c in ('hour', 'minute', 'second'): 865 if c in ('hour', 'minute', 'second'):
770 s.append('%05.5f' % (component_sizes[c])) 866 s.append('%05.5f' % (component_sizes[c]))
771 else: 867 else:
772 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) 868 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
773 sys.stdout.write(','.join(s)) 869 sys.stdout.write('%s%s\n' % (
774 sys.stdout.write('\n') 870 ','.join(s), ',' * (max_components - len(components))))
775 871
776 elif action == '--json': 872 for bucket in buckets.itervalues():
777 json_base = { 873 bucket.clear_component_cache()
778 'version': 'JSON_DEEP_1', 874
875 return 0
876
877
878 def do_json(sys_argv):
879 parser = OptionParser('Usage: %prog json [-p POLICY] <first-dump>')
880 parser.add_option('-p', '--policy', type='string', dest='policy',
881 help='profile with POLICY', metavar='POLICY')
882 options, args = parser.parse_args(sys_argv)
883
884 if len(args) < 2:
885 parser.error('needs 1 argument.')
886
887 dump_path = args[1]
888
889 prefix = find_prefix(dump_path)
890 buckets = load_buckets(prefix)
891 dumps, appeared_addresses = load_dumps(
892 determine_dump_path_list(dump_path, prefix), buckets)
893 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
894 policies = load_policies(options.policy)
895
896 json_base = {
897 'version': 'JSON_DEEP_2',
898 'policies': {},
899 }
900
901 for policy in sorted(policies):
902 rule_list = policies[policy].rules
903 components = policies[policy].components
904
905 json_base['policies'][policy] = {
779 'legends': components, 906 'legends': components,
780 'snapshots': [], 907 'snapshots': [],
781 } 908 }
782 for log in logs: 909
783 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) 910 for dump in dumps:
784 component_sizes['log_path'] = log.log_path 911 component_sizes = dump.apply_policy(
785 component_sizes['log_time'] = datetime.fromtimestamp( 912 rule_list, buckets, dumps[0].dump_time, components, symbols)
786 log.log_time).strftime('%Y-%m-%d %H:%M:%S') 913 component_sizes['dump_path'] = dump.dump_path
787 json_base['snapshots'].append(component_sizes) 914 component_sizes['dump_time'] = datetime.fromtimestamp(
788 json.dump(json_base, sys.stdout, indent=2, sort_keys=True) 915 dump.dump_time).strftime('%Y-%m-%d %H:%M:%S')
789 916 json_base['policies'][policy]['snapshots'].append(component_sizes)
790 elif action == '--list': 917
791 component_sizes = logs[0].apply_policy( 918 for bucket in buckets.itervalues():
792 policy_list, buckets, logs[0].log_time) 919 bucket.clear_component_cache()
920
921 json.dump(json_base, sys.stdout, indent=2, sort_keys=True)
922
923 return 0
924
925
926 def do_list(sys_argv):
927 parser = OptionParser('Usage: %prog [-p POLICY] list <first-dump>')
928 parser.add_option('-p', '--policy', type='string', dest='policy',
929 help='profile with POLICY', metavar='POLICY')
930 options, args = parser.parse_args(sys_argv)
931
932 if len(args) < 2:
933 parser.error('needs 1 argument.')
934
935 dump_path = args[1]
936
937 prefix = find_prefix(dump_path)
938 buckets = load_buckets(prefix)
939 dumps, appeared_addresses = load_dumps(
940 determine_dump_path_list(dump_path, prefix), buckets)
941 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
942 policies = load_policies(options.policy)
943
944 for policy in sorted(policies):
945 rule_list = policies[policy].rules
946 components = policies[policy].components
947
948 component_sizes = dumps[0].apply_policy(
949 rule_list, buckets, dumps[0].dump_time, components, symbols)
950 sys.stdout.write('%s:\n' % policy)
793 for c in components: 951 for c in components:
794 if c in ['hour', 'minute', 'second']: 952 if c in ['hour', 'minute', 'second']:
795 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c])) 953 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))
796 else: 954 else:
797 sys.stdout.write('%30s %10.3f\n' % ( 955 sys.stdout.write('%30s %10.3f\n' % (
798 c, component_sizes[c] / 1024.0 / 1024.0)) 956 c, component_sizes[c] / 1024.0 / 1024.0))
799 957
800 elif action == '--expand': 958 for bucket in buckets.itervalues():
801 component_name = sys.argv[5] 959 bucket.clear_component_cache()
802 depth = sys.argv[6]
803 logs[0].expand(policy_list, buckets, component_name, int(depth))
804 960
805 elif action == '--pprof': 961 return 0
806 if len(sys.argv) > 5: 962
807 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5]) 963
808 else: 964 def do_expand(sys_argv):
809 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None) 965 parser = OptionParser(
966 'Usage: %prog expand <dump> <policy> <component> <depth>')
967 options, args = parser.parse_args(sys_argv)
968
969 if len(args) < 5:
970 parser.error('needs 4 arguments.')
971
972 dump_path = args[1]
973 target_policy = args[2]
974 component_name = args[3]
975 depth = args[4]
976
977 prefix = find_prefix(dump_path)
978 buckets = load_buckets(prefix)
979 dump, appeared_addresses = load_dump(dump_path, buckets)
980 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
981 policies = load_policies(target_policy)
982
983 rule_list = policies[target_policy].rules
984
985 dump.expand(rule_list, buckets, component_name, int(depth), symbols)
986
987 return 0
988
989
990 def do_pprof(sys_argv):
991 parser = OptionParser(
992 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>')
993 parser.add_option('-c', '--component', type='string', dest='component',
994 help='restrict to COMPONENT', metavar='COMPONENT')
995 options, args = parser.parse_args(sys_argv)
996
997 if len(args) < 3:
998 parser.error('needs 2 arguments.')
999
1000 dump_path = args[1]
1001 target_policy = args[2]
1002 component = options.component
1003
1004 prefix = find_prefix(dump_path)
1005 buckets = load_buckets(prefix)
1006 dump, appeared_addresses = load_dump(dump_path, buckets)
1007 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
1008 policies = load_policies(target_policy)
1009
1010 rule_list = policies[target_policy].rules
1011
1012 with open(prefix + '.maps', 'r') as maps_f:
1013 maps_lines = maps_f.readlines()
1014 dump.print_for_pprof(rule_list, buckets, maps_lines, component, symbols)
1015
1016 return 0
1017
1018
1019 def main():
1020 COMMANDS = {
1021 'csv': do_csv,
1022 'expand': do_expand,
1023 'json': do_json,
1024 'list': do_list,
1025 'pprof': do_pprof,
1026 'stacktrace': do_stacktrace,
1027 }
1028
1029 # TODO(dmikurube): Remove this message after a while.
1030 if len(sys.argv) >= 2 and sys.argv[1].startswith('--'):
1031 sys.stderr.write("""
1032 **************** NOTICE!! ****************
1033 The command line format has changed.
1034 Please look at the description below.
1035 ******************************************
1036
1037 """)
1038
1039 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS):
1040 sys.stderr.write("""Usage: %s <command> [options] [<args>]
1041
1042 Commands:
1043 csv Classify memory usage in CSV
1044 expand Show all stacktraces contained in the specified component
1045 json Classify memory usage in JSON
1046 list Classify memory usage in simple listing format
1047 pprof Format the profile dump so that it can be processed by pprof
1048 stacktrace Convert runtime addresses to symbol names
1049
1050 Quick Reference:
1051 dmprof csv [-p POLICY] <first-dump>
1052 dmprof expand <dump> <policy> <component> <depth>
1053 dmprof json [-p POLICY] <first-dump>
1054 dmprof list [-p POLICY] <first-dump>
1055 dmprof pprof [-c COMPONENT] <dump> <policy>
1056 dmprof stacktrace <dump>
1057 """ % (sys.argv[0]))
1058 sys.exit(1)
1059 action = sys.argv.pop(1)
1060
1061 return COMMANDS[action](sys.argv)
810 1062
811 1063
812 if __name__ == '__main__': 1064 if __name__ == '__main__':
813 sys.exit(main()) 1065 sys.exit(main())
OLDNEW
« no previous file with comments | « no previous file | tools/deep_memory_profiler/policies.json » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698