Chromium Code Reviews

Side by Side Diff: tools/deep_memory_profiler/dmprof

Issue 10802049: Change dmprof commandline format, and clean up start-up routines. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: reordered import. Created 8 years, 4 months ago
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """The deep heap profiler script for Chrome.""" 6 """The deep heap profiler script for Chrome."""
7 7
8 from datetime import datetime 8 from datetime import datetime
9 import json 9 import json
10 import optparse
10 import os 11 import os
11 import re 12 import re
12 import shutil 13 import shutil
13 import subprocess 14 import subprocess
14 import sys 15 import sys
15 import tempfile 16 import tempfile
16 17
17 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( 18 FIND_RUNTIME_SYMBOLS_PATH = os.path.join(
18 os.path.dirname(os.path.abspath(__file__)), 19 os.path.dirname(os.path.abspath(__file__)),
19 os.pardir, 20 os.pardir,
20 'find_runtime_symbols') 21 'find_runtime_symbols')
21 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) 22 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH)
22 23
23 from prepare_symbol_info import prepare_symbol_info 24 from prepare_symbol_info import prepare_symbol_info
24 from find_runtime_symbols import find_runtime_symbols_list 25 from find_runtime_symbols import find_runtime_symbols_list
25 26
26 BUCKET_ID = 5 27 BUCKET_ID = 5
27 VIRTUAL = 0 28 VIRTUAL = 0
28 COMMITTED = 1 29 COMMITTED = 1
29 ALLOC_COUNT = 2 30 ALLOC_COUNT = 2
30 FREE_COUNT = 3 31 FREE_COUNT = 3
31 NULL_REGEX = re.compile('') 32 NULL_REGEX = re.compile('')
32 33
34 POLICIES_JSON_PATH = os.path.join(
35 os.path.dirname(os.path.abspath(__file__)),
36 'policies.json')
37
33 # Heap Profile Dump versions 38 # Heap Profile Dump versions
34 39
35 # DUMP_DEEP_1 is OBSOLETE. 40 # DUMP_DEEP_1 is OBSOLETE.
36 # DUMP_DEEP_1 DOES NOT distinguish mmap regions from malloc chunks. 41 # DUMP_DEEP_1 DOES NOT distinguish mmap regions from malloc chunks.
37 # Their stacktraces DO contain mmap* or tc-* at their tops. 42 # Their stacktraces DO contain mmap* or tc-* at their tops.
38 # They should be processed by POLICY_DEEP_1. 43 # They should be processed by POLICY_DEEP_1.
39 DUMP_DEEP_1 = 'DUMP_DEEP_1' 44 DUMP_DEEP_1 = 'DUMP_DEEP_1'
40 45
41 # DUMP_DEEP_2 is OBSOLETE. 46 # DUMP_DEEP_2 is OBSOLETE.
42 # DUMP_DEEP_2 DOES distinguish mmap regions from malloc chunks. 47 # DUMP_DEEP_2 DOES distinguish mmap regions from malloc chunks.
(...skipping 22 matching lines...)
65 # Heap Profile Policy versions 70 # Heap Profile Policy versions
66 71
67 # POLICY_DEEP_1 DOES NOT include allocation_type columns. 72 # POLICY_DEEP_1 DOES NOT include allocation_type columns.
68 # mmap regions are distinguished by mmap frames in the pattern column. 73 # mmap regions are distinguished by mmap frames in the pattern column.
69 POLICY_DEEP_1 = 'POLICY_DEEP_1' 74 POLICY_DEEP_1 = 'POLICY_DEEP_1'
70 75
71 # POLICY_DEEP_2 DOES include allocation_type columns. 76 # POLICY_DEEP_2 DOES include allocation_type columns.
72 # mmap regions are distinguished by the allocation_type column. 77 # mmap regions are distinguished by the allocation_type column.
73 POLICY_DEEP_2 = 'POLICY_DEEP_2' 78 POLICY_DEEP_2 = 'POLICY_DEEP_2'
74 79
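A note on the POLICY_DEEP_2 format: each non-comment line carries a component name, an allocation type, and a pattern. A minimal sketch of how parse_policy() below splits one such line (the component name and pattern here are hypothetical):

    line = 'tc-webkit malloc .*WebCore::.*'
    (name, allocation_type, pattern) = line.strip().split(None, 2)
    mmap = (allocation_type == 'mmap')  # any other type is treated as malloc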
75 # TODO(dmikurube): Avoid global variables.
76 address_symbol_dict = {}
77 appeared_addresses = set()
78 components = []
79
80 80
81 class EmptyDumpException(Exception): 81 class EmptyDumpException(Exception):
82 def __init__(self, value): 82 def __init__(self, value):
83 self.value = value 83 self.value = value
84 def __str__(self): 84 def __str__(self):
85 return repr(self.value) 85 return repr(self.value)
86 86
87 87
88 class ParsingException(Exception): 88 class ParsingException(Exception):
89 def __init__(self, value): 89 def __init__(self, value):
90 self.value = value 90 self.value = value
91 def __str__(self): 91 def __str__(self):
92 return repr(self.value) 92 return repr(self.value)
93 93
94 94
95 class InvalidDumpException(ParsingException): 95 class InvalidDumpException(ParsingException):
96 def __init__(self, value): 96 def __init__(self, value):
97 self.value = value 97 self.value = value
98 def __str__(self): 98 def __str__(self):
99 return "invalid heap profile dump: %s" % repr(self.value) 99 return "invalid heap profile dump: %s" % repr(self.value)
100 100
101 101
102 class ObsoleteDumpVersionException(ParsingException): 102 class ObsoleteDumpVersionException(ParsingException):
103 def __init__(self, value): 103 def __init__(self, value):
104 self.value = value 104 self.value = value
105 def __str__(self): 105 def __str__(self):
106 return "obsolete heap profile dump version: %s" % repr(self.value) 106 return "obsolete heap profile dump version: %s" % repr(self.value)
107 107
108 108
109 class Policy(object): 109 class Rule(object):
110 """Represents one matching rule in a policy file."""
110 111
111 def __init__(self, name, mmap, pattern): 112 def __init__(self, name, mmap, pattern):
112 self.name = name 113 self.name = name
113 self.mmap = mmap 114 self.mmap = mmap
114 self.condition = re.compile(pattern + r'\Z') 115 self.condition = re.compile(pattern + r'\Z')
115 116
116 117
117 def get_component(policy_list, bucket): 118 class Policy(object):
119 """Represents a policy, the content of a policy file."""
120
121 def __init__(self, rules, version, components):
122 self.rules = rules
123 self.version = version
124 self.components = components
125
126 def append_rule(self, rule):
127 self.rules.append(rule)
128
129
130 def get_component(rule_list, bucket, symbols):
118 """Returns the name of the component that a given bucket belongs to. 131 """Returns the name of the component that a given bucket belongs to.
119 132
120 Args: 133 Args:
121 policy_list: A list containing Policy objects. (Parsed policy data by 134 rule_list: A list of Rule objects.
122 parse_policy.)
123 bucket: A Bucket object to be searched for. 135 bucket: A Bucket object to be searched for.
136 symbols: A dict mapping runtime addresses to symbol names.
124 137
125 Returns: 138 Returns:
126 A string representing a component name. 139 A string representing a component name.
127 """ 140 """
128 if not bucket: 141 if not bucket:
129 return 'no-bucket' 142 return 'no-bucket'
130 if bucket.component: 143 if bucket.component_cache:
131 return bucket.component 144 return bucket.component_cache
132 145
133 stacktrace = ''.join( 146 stacktrace = ''.join(symbols[a] + ' ' for a in bucket.stacktrace).strip()
134 address_symbol_dict[a] + ' ' for a in bucket.stacktrace).strip()
135 147
136 for policy in policy_list: 148 for rule in rule_list:
137 if bucket.mmap == policy.mmap and policy.condition.match(stacktrace): 149 if bucket.mmap == rule.mmap and rule.condition.match(stacktrace):
138 bucket.component = policy.name 150 bucket.component_cache = rule.name
139 return policy.name 151 return rule.name
140 152
141 assert False 153 assert False
142 154
143 155
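To illustrate the matching above: Rule appends r'\Z' to its pattern, so the regular expression has to cover the whole symbolized stacktrace, not just its beginning. A minimal sketch with hypothetical addresses and symbols:

    symbols = {'0x7f00': 'WebCore::Node::create', '0x7f08': 'operator new'}
    addresses = ['0x7f00', '0x7f08']  # a bucket's stacktrace
    stacktrace = ''.join(symbols[a] + ' ' for a in addresses).strip()
    # stacktrace == 'WebCore::Node::create operator new'
    rule = Rule('tc-webkit', False, 'WebCore::.*')
    assert rule.condition.match(stacktrace)  # 'WebCore::.*\Z' covers it all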
144 class Bucket(object): 156 class Bucket(object):
157 """Represents a bucket, which is a unit of memory classification."""
145 158
146 def __init__(self, stacktrace, mmap): 159 def __init__(self, stacktrace, mmap):
147 self.stacktrace = stacktrace 160 self.stacktrace = stacktrace
148 self.mmap = mmap 161 self.mmap = mmap
149 self.component = '' 162 self.component_cache = ''
163
164 def clear_component_cache(self):
165 self.component_cache = ''
150 166
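As load_buckets() later in this file shows, each line of a .buckets file holds a bucket id, an allocation type, and the frame addresses of the stack; a rough sketch with hypothetical values:

    line = '123 mmap 0x7f3a0000 0x7f3a1234'  # <id> <type> <address>...
    words = line.split()
    buckets = {int(words[0]): Bucket(words[2:], words[1] == 'mmap')}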
151 167
152 class Log(object): 168 class Dump(object):
169 """Represents one heap profile dump."""
153 170
154 """A class representing one dumped log data.""" 171 def __init__(self, dump_path):
155 def __init__(self, log_path): 172 self.dump_path = dump_path
156 self.log_path = log_path 173 self.dump_lines = [
157 self.log_lines = [ 174 l for l in open(self.dump_path, 'r') if l and not l.startswith('#')]
158 l for l in open(self.log_path, 'r') if l and not l.startswith('#')] 175 self.dump_version = ''
159 self.log_version = ''
160 sys.stderr.write('Loading a dump: %s\n' % log_path)
161 self.stacktrace_lines = [] 176 self.stacktrace_lines = []
162 self.counters = {} 177 self.counters = {}
163 self.log_time = os.stat(self.log_path).st_mtime 178 self.dump_time = os.stat(self.dump_path).st_mtime
164 179
165 def dump_stacktrace(buckets): 180 def print_stacktrace(self, buckets, symbols):
166 """Prints the stacktrace lines of this dump, symbolizing addresses. 181 """Prints the stacktrace lines of this dump, symbolizing addresses.
167 182
168 Args: 183 Args:
169 buckets: A dict mapping bucket ids and their corresponding Bucket 184 buckets: A dict mapping bucket ids to Bucket objects.
170 objects. 185 symbols: A dict mapping runtime addresses to symbol names.
171 """ 186 """
172 for line in self.stacktrace_lines: 187 for line in self.stacktrace_lines:
173 words = line.split() 188 words = line.split()
174 bucket = buckets.get(int(words[BUCKET_ID])) 189 bucket = buckets.get(int(words[BUCKET_ID]))
175 if not bucket: 190 if not bucket:
176 continue 191 continue
177 for i in range(0, BUCKET_ID - 1): 192 for i in range(0, BUCKET_ID - 1):
178 sys.stdout.write(words[i] + ' ') 193 sys.stdout.write(words[i] + ' ')
179 for address in bucket.stacktrace: 194 for address in bucket.stacktrace:
180 sys.stdout.write((address_symbol_dict.get(address) or address) + ' ') 195 sys.stdout.write((symbols.get(address) or address) + ' ')
181 sys.stdout.write('\n') 196 sys.stdout.write('\n')
182 197
183 @staticmethod 198 @staticmethod
184 def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets, 199 def accumulate_size_for_pprof(stacktrace_lines, rule_list, buckets,
185 component_name): 200 component_name, symbols):
186 """Accumulates the size of committed chunks and the number of allocated chunks. 201 """Accumulates the size of committed chunks and the number of allocated chunks.
187 202
188 Args: 203 Args:
189 stacktrace_lines: A list of strings which are valid as stacktraces. 204 stacktrace_lines: A list of strings which are valid as stacktraces.
190 policy_list: A list containing Policy objects. (Parsed policy data by 205 rule_list: A list of Rule objects.
191 parse_policy.) 206 buckets: A dict mapping bucket ids to Bucket objects.
192 buckets: A dict mapping bucket ids and their corresponding Bucket
193 objects.
194 component_name: A component name for filtering. 207 component_name: A component name for filtering.
208 symbols: A dict mapping runtime addresses to symbol names.
195 209
196 Returns: 210 Returns:
197 Two integers which are the accumulated size of committed regions and the 211 Two integers which are the accumulated size of committed regions and the
198 number of allocated chunks, respectively. 212 number of allocated chunks, respectively.
199 """ 213 """
200 com_committed = 0 214 com_committed = 0
201 com_allocs = 0 215 com_allocs = 0
202 for line in stacktrace_lines: 216 for line in stacktrace_lines:
203 words = line.split() 217 words = line.split()
204 bucket = buckets.get(int(words[BUCKET_ID])) 218 bucket = buckets.get(int(words[BUCKET_ID]))
205 if (not bucket or 219 if (not bucket or
206 (component_name and 220 (component_name and
207 component_name != get_component(policy_list, bucket))): 221 component_name != get_component(rule_list, bucket, symbols))):
208 continue 222 continue
209 223
210 com_committed += int(words[COMMITTED]) 224 com_committed += int(words[COMMITTED])
211 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) 225 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])
212 226
213 return com_committed, com_allocs 227 return com_committed, com_allocs
214 228
215 @staticmethod 229 @staticmethod
216 def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list, 230 def print_stacktrace_lines_for_pprof(stacktrace_lines, rule_list,
217 buckets, component_name): 231 buckets, component_name, symbols):
218 """Prints stacktrace lines in pprof format. 232 """Prints stacktrace lines in pprof format.
219 233
220 Args: 234 Args:
221 stacktrace_lines: A list of strings which are valid as stacktraces. 235 stacktrace_lines: A list of strings which are valid as stacktraces.
222 policy_list: A list containing Policy objects. (Parsed policy data by 236 rule_list: A list of Rule objects.
223 parse_policy.) 237 buckets: A dict mapping bucket ids to Bucket objects.
224 buckets: A dict mapping bucket ids and their corresponding Bucket
225 objects.
226 component_name: A component name for filtering. 238 component_name: A component name for filtering.
239 symbols: A dict mapping runtime addresses to symbol names.
227 """ 240 """
228 for line in stacktrace_lines: 241 for line in stacktrace_lines:
229 words = line.split() 242 words = line.split()
230 bucket = buckets.get(int(words[BUCKET_ID])) 243 bucket = buckets.get(int(words[BUCKET_ID]))
231 if (not bucket or 244 if (not bucket or
232 (component_name and 245 (component_name and
233 component_name != get_component(policy_list, bucket))): 246 component_name != get_component(rule_list, bucket, symbols))):
234 continue 247 continue
235 248
236 sys.stdout.write('%6d: %8s [%6d: %8s] @' % ( 249 sys.stdout.write('%6d: %8s [%6d: %8s] @' % (
237 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), 250 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
238 words[COMMITTED], 251 words[COMMITTED],
239 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), 252 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
240 words[COMMITTED])) 253 words[COMMITTED]))
241 for address in bucket.stacktrace: 254 for address in bucket.stacktrace:
242 sys.stdout.write(' ' + address) 255 sys.stdout.write(' ' + address)
243 sys.stdout.write('\n') 256 sys.stdout.write('\n')
244 257
245 def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name): 258 def print_for_pprof(
246 """Converts the log file so it can be processed by pprof. 259 self, rule_list, buckets, maps_lines, component_name, symbols):
260 """Converts the heap profile dump so it can be processed by pprof.
247 261
248 Args: 262 Args:
249 policy_list: A list containing Policy objects. (Parsed policy data by 263 rule_list: A list of Rule objects.
250 parse_policy.) 264 buckets: A dict mapping bucket ids to Bucket objects.
251 buckets: A dict mapping bucket ids and their corresponding Bucket 265 maps_lines: A list of strings containing /proc/.../maps.
252 objects.
253 mapping_lines: A list of strings containing /proc/.../maps.
254 component_name: A component name for filtering. 266 component_name: A component name for filtering.
267 symbols: A dict mapping runtime addresses to symbol names.
255 """ 268 """
256 sys.stdout.write('heap profile: ') 269 sys.stdout.write('heap profile: ')
257 com_committed, com_allocs = self.accumulate_size_for_pprof( 270 com_committed, com_allocs = self.accumulate_size_for_pprof(
258 self.stacktrace_lines, policy_list, buckets, component_name) 271 self.stacktrace_lines, rule_list, buckets, component_name, symbols)
259 272
260 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( 273 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
261 com_allocs, com_committed, com_allocs, com_committed)) 274 com_allocs, com_committed, com_allocs, com_committed))
262 275
263 self.dump_stacktrace_lines_for_pprof( 276 self.print_stacktrace_lines_for_pprof(
264 self.stacktrace_lines, policy_list, buckets, component_name) 277 self.stacktrace_lines, rule_list, buckets, component_name, symbols)
265 278
266 sys.stdout.write('MAPPED_LIBRARIES:\n') 279 sys.stdout.write('MAPPED_LIBRARIES:\n')
267 for line in mapping_lines: 280 for line in maps_lines:
268 sys.stdout.write(line) 281 sys.stdout.write(line)
269 282
270 @staticmethod 283 @staticmethod
271 def check_stacktrace_line(stacktrace_line, buckets): 284 def check_stacktrace_line(stacktrace_line, buckets, appeared_addresses):
272 """Checks if a given stacktrace_line is valid as a stacktrace. 285 """Checks if a given stacktrace_line is valid as a stacktrace.
273 286
274 Args: 287 Args:
275 stacktrace_line: A string to be checked. 288 stacktrace_line: A string to be checked.
276 buckets: A dict mapping bucket ids and their corresponding Bucket 289 buckets: A dict mapping bucket ids to Bucket objects.
277 objects. 290 appeared_addresses: A set where appeared addresses will be stored.
278 291
279 Returns: 292 Returns:
280 True if the given stacktrace_line is valid. 293 True if the given stacktrace_line is valid.
281 """ 294 """
282 words = stacktrace_line.split() 295 words = stacktrace_line.split()
283 if len(words) < BUCKET_ID + 1: 296 if len(words) < BUCKET_ID + 1:
284 return False 297 return False
285 if words[BUCKET_ID - 1] != '@': 298 if words[BUCKET_ID - 1] != '@':
286 return False 299 return False
287 bucket = buckets.get(int(words[BUCKET_ID])) 300 bucket = buckets.get(int(words[BUCKET_ID]))
(...skipping 10 matching lines...)
298 A pair of an integer indicating the line number after skipping, and a 311 A pair of an integer indicating the line number after skipping, and a
299 boolean which is True if a line was found for which 312 boolean which is True if a line was found for which
300 skipping_condition is False. 313 skipping_condition is False.
301 """ 314 """
302 while skipping_condition(line_number): 315 while skipping_condition(line_number):
303 line_number += 1 316 line_number += 1
304 if line_number >= max_line_number: 317 if line_number >= max_line_number:
305 return line_number, False 318 return line_number, False
306 return line_number, True 319 return line_number, True
307 320
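Given the column constants at the top of this file (VIRTUAL=0, COMMITTED=1, ALLOC_COUNT=2, FREE_COUNT=3, BUCKET_ID=5), a stacktrace line that passes check_stacktrace_line() has the following shape; all numbers below are hypothetical:

    line = '2097152 1048576 42 13 @ 123'  # virtual committed allocs frees @ bucket-id
    words = line.split()
    assert len(words) >= BUCKET_ID + 1 and words[BUCKET_ID - 1] == '@'
    committed = int(words[COMMITTED])                               # 1048576
    live_chunks = int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])  # 29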
308 def parse_stacktraces_while_valid(self, buckets, log_lines, line_number): 321 def parse_stacktraces_while_valid(
322 self, buckets, dump_lines, line_number, appeared_addresses):
309 """Parses stacktrace lines while the lines are valid. 323 """Parses stacktrace lines while the lines are valid.
310 324
311 Args: 325 Args:
312 buckets: A dict mapping bucket ids and their corresponding Bucket 326 buckets: A dict mapping bucket ids to Bucket objects.
313 objects. 327 dump_lines: A list of lines to be parsed.
314 log_lines: A list of lines to be parsed. 328 line_number: A line number to start parsing in dump_lines.
315 line_number: An integer representing the starting line number in 329 appeared_addresses: A list where appeared addresses will be stored.
316 log_lines.
317 330
318 Returns: 331 Returns:
319 A pair of a list of valid lines and an integer representing the last 332 A pair of a list of valid lines and an integer representing the last
320 line number in log_lines. 333 line number in dump_lines.
321 """ 334 """
322 (line_number, _) = self.skip_lines_while( 335 (line_number, _) = self.skip_lines_while(
323 line_number, len(log_lines), 336 line_number, len(dump_lines),
324 lambda n: not log_lines[n].split()[0].isdigit()) 337 lambda n: not dump_lines[n].split()[0].isdigit())
325 stacktrace_lines_start = line_number 338 stacktrace_lines_start = line_number
326 (line_number, _) = self.skip_lines_while( 339 (line_number, _) = self.skip_lines_while(
327 line_number, len(log_lines), 340 line_number, len(dump_lines),
328 lambda n: self.check_stacktrace_line(log_lines[n], buckets)) 341 lambda n: self.check_stacktrace_line(
329 return (log_lines[stacktrace_lines_start:line_number], line_number) 342 dump_lines[n], buckets, appeared_addresses))
343 return (dump_lines[stacktrace_lines_start:line_number], line_number)
330 344
331 def parse_stacktraces(self, buckets, line_number): 345 def parse_stacktraces(self, buckets, line_number, appeared_addresses):
332 """Parses lines in self.log_lines as stacktraces. 346 """Parses lines in self.dump_lines as stacktraces.
333 347
334 Valid stacktrace lines are stored into self.stacktrace_lines. 348 Valid stacktrace lines are stored into self.stacktrace_lines.
335 349
336 Args: 350 Args:
337 buckets: A dict mapping bucket ids and their corresponding Bucket 351 buckets: A dict mapping bucket ids to Bucket objects.
338 objects. 352 line_number: A line number to start parsing in dump_lines.
339 line_number: An integer representing the starting line number in 353 appeared_addresses: A set where appeared addresses will be stored.
340 log_lines.
341 354
342 Raises: 355 Raises:
343 ParsingException for invalid dump versions. 356 ParsingException for invalid dump versions.
344 """ 357 """
345 sys.stderr.write(' Version: %s\n' % self.log_version) 358 if self.dump_version == DUMP_DEEP_5:
346
347 if self.log_version == DUMP_DEEP_5:
348 (self.stacktrace_lines, line_number) = ( 359 (self.stacktrace_lines, line_number) = (
349 self.parse_stacktraces_while_valid( 360 self.parse_stacktraces_while_valid(
350 buckets, self.log_lines, line_number)) 361 buckets, self.dump_lines, line_number, appeared_addresses))
351 362
352 elif self.log_version in DUMP_DEEP_OBSOLETE: 363 elif self.dump_version in DUMP_DEEP_OBSOLETE:
353 raise ObsoleteDumpVersionException(self.log_version) 364 raise ObsoleteDumpVersionException(self.dump_version)
354 365
355 else: 366 else:
356 raise InvalidDumpException('Invalid version: %s' % self.log_version) 367 raise InvalidDumpException('Invalid version: %s' % self.dump_version)
357 368
358 def parse_global_stats(self): 369 def parse_global_stats(self):
359 """Parses lines in self.log_lines as global stats.""" 370 """Parses lines in self.dump_lines as global stats."""
360 (ln, _) = self.skip_lines_while( 371 (ln, _) = self.skip_lines_while(
361 0, len(self.log_lines), 372 0, len(self.dump_lines),
362 lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n') 373 lambda n: self.dump_lines[n] != 'GLOBAL_STATS:\n')
363 374
364 global_stat_names = [ 375 global_stat_names = [
365 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', 376 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',
366 'nonprofiled-absent', 'nonprofiled-anonymous', 377 'nonprofiled-absent', 'nonprofiled-anonymous',
367 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', 378 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
368 'nonprofiled-stack', 'nonprofiled-other', 379 'nonprofiled-stack', 'nonprofiled-other',
369 'profiled-mmap', 'profiled-malloc'] 380 'profiled-mmap', 'profiled-malloc']
370 381
371 for prefix in global_stat_names: 382 for prefix in global_stat_names:
372 (ln, _) = self.skip_lines_while( 383 (ln, _) = self.skip_lines_while(
373 ln, len(self.log_lines), 384 ln, len(self.dump_lines),
374 lambda n: self.log_lines[n].split()[0] != prefix) 385 lambda n: self.dump_lines[n].split()[0] != prefix)
375 words = self.log_lines[ln].split() 386 words = self.dump_lines[ln].split()
376 self.counters[prefix + '_virtual'] = int(words[-2]) 387 self.counters[prefix + '_virtual'] = int(words[-2])
377 self.counters[prefix + '_committed'] = int(words[-1]) 388 self.counters[prefix + '_committed'] = int(words[-1])
378 389
379 def parse_version(self): 390 def parse_version(self):
380 """Parses a version string in self.log_lines. 391 """Parses a version string in self.dump_lines.
381 392
382 Returns: 393 Returns:
383 A pair of (a string representing a version of the stacktrace dump, 394 A pair of (a string representing a version of the stacktrace dump,
384 and an integer indicating a line number next to the version string). 395 and an integer indicating a line number next to the version string).
385 396
386 Raises: 397 Raises:
387 ParsingException for invalid dump versions. 398 ParsingException for invalid dump versions.
388 """ 399 """
389 version = '' 400 version = ''
390 401
391 # Skip until an identifiable line. 402 # Skip until an identifiable line.
392 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') 403 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
393 if not self.log_lines: 404 if not self.dump_lines:
394 raise EmptyDumpException('Empty heap dump file.') 405 raise EmptyDumpException('Empty heap dump file.')
395 (ln, found) = self.skip_lines_while( 406 (ln, found) = self.skip_lines_while(
396 0, len(self.log_lines), 407 0, len(self.dump_lines),
397 lambda n: not self.log_lines[n].startswith(headers)) 408 lambda n: not self.dump_lines[n].startswith(headers))
398 if not found: 409 if not found:
399 raise InvalidDumpException('No version header.') 410 raise InvalidDumpException('No version header.')
400 411
401 # Identify a version. 412 # Identify a version.
402 if self.log_lines[ln].startswith('heap profile: '): 413 if self.dump_lines[ln].startswith('heap profile: '):
403 version = self.log_lines[ln][13:].strip() 414 version = self.dump_lines[ln][13:].strip()
404 if version == DUMP_DEEP_5: 415 if version == DUMP_DEEP_5:
405 (ln, _) = self.skip_lines_while( 416 (ln, _) = self.skip_lines_while(
406 ln, len(self.log_lines), 417 ln, len(self.dump_lines),
407 lambda n: self.log_lines[n] != 'STACKTRACES:\n') 418 lambda n: self.dump_lines[n] != 'STACKTRACES:\n')
408 elif version in DUMP_DEEP_OBSOLETE: 419 elif version in DUMP_DEEP_OBSOLETE:
409 raise ObsoleteDumpVersionException(version) 420 raise ObsoleteDumpVersionException(version)
410 else: 421 else:
411 raise InvalidDumpException('Invalid version: %s' % version) 422 raise InvalidDumpException('Invalid version: %s' % version)
412 elif self.log_lines[ln] == 'STACKTRACES:\n': 423 elif self.dump_lines[ln] == 'STACKTRACES:\n':
413 raise ObsoleteDumpVersionException(DUMP_DEEP_1) 424 raise ObsoleteDumpVersionException(DUMP_DEEP_1)
414 elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n': 425 elif self.dump_lines[ln] == 'MMAP_STACKTRACES:\n':
415 raise ObsoleteDumpVersionException(DUMP_DEEP_2) 426 raise ObsoleteDumpVersionException(DUMP_DEEP_2)
416 427
417 return (version, ln) 428 return (version, ln)
418 429
419 def parse_log(self, buckets): 430 def parse_dump(self, buckets, appeared_addresses):
420 self.log_version, ln = self.parse_version() 431 self.dump_version, ln = self.parse_version()
421 self.parse_global_stats() 432 self.parse_global_stats()
422 self.parse_stacktraces(buckets, ln) 433 self.parse_stacktraces(buckets, ln, appeared_addresses)
423 434
424 @staticmethod 435 @staticmethod
425 def accumulate_size_for_policy(stacktrace_lines, 436 def accumulate_size_for_policy(stacktrace_lines,
426 policy_list, buckets, sizes): 437 rule_list, buckets, sizes, symbols):
427 for line in stacktrace_lines: 438 for line in stacktrace_lines:
428 words = line.split() 439 words = line.split()
429 bucket = buckets.get(int(words[BUCKET_ID])) 440 bucket = buckets.get(int(words[BUCKET_ID]))
430 component_match = get_component(policy_list, bucket) 441 component_match = get_component(rule_list, bucket, symbols)
431 sizes[component_match] += int(words[COMMITTED]) 442 sizes[component_match] += int(words[COMMITTED])
432 443
433 if component_match.startswith('tc-'): 444 if component_match.startswith('tc-'):
434 sizes['tc-total-log'] += int(words[COMMITTED]) 445 sizes['tc-total-log'] += int(words[COMMITTED])
435 elif component_match.startswith('mmap-'): 446 elif component_match.startswith('mmap-'):
436 sizes['mmap-total-log'] += int(words[COMMITTED]) 447 sizes['mmap-total-log'] += int(words[COMMITTED])
437 else: 448 else:
438 sizes['other-total-log'] += int(words[COMMITTED]) 449 sizes['other-total-log'] += int(words[COMMITTED])
439 450
440 def apply_policy(self, policy_list, buckets, first_log_time): 451 def apply_policy(
452 self, rule_list, buckets, first_dump_time, components, symbols):
441 """Aggregates the total memory size of each component. 453 """Aggregates the total memory size of each component.
442 454
443 Iterates through all stacktraces and attributes them to one of the components 455 Iterates through all stacktraces and attributes them to one of the components
444 based on the policy. It is important to apply the rules in the right order. 456 based on the policy. It is important to apply the rules in the right order.
445 457
446 Args: 458 Args:
447 policy_list: A list containing Policy objects. (Parsed policy data by 459 rule_list: A list of Rule objects.
448 parse_policy.) 460 buckets: A dict mapping bucket ids to Bucket objects.
449 buckets: A dict mapping bucket ids and their corresponding Bucket 461 first_dump_time: An integer representing time when the first dump is
450 objects.
451 first_log_time: An integer representing time when the first log is
452 dumped. 462 dumped.
463 components: A list of strings of component names.
464 symbols: A dict mapping runtime addresses to symbol names.
453 465
454 Returns: 466 Returns:
455 A dict mapping components and their corresponding sizes. 467 A dict mapping components and their corresponding sizes.
456 """ 468 """
457 469
458 sys.stderr.write('apply policy:%s\n' % (self.log_path)) 470 sys.stderr.write('Applying policy: "%s".\n' % self.dump_path)
459 sizes = dict((c, 0) for c in components) 471 sizes = dict((c, 0) for c in components)
460 472
461 self.accumulate_size_for_policy(self.stacktrace_lines, 473 self.accumulate_size_for_policy(self.stacktrace_lines,
462 policy_list, buckets, sizes) 474 rule_list, buckets, sizes, symbols)
463 475
464 mmap_prefix = 'profiled-mmap' 476 mmap_prefix = 'profiled-mmap'
465 malloc_prefix = 'profiled-malloc' 477 malloc_prefix = 'profiled-malloc'
466 478
467 sizes['mmap-no-log'] = ( 479 sizes['mmap-no-log'] = (
468 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log']) 480 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log'])
469 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix] 481 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix]
470 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix] 482 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix]
471 483
472 sizes['tc-no-log'] = ( 484 sizes['tc-no-log'] = (
(...skipping 34 matching lines...)
507 'nonprofiled-stack_committed', 519 'nonprofiled-stack_committed',
508 'nonprofiled-other_committed') 520 'nonprofiled-other_committed')
509 sizes['mustbezero'] = ( 521 sizes['mustbezero'] = (
510 self.counters['total_committed'] - 522 self.counters['total_committed'] -
511 sum(self.counters[i] for i in removed)) 523 sum(self.counters[i] for i in removed))
512 if 'total-exclude-profiler' in sizes: 524 if 'total-exclude-profiler' in sizes:
513 sizes['total-exclude-profiler'] = ( 525 sizes['total-exclude-profiler'] = (
514 self.counters['total_committed'] - 526 self.counters['total_committed'] -
515 (sizes['mmap-profiler'] + sizes['mmap-allocated-type'])) 527 (sizes['mmap-profiler'] + sizes['mmap-allocated-type']))
516 if 'hour' in sizes: 528 if 'hour' in sizes:
517 sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0 529 sizes['hour'] = (self.dump_time - first_dump_time) / 60.0 / 60.0
518 if 'minute' in sizes: 530 if 'minute' in sizes:
519 sizes['minute'] = (self.log_time - first_log_time) / 60.0 531 sizes['minute'] = (self.dump_time - first_dump_time) / 60.0
520 if 'second' in sizes: 532 if 'second' in sizes:
521 sizes['second'] = self.log_time - first_log_time 533 sizes['second'] = self.dump_time - first_dump_time
522 534
523 return sizes 535 return sizes
524 536
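A worked example of the bookkeeping above, with hypothetical numbers: if GLOBAL_STATS reported profiled-mmap_committed as 10485760 bytes and the rules attributed 7340032 bytes of mmap-backed stacktraces to components, then:

    # sizes['mmap-total-log']    == 7340032   (attributed via the rules)
    # sizes['mmap-no-log']       == 10485760 - 7340032 == 3145728
    #                               (committed mmap memory not covered by
    #                                any stacktrace line)
    # sizes['mmap-total-record'] == 10485760  (all committed mmap memory)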
525 @staticmethod 537 @staticmethod
526 def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets, 538 def accumulate_size_for_expand(stacktrace_lines, rule_list, buckets,
527 component_name, depth, sizes): 539 component_name, depth, sizes, symbols):
528 for line in stacktrace_lines: 540 for line in stacktrace_lines:
529 words = line.split() 541 words = line.split()
530 bucket = buckets.get(int(words[BUCKET_ID])) 542 bucket = buckets.get(int(words[BUCKET_ID]))
531 component_match = get_component(policy_list, bucket) 543 component_match = get_component(rule_list, bucket, symbols)
532 if component_match == component_name: 544 if component_match == component_name:
533 stacktrace_sequence = '' 545 stacktrace_sequence = ''
534 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace), 546 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace),
535 1 + depth)]: 547 1 + depth)]:
536 stacktrace_sequence += address_symbol_dict[address] + ' ' 548 stacktrace_sequence += symbols[address] + ' '
537 if not stacktrace_sequence in sizes: 549 if not stacktrace_sequence in sizes:
538 sizes[stacktrace_sequence] = 0 550 sizes[stacktrace_sequence] = 0
539 sizes[stacktrace_sequence] += int(words[COMMITTED]) 551 sizes[stacktrace_sequence] += int(words[COMMITTED])
540 552
541 def expand(self, policy_list, buckets, component_name, depth): 553 def expand(self, rule_list, buckets, component_name, depth, symbols):
542 """Prints all stacktraces in a given component, up to a given depth. 554 """Prints all stacktraces in a given component, up to a given depth.
543 555
544 Args: 556 Args:
545 policy_list: A list containing Policy objects. (Parsed policy data by 557 rule_list: A list of Rule objects.
546 parse_policy.) 558 buckets: A dict mapping bucket ids to Bucket objects.
547 buckets: A dict mapping bucket ids and their corresponding Bucket
548 objects.
549 component_name: A component name for filtering. 559 component_name: A component name for filtering.
550 depth: An integer representing depth to be printed. 560 depth: An integer representing depth to be printed.
561 symbols: A dict mapping runtime addresses to symbol names.
551 """ 562 """
552 sizes = {} 563 sizes = {}
553 564
554 self.accumulate_size_for_expand( 565 self.accumulate_size_for_expand(
555 self.stacktrace_lines, policy_list, buckets, component_name, 566 self.stacktrace_lines, rule_list, buckets, component_name,
556 depth, sizes) 567 depth, sizes, symbols)
557 568
558 sorted_sizes_list = sorted( 569 sorted_sizes_list = sorted(
559 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) 570 sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
560 total = 0 571 total = 0
561 for size_pair in sorted_sizes_list: 572 for size_pair in sorted_sizes_list:
562 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0])) 573 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0]))
563 total += size_pair[1] 574 total += size_pair[1]
564 sys.stderr.write('total: %d\n' % (total)) 575 sys.stderr.write('total: %d\n' % (total))
565 576
566 577
567 def update_symbols(symbol_path, mapping_lines, maps_path): 578 def update_symbols(
579 symbol_path, maps_path, appeared_addresses, symbols):
568 """Updates the address/symbol mapping in memory and in a .symbol cache file. 580 """Updates the address/symbol mapping in memory and in a .symbol cache file.
569 581
570 It reads cached address/symbol mapping from a .symbol file if it exists. 582 It reads cached address/symbol mapping from a .symbol file if it exists.
571 Then, it resolves unresolved addresses from a Chrome binary with pprof. 583 Then, it resolves unresolved addresses from a Chrome binary with pprof.
572 Both the mapping in memory and the .symbol cache file are updated. 584 Both the mapping in memory and the .symbol cache file are updated.
573 585
574 Symbol files are formatted as follows: 586 Symbol files are formatted as follows:
575 <Address> <Symbol> 587 <Address> <Symbol>
576 <Address> <Symbol> 588 <Address> <Symbol>
577 <Address> <Symbol> 589 <Address> <Symbol>
578 ... 590 ...
579 591
580 Args: 592 Args:
581 symbol_path: A string representing a path for a .symbol file. 593 symbol_path: A string representing a path for a .symbol file.
582 mapping_lines: A list of strings containing /proc/.../maps.
583 maps_path: A string of the path of /proc/.../maps. 594 maps_path: A string of the path of /proc/.../maps.
595 appeared_addresses: A list of known addresses.
596 symbols: A dict mapping runtime addresses to symbol names.
584 """ 597 """
585 with open(symbol_path, mode='a+') as symbol_f: 598 with open(symbol_path, mode='a+') as symbol_f:
586 symbol_lines = symbol_f.readlines() 599 symbol_lines = symbol_f.readlines()
587 if symbol_lines: 600 if symbol_lines:
588 for line in symbol_lines: 601 for line in symbol_lines:
589 items = line.split(None, 1) 602 items = line.split(None, 1)
590 address_symbol_dict[items[0]] = items[1].rstrip() 603 if len(items) == 1:
604 items.append('??')
605 symbols[items[0]] = items[1].rstrip()
606 if symbols:
607 sys.stderr.write(' Found %d symbols in cache.\n' % len(symbols))
608 else:
609 sys.stderr.write(' No symbols found in cache.\n')
591 610
592 unresolved_addresses = sorted( 611 unresolved_addresses = sorted(
593 a for a in appeared_addresses if a not in address_symbol_dict) 612 a for a in appeared_addresses if a not in symbols)
594 613
595 if unresolved_addresses: 614 if not unresolved_addresses:
615 sys.stderr.write(' No need to resolve any more addresses.\n')
616 else:
617 sys.stderr.write(' %d addresses are unresolved.\n' %
618 len(unresolved_addresses))
596 prepared_data_dir = tempfile.mkdtemp() 619 prepared_data_dir = tempfile.mkdtemp()
597 try: 620 try:
598 prepare_symbol_info(maps_path, prepared_data_dir) 621 prepare_symbol_info(maps_path, prepared_data_dir)
599 622
600 symbols = find_runtime_symbols_list( 623 symbol_list = find_runtime_symbols_list(
601 prepared_data_dir, unresolved_addresses) 624 prepared_data_dir, unresolved_addresses)
602 625
603 for address, symbol in zip(unresolved_addresses, symbols): 626 for address, symbol in zip(unresolved_addresses, symbol_list):
627 if not symbol:
628 symbol = '??'
604 stripped_symbol = symbol.strip() 629 stripped_symbol = symbol.strip()
605 address_symbol_dict[address] = stripped_symbol 630 symbols[address] = stripped_symbol
606 symbol_f.write('%s %s\n' % (address, stripped_symbol)) 631 symbol_f.write('%s %s\n' % (address, stripped_symbol))
607 finally: 632 finally:
608 shutil.rmtree(prepared_data_dir) 633 shutil.rmtree(prepared_data_dir)
609 634
610 635
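A sketch of how the new update_symbols() signature is driven; the file names here are hypothetical, and load_and_update_symbol_cache() below is the real call site:

    symbols = {}
    appeared_addresses = set(['0x7f3a0000', '0x7f3a1234'])
    update_symbols('hprof.12345.symbols', 'hprof.12345.maps',
                   appeared_addresses, symbols)
    # symbols now maps every appeared address to a name, with '??' for
    # addresses that could not be resolved.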
611 def parse_policy(policy_path): 636 def parse_policy(policy_path):
612 """Parses a policy file. 637 """Parses a policy file.
613 638
614 A policy file contains component names and their 639 A policy file contains component names and their
615 stacktrace patterns written as regular expressions. 640 stacktrace patterns written as regular expressions.
616 The patterns are matched against the symbols of 641 The patterns are matched against the symbols of
617 each stacktrace in the order written in the policy file. 642 each stacktrace in the order written in the policy file.
618 643
619 Args: 644 Args:
620 policy_path: A path for a policy file. 645 policy_path: A path for a policy file.
621 Returns: 646 Returns:
622 A list of Policy objects (a name, an mmap flag and a regex). 647 A tuple of (a list of Rule objects, the policy version, and a list of component names).
623 """ 648 """
624 with open(policy_path, mode='r') as policy_f: 649 with open(policy_path, mode='r') as policy_f:
625 policy_lines = policy_f.readlines() 650 policy_lines = policy_f.readlines()
626 651
627 policy_version = POLICY_DEEP_1 652 policy_version = POLICY_DEEP_1
628 if policy_lines[0].startswith('heap profile policy: '): 653 if policy_lines[0].startswith('heap profile policy: '):
629 policy_version = policy_lines[0][21:].strip() 654 policy_version = policy_lines[0][21:].strip()
630 policy_lines.pop(0) 655 policy_lines.pop(0)
631 policy_list = [] 656 rule_list = []
657 components = []
632 658
633 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1: 659 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1:
634 sys.stderr.write(' heap profile policy version: %s\n' % policy_version)
635 for line in policy_lines: 660 for line in policy_lines:
636 if line[0] == '#': 661 if line[0] == '#':
637 continue 662 continue
638 663
639 if policy_version == POLICY_DEEP_2: 664 if policy_version == POLICY_DEEP_2:
640 (name, allocation_type, pattern) = line.strip().split(None, 2) 665 (name, allocation_type, pattern) = line.strip().split(None, 2)
641 mmap = False 666 mmap = False
642 if allocation_type == 'mmap': 667 if allocation_type == 'mmap':
643 mmap = True 668 mmap = True
644 elif policy_version == POLICY_DEEP_1: 669 elif policy_version == POLICY_DEEP_1:
645 name = line.split()[0] 670 name = line.split()[0]
646 pattern = line[len(name) : len(line)].strip() 671 pattern = line[len(name) : len(line)].strip()
647 mmap = False 672 mmap = False
648 673
649 if pattern != 'default': 674 if pattern != 'default':
650 policy_list.append(Policy(name, mmap, pattern)) 675 rule_list.append(Rule(name, mmap, pattern))
651 if components.count(name) == 0: 676 if components.count(name) == 0:
652 components.append(name) 677 components.append(name)
653 678
654 else: 679 else:
655 sys.stderr.write(' invalid heap profile policy version: %s\n' % ( 680 sys.stderr.write(' invalid heap profile policy version: %s\n' % (
656 policy_version)) 681 policy_version))
657 682
658 return policy_list 683 return rule_list, policy_version, components
659 684
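A quick sketch of the new return shape (the policy file name is hypothetical):

    rule_list, policy_version, components = parse_policy('policy.l0.txt')
    # rule_list:   Rule objects in file order, minus 'default' patterns
    # components:  unique component names, still in file order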
660 685
661 def main(): 686 def find_prefix(path):
662 if (len(sys.argv) < 4) or (not (sys.argv[1] in ['--csv', 687 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path)
663 '--json',
664 '--expand',
665 '--list',
666 '--stacktrace',
667 '--pprof'])):
668 sys.stderr.write("""Usage:
669 %s [options] <chrome-binary> <policy> <profile> [component-name] [depth]
670 688
671 Options:
672 --csv Output result in csv format
673 --json Output result in json format
674 --stacktrace Convert raw address to symbol names
675 --list Lists components and their sizes
676 --expand Show all stacktraces in the specified component
677 of given depth with their sizes
678 --pprof Format the profile file so it can be processed
679 by pprof
680 689
681 Examples: 690 def load_buckets(prefix):
682 dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv
683 dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json
684 dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap
685 dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4
686 dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt
687 """ % (sys.argv[0]))
688 sys.exit(1)
689
690 action = sys.argv[1]
691 chrome_path = sys.argv[2]
692 policy_path = sys.argv[3]
693 log_path = sys.argv[4]
694
695 sys.stderr.write('parsing a policy file\n')
696 policy_list = parse_policy(policy_path)
697
698 p = re.compile('\.[0-9][0-9][0-9][0-9]\.heap')
699 prefix = p.sub('', log_path)
700 symbol_path = prefix + '.symbols'
701
702 sys.stderr.write('parsing the maps file\n')
703 maps_path = prefix + '.maps'
704 with open(maps_path, 'r') as maps_f:
705 maps_lines = maps_f.readlines()
706
707 # Reading buckets 691 # Reading buckets
708 sys.stderr.write('parsing the bucket file\n') 692 sys.stderr.write('Loading bucket files.\n')
709 buckets = {} 693 buckets = {}
710 bucket_count = 0 694 bucket_count = 0
711 n = 0 695 n = 0
712 while True: 696 while True:
713 buckets_path = '%s.%04d.buckets' % (prefix, n) 697 buckets_path = '%s.%04d.buckets' % (prefix, n)
714 if not os.path.exists(buckets_path): 698 if not os.path.exists(buckets_path):
715 if n > 10: 699 if n > 10:
716 break 700 break
717 n += 1 701 n += 1
718 continue 702 continue
719 sys.stderr.write('reading buckets from %s\n' % (buckets_path)) 703 sys.stderr.write(' %s\n' % buckets_path)
720 with open(buckets_path, 'r') as buckets_f: 704 with open(buckets_path, 'r') as buckets_f:
721 for line in buckets_f: 705 for line in buckets_f:
722 words = line.split() 706 words = line.split()
723 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap') 707 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap')
724 n += 1 708 n += 1
725 709
726 log_path_list = [log_path] 710 return buckets
727 711
728 if action in ('--csv', '--json'): 712
729 # search for the sequence of files 713 def determine_dump_path_list(dump_path, prefix):
730 n = int(log_path[len(log_path) - 9 : len(log_path) - 5]) 714 dump_path_list = [dump_path]
731 n += 1 # skip current file 715
732 while True: 716 # search for the sequence of files
733 p = '%s.%04d.heap' % (prefix, n) 717 n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5])
734 if os.path.exists(p): 718 n += 1 # skip current file
735 log_path_list.append(p) 719 while True:
736 else: 720 p = '%s.%04d.heap' % (prefix, n)
737 break 721 if os.path.exists(p):
738 n += 1 722 dump_path_list.append(p)
739
740 logs = []
741 for path in log_path_list:
742 new_log = Log(path)
743 sys.stderr.write('Parsing a dump: %s\n' % path)
744 try:
745 new_log.parse_log(buckets)
746 except EmptyDumpException:
747 sys.stderr.write(' WARNING: ignored an empty dump: %s\n' % path)
748 except ParsingException, e:
749 sys.stderr.write(' Error in parsing heap profile dump: %s\n' % e)
750 sys.exit(1)
751 else: 723 else:
752 logs.append(new_log) 724 break
753 725 n += 1
754 sys.stderr.write('getting symbols\n') 726
755 update_symbols(symbol_path, maps_lines, maps_path) 727 return dump_path_list
756 728
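find_prefix() and determine_dump_path_list() both lean on dump names of the form <prefix>.NNNN.heap; a sketch of the arithmetic with a hypothetical name:

    # find_prefix('hprof.12345.0004.heap') == 'hprof.12345'
    n = int('hprof.12345.0004.heap'[-9:-5])  # == 4, the sequence number
    # determine_dump_path_list() then probes hprof.12345.0005.heap,
    # hprof.12345.0006.heap, ... until a file in the sequence is missing.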
757 # TODO(dmikurube): Many modes now. Split them into separate functions. 729
758 if action == '--stacktrace': 730 def load_single_dump(dump_path, buckets, appeared_addresses):
759 logs[0].dump_stacktrace(buckets) 731 new_dump = Dump(dump_path)
760 732 try:
761 elif action == '--csv': 733 new_dump.parse_dump(buckets, appeared_addresses)
762 sys.stdout.write(','.join(components)) 734 except EmptyDumpException:
763 sys.stdout.write('\n') 735 sys.stderr.write('... ignored an empty dump')
764 736 except ParsingException, e:
765 for log in logs: 737 sys.stderr.write('... error in parsing: %s' % e)
766 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) 738 sys.exit(1)
739 else:
740 sys.stderr.write(' (version: %s)' % new_dump.dump_version)
741
742 return new_dump
743
744
745 def load_dump(dump_path, buckets):
746 sys.stderr.write('Loading a heap dump file: "%s"' % dump_path)
747 appeared_addresses = set()
748 dump = load_single_dump(dump_path, buckets, appeared_addresses)
749 sys.stderr.write('.\n')
750 return dump, appeared_addresses
751
752
753 def load_dumps(dump_path_list, buckets):
754 sys.stderr.write('Loading heap dump files.\n')
755 appeared_addresses = set()
756 dumps = []
757 for path in dump_path_list:
758 sys.stderr.write(' %s' % path)
759 dumps.append(load_single_dump(path, buckets, appeared_addresses))
760 sys.stderr.write('\n')
761 return dumps, appeared_addresses
762
763
764 def load_and_update_symbol_cache(prefix, appeared_addresses):
765 maps_path = prefix + '.maps'
766 symbol_path = prefix + '.symbols'
767 sys.stderr.write('Loading and updating symbol cache: "%s".\n' % symbol_path)
768 symbols = {}
769 update_symbols(symbol_path, maps_path, appeared_addresses, symbols)
770 return symbols
771
772
773 def load_default_policies():
774 with open(POLICIES_JSON_PATH, mode='r') as policies_f:
775 default_policies = json.load(policies_f)
776 return default_policies
777
778
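load_policy() below reads only the 'file' key of each policies.json entry, so the file presumably has at least the following shape; the labels and file names are illustrative, not the actual contents of tools/deep_memory_profiler/policies.json:

    {
      "l0": { "file": "policy.l0.txt" },
      "l1": { "file": "policy.l1.txt" }
    }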
779 def load_policy(policies_dict, policy_label):
780 policy_file = policies_dict[policy_label]['file']
781 policy_path = os.path.join(os.path.dirname(__file__), policy_file)
782 rule_list, policy_version, components = parse_policy(policy_path)
783 sys.stderr.write(' %s: %s (version: %s)\n' %
784 (policy_label, policy_path, policy_version))
785 return Policy(rule_list, policy_version, components)
786
787
788 def load_policies_dict(policies_dict):
789 sys.stderr.write('Loading policy files.\n')
790 policies = {}
791 for policy_label in policies_dict:
792 policies[policy_label] = load_policy(policies_dict, policy_label)
793 return policies
794
795
796 def load_policies(options_policy):
797 default_policies = load_default_policies()
798 if options_policy:
799 policy_labels = options_policy.split(',')
800 specified_policies = {}
801 for specified_policy in policy_labels:
802 if specified_policy in default_policies:
803 specified_policies[specified_policy] = (
804 default_policies[specified_policy])
805 policies = load_policies_dict(specified_policies)
806 else:
807 policies = load_policies_dict(default_policies)
808 return policies
809
810
811 def do_stacktrace(sys_argv):
812 parser = optparse.OptionParser(usage='Usage: %prog stacktrace <dump>')
813 options, args = parser.parse_args(sys_argv)
814
815 if len(args) != 2:
816 parser.error('needs 1 argument.')
817 return 1
818
819 dump_path = args[1]
820
821 prefix = find_prefix(dump_path)
822 buckets = load_buckets(prefix)
823 dump, appeared_addresses = load_dump(dump_path, buckets)
824 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
825
826 dump.print_stacktrace(buckets, symbols)
827
828 return 0
829
830
831 def do_csv(sys_argv):
832 parser = optparse.OptionParser('Usage: %prog csv [-p POLICY] <first-dump>')
833 parser.add_option('-p', '--policy', type='string', dest='policy',
834 help='profile with POLICY', metavar='POLICY')
835 options, args = parser.parse_args(sys_argv)
836
837 if len(args) != 2:
838 parser.error('needs 1 argument.')
839 return 1
840
841 dump_path = args[1]
842
843 prefix = find_prefix(dump_path)
844 buckets = load_buckets(prefix)
845 dumps, appeared_addresses = load_dumps(
846 determine_dump_path_list(dump_path, prefix), buckets)
847 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
848 policies = load_policies(options.policy)
849
850 max_components = 0
851 for policy in policies:
852 max_components = max(max_components, len(policies[policy].components))
853
854 for policy in sorted(policies):
855 rule_list = policies[policy].rules
856 components = policies[policy].components
857
858 if len(policies) > 1:
859 sys.stdout.write('%s%s\n' % (policy, ',' * (max_components - 1)))
860 sys.stdout.write('%s%s\n' % (
861 ','.join(components), ',' * (max_components - len(components))))
862
863 for dump in dumps:
864 component_sizes = dump.apply_policy(
865 rule_list, buckets, dumps[0].dump_time, components, symbols)
767 s = [] 866 s = []
768 for c in components: 867 for c in components:
769 if c in ('hour', 'minute', 'second'): 868 if c in ('hour', 'minute', 'second'):
770 s.append('%05.5f' % (component_sizes[c])) 869 s.append('%05.5f' % (component_sizes[c]))
771 else: 870 else:
772 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) 871 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
773 sys.stdout.write(','.join(s)) 872 sys.stdout.write('%s%s\n' % (
774 sys.stdout.write('\n') 873 ','.join(s), ',' * (max_components - len(components))))
775 874
776 elif action == '--json': 875 for bucket in buckets.itervalues():
777 json_base = { 876 bucket.clear_component_cache()
778 'version': 'JSON_DEEP_1', 877
878 return 0
879
880
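For a policy whose components are ['second', 'total'] (hypothetical), the CSV emitted above would come out roughly as:

    second,total
    0.00000,123.45678
    60.00000,130.01234

Time-like components (hour, minute, second) are printed as-is; every other component is converted from bytes to megabytes.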
881 def do_json(sys_argv):
882 parser = optparse.OptionParser('Usage: %prog json [-p POLICY] <first-dump>')
883 parser.add_option('-p', '--policy', type='string', dest='policy',
884 help='profile with POLICY', metavar='POLICY')
885 options, args = parser.parse_args(sys_argv)
886
887 if len(args) != 2:
888 parser.error('needs 1 argument.')
889 return 1
890
891 dump_path = args[1]
892
893 prefix = find_prefix(dump_path)
894 buckets = load_buckets(prefix)
895 dumps, appeared_addresses = load_dumps(
896 determine_dump_path_list(dump_path, prefix), buckets)
897 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
898 policies = load_policies(options.policy)
899
900 json_base = {
901 'version': 'JSON_DEEP_2',
902 'policies': {},
903 }
904
905 for policy in sorted(policies):
906 rule_list = policies[policy].rules
907 components = policies[policy].components
908
909 json_base['policies'][policy] = {
779 'legends': components, 910 'legends': components,
780 'snapshots': [], 911 'snapshots': [],
781 } 912 }
782 for log in logs: 913
783 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) 914 for dump in dumps:
784 component_sizes['log_path'] = log.log_path 915 component_sizes = dump.apply_policy(
785 component_sizes['log_time'] = datetime.fromtimestamp( 916 rule_list, buckets, dumps[0].dump_time, components, symbols)
786 log.log_time).strftime('%Y-%m-%d %H:%M:%S') 917 component_sizes['dump_path'] = dump.dump_path
787 json_base['snapshots'].append(component_sizes) 918 component_sizes['dump_time'] = datetime.fromtimestamp(
788 json.dump(json_base, sys.stdout, indent=2, sort_keys=True) 919 dump.dump_time).strftime('%Y-%m-%d %H:%M:%S')
789 920 json_base['policies'][policy]['snapshots'].append(component_sizes)
790 elif action == '--list': 921
791 component_sizes = logs[0].apply_policy( 922 for bucket in buckets.itervalues():
792 policy_list, buckets, logs[0].log_time) 923 bucket.clear_component_cache()
924
925 json.dump(json_base, sys.stdout, indent=2, sort_keys=True)
926
927 return 0
928
929
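Schematically, the JSON_DEEP_2 output produced above is keyed by policy label; the labels, components and values below are hypothetical:

    {
      "version": "JSON_DEEP_2",
      "policies": {
        "l0": {
          "legends": ["total", "second"],
          "snapshots": [
            { "dump_path": "hprof.12345.0001.heap",
              "dump_time": "2012-07-20 12:34:56",
              "total": 123456789, "second": 0.0 }
          ]
        }
      }
    }

Unlike the csv subcommand, the sizes here stay in bytes.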
930 def do_list(sys_argv):
931 parser = optparse.OptionParser('Usage: %prog [-p POLICY] list <first-dump>')
932 parser.add_option('-p', '--policy', type='string', dest='policy',
933 help='profile with POLICY', metavar='POLICY')
934 options, args = parser.parse_args(sys_argv)
935
936 if len(args) != 2:
937 parser.error('needs 1 argument.')
938 return 1
939
940 dump_path = args[1]
941
942 prefix = find_prefix(dump_path)
943 buckets = load_buckets(prefix)
944 dumps, appeared_addresses = load_dumps(
945 determine_dump_path_list(dump_path, prefix), buckets)
946 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
947 policies = load_policies(options.policy)
948
949 for policy in sorted(policies):
950 rule_list = policies[policy].rules
951 components = policies[policy].components
952
953 component_sizes = dumps[0].apply_policy(
954 rule_list, buckets, dumps[0].dump_time, components, symbols)
955 sys.stdout.write('%s:\n' % policy)
793 for c in components: 956 for c in components:
794 if c in ['hour', 'minute', 'second']: 957 if c in ['hour', 'minute', 'second']:
795 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c])) 958 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))
796 else: 959 else:
797 sys.stdout.write('%30s %10.3f\n' % ( 960 sys.stdout.write('%30s %10.3f\n' % (
798 c, component_sizes[c] / 1024.0 / 1024.0)) 961 c, component_sizes[c] / 1024.0 / 1024.0))
799 962
800 elif action == '--expand': 963 for bucket in buckets.itervalues():
801 component_name = sys.argv[5] 964 bucket.clear_component_cache()
802 depth = sys.argv[6]
803 logs[0].expand(policy_list, buckets, component_name, int(depth))
804 965
805 elif action == '--pprof': 966 return 0
806 if len(sys.argv) > 5: 967
807 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5]) 968
808 else: 969 def do_expand(sys_argv):
809 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None) 970 parser = optparse.OptionParser(
971 'Usage: %prog expand <dump> <policy> <component> <depth>')
972 options, args = parser.parse_args(sys_argv)
973
974 if len(args) != 5:
975 parser.error('needs 4 arguments.')
976 return 1
977
978 dump_path = args[1]
979 target_policy = args[2]
980 component_name = args[3]
981 depth = args[4]
982
983 prefix = find_prefix(dump_path)
984 buckets = load_buckets(prefix)
985 dump, appeared_addresses = load_dump(dump_path, buckets)
986 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
987 policies = load_policies(target_policy)
988
989 rule_list = policies[target_policy].rules
990
991 dump.expand(rule_list, buckets, component_name, int(depth), symbols)
992
993 return 0
994
995
996 def do_pprof(sys_argv):
997 parser = optparse.OptionParser(
998 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>')
999 parser.add_option('-c', '--component', type='string', dest='component',
1000 help='restrict to COMPONENT', metavar='COMPONENT')
1001 options, args = parser.parse_args(sys_argv)
1002
1003 if len(args) != 3:
1004 parser.error('needs 2 arguments.')
1005 return 1
1006
1007 dump_path = args[1]
1008 target_policy = args[2]
1009 component = options.component
1010
1011 prefix = find_prefix(dump_path)
1012 buckets = load_buckets(prefix)
1013 dump, appeared_addresses = load_dump(dump_path, buckets)
1014 symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
1015 policies = load_policies(target_policy)
1016
1017 rule_list = policies[target_policy].rules
1018
1019 with open(prefix + '.maps', 'r') as maps_f:
1020 maps_lines = maps_f.readlines()
1021 dump.print_for_pprof(rule_list, buckets, maps_lines, component, symbols)
1022
1023 return 0
1024
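print_for_pprof() above emits the textual heap-profile format that pprof consumes; schematically (counts, sizes and addresses hypothetical):

    heap profile:     29:  1048576 [    29:  1048576] @ heapprofile
        29:  1048576 [    29:  1048576] @ 0x7f3a0000 0x7f3a1234
    MAPPED_LIBRARIES:
    (verbatim lines from the process's maps file)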
1025
1026 def main():
1027 COMMANDS = {
1028 'csv': do_csv,
1029 'expand': do_expand,
1030 'json': do_json,
1031 'list': do_list,
1032 'pprof': do_pprof,
1033 'stacktrace': do_stacktrace,
1034 }
1035
1036 # TODO(dmikurube): Remove this message after a while.
1037 if len(sys.argv) >= 2 and sys.argv[1].startswith('--'):
1038 sys.stderr.write("""
1039 **************** NOTICE!! ****************
1040 The command line format has changed.
1041 Please look at the description below.
1042 ******************************************
1043
1044 """)
1045
1046 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS):
1047 sys.stderr.write("""Usage: %s <command> [options] [<args>]
1048
1049 Commands:
1050 csv Classify memory usage in CSV
1051 expand Show all stacktraces contained in the specified component
1052 json Classify memory usage in JSON
1053 list Classify memory usage in simple listing format
1054 pprof Format the profile dump so that it can be processed by pprof
1055 stacktrace Convert runtime addresses to symbol names
1056
1057 Quick Reference:
1058 dmprof csv [-p POLICY] <first-dump>
1059 dmprof expand <dump> <policy> <component> <depth>
1060 dmprof json [-p POLICY] <first-dump>
1061 dmprof list [-p POLICY] <first-dump>
1062 dmprof pprof [-c COMPONENT] <dump> <policy>
1063 dmprof stacktrace <dump>
1064 """ % (sys.argv[0]))
1065 sys.exit(1)
1066 action = sys.argv.pop(1)
1067
1068 return COMMANDS[action](sys.argv)
810 1069
811 1070
812 if __name__ == '__main__': 1071 if __name__ == '__main__':
813 sys.exit(main()) 1072 sys.exit(main())