OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """The deep heap profiler script for Chrome.""" | 6 """The deep heap profiler script for Chrome.""" |
7 | 7 |
8 from datetime import datetime | 8 from datetime import datetime |
9 import json | 9 import json |
10 import os | 10 import os |
11 import re | 11 import re |
12 from optparse import OptionParser | |
M-A Ruel
2012/07/24 14:10:53
just import optparse
Dai Mikurube (NOT FULLTIME)
2012/07/24 16:19:24
Done.
| |
12 import shutil | 13 import shutil |
13 import subprocess | 14 import subprocess |
14 import sys | 15 import sys |
15 import tempfile | 16 import tempfile |
16 | 17 |
17 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( | 18 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( |
18 os.path.dirname(os.path.abspath(__file__)), | 19 os.path.dirname(os.path.abspath(__file__)), |
19 os.pardir, | 20 os.pardir, |
20 'find_runtime_symbols') | 21 'find_runtime_symbols') |
21 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) | 22 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) |
22 | 23 |
23 from prepare_symbol_info import prepare_symbol_info | 24 from prepare_symbol_info import prepare_symbol_info |
24 from find_runtime_symbols import find_runtime_symbols_list | 25 from find_runtime_symbols import find_runtime_symbols_list |
25 | 26 |
26 BUCKET_ID = 5 | 27 BUCKET_ID = 5 |
27 VIRTUAL = 0 | 28 VIRTUAL = 0 |
28 COMMITTED = 1 | 29 COMMITTED = 1 |
29 ALLOC_COUNT = 2 | 30 ALLOC_COUNT = 2 |
30 FREE_COUNT = 3 | 31 FREE_COUNT = 3 |
31 NULL_REGEX = re.compile('') | 32 NULL_REGEX = re.compile('') |
32 | 33 |
34 POLICIES_JSON_PATH = os.path.join(os.path.dirname(__file__), 'policies.json') | |
M-A Ruel
2012/07/24 14:10:53
no
Dai Mikurube (NOT FULLTIME)
2012/07/24 14:53:45
Sorry, what do you mean by this?
| |
35 | |
33 # Heap Profile Dump versions | 36 # Heap Profile Dump versions |
34 | 37 |
35 # DUMP_DEEP_1 is OBSOLETE. | 38 # DUMP_DEEP_1 is OBSOLETE. |
36 # DUMP_DEEP_1 DOES NOT distinguish mmap regions and malloc chunks. | 39 # DUMP_DEEP_1 DOES NOT distinguish mmap regions and malloc chunks. |
37 # Their stacktraces DO contain mmap* or tc-* at their tops. | 40 # Their stacktraces DO contain mmap* or tc-* at their tops. |
38 # They should be processed by POLICY_DEEP_1. | 41 # They should be processed by POLICY_DEEP_1. |
39 DUMP_DEEP_1 = 'DUMP_DEEP_1' | 42 DUMP_DEEP_1 = 'DUMP_DEEP_1' |
40 | 43 |
41 # DUMP_DEEP_2 is OBSOLETE. | 44 # DUMP_DEEP_2 is OBSOLETE. |
42 # DUMP_DEEP_2 DOES distinguish mmap regions and malloc chunks. | 45 # DUMP_DEEP_2 DOES distinguish mmap regions and malloc chunks. |
(...skipping 22 matching lines...) |
65 # Heap Profile Policy versions | 68 # Heap Profile Policy versions |
66 | 69 |
67 # POLICY_DEEP_1 DOES NOT include allocation_type columns. | 70 # POLICY_DEEP_1 DOES NOT include allocation_type columns. |
68 # mmap regions are distinguished with mmap frames in the pattern column. | 71 # mmap regions are distinguished with mmap frames in the pattern column. |
69 POLICY_DEEP_1 = 'POLICY_DEEP_1' | 72 POLICY_DEEP_1 = 'POLICY_DEEP_1' |
70 | 73 |
71 # POLICY_DEEP_2 DOES include allocation_type columns. | 74 # POLICY_DEEP_2 DOES include allocation_type columns. |
72 # mmap regions are distinguished with the allocation_type column. | 75 # mmap regions are distinguished with the allocation_type column. |
73 POLICY_DEEP_2 = 'POLICY_DEEP_2' | 76 POLICY_DEEP_2 = 'POLICY_DEEP_2' |
74 | 77 |
75 # TODO(dmikurube): Avoid global variables. | |
76 address_symbol_dict = {} | |
77 appeared_addresses = set() | |
78 components = [] | |
79 | |
80 | 78 |
81 class EmptyDumpException(Exception): | 79 class EmptyDumpException(Exception): |
82 def __init__(self, value): | 80 def __init__(self, value): |
83 self.value = value | 81 self.value = value |
84 def __str__(self): | 82 def __str__(self): |
85 return repr(self.value) | 83 return repr(self.value) |
86 | 84 |
87 | 85 |
88 class ParsingException(Exception): | 86 class ParsingException(Exception): |
89 def __init__(self, value): | 87 def __init__(self, value): |
90 self.value = value | 88 self.value = value |
91 def __str__(self): | 89 def __str__(self): |
92 return repr(self.value) | 90 return repr(self.value) |
93 | 91 |
94 | 92 |
95 class InvalidDumpException(ParsingException): | 93 class InvalidDumpException(ParsingException): |
96 def __init__(self, value): | 94 def __init__(self, value): |
97 self.value = value | 95 self.value = value |
98 def __str__(self): | 96 def __str__(self): |
99 return "invalid heap profile dump: %s" % repr(self.value) | 97 return "invalid heap profile dump: %s" % repr(self.value) |
100 | 98 |
101 | 99 |
102 class ObsoleteDumpVersionException(ParsingException): | 100 class ObsoleteDumpVersionException(ParsingException): |
103 def __init__(self, value): | 101 def __init__(self, value): |
104 self.value = value | 102 self.value = value |
105 def __str__(self): | 103 def __str__(self): |
106 return "obsolete heap profile dump version: %s" % repr(self.value) | 104 return "obsolete heap profile dump version: %s" % repr(self.value) |
107 | 105 |
108 | 106 |
109 class Policy(object): | 107 class Rule(object): |
108 """Represents one matching rule in a policy file.""" | |
110 | 109 |
111 def __init__(self, name, mmap, pattern): | 110 def __init__(self, name, mmap, pattern): |
112 self.name = name | 111 self.name = name |
113 self.mmap = mmap | 112 self.mmap = mmap |
114 self.condition = re.compile(pattern + r'\Z') | 113 self.condition = re.compile(pattern + r'\Z') |
115 | 114 |
116 | 115 |
117 def get_component(policy_list, bucket): | 116 class Policy(object): |
117 """Represents a policy, a content of a policy file.""" | |
118 | |
119 def __init__(self, rules, version, components): | |
120 self.rules = rules | |
121 self.version = version | |
122 self.components = components | |
123 | |
124 def append_rule(self, rule): | |
125 self.rules.append(rule) | |
126 | |
127 | |
128 def get_component(rule_list, bucket, symbols): | |
118 """Returns a component name which a given bucket belongs to. | 129 """Returns a component name which a given bucket belongs to. |
119 | 130 |
120 Args: | 131 Args: |
121 policy_list: A list containing Policy objects. (Parsed policy data by | 132 rule_list: A list of Rule objects. |
122 parse_policy.) | |
123 bucket: A Bucket object to be searched for. | 133 bucket: A Bucket object to be searched for. |
134 symbols: A dict mapping runtime addresses to symbol names. | |
124 | 135 |
125 Returns: | 136 Returns: |
126 A string representing a component name. | 137 A string representing a component name. |
127 """ | 138 """ |
128 if not bucket: | 139 if not bucket: |
129 return 'no-bucket' | 140 return 'no-bucket' |
130 if bucket.component: | 141 if bucket.component_cache: |
131 return bucket.component | 142 return bucket.component_cache |
132 | 143 |
133 stacktrace = ''.join( | 144 stacktrace = ''.join(symbols[a] + ' ' for a in bucket.stacktrace).strip() |
134 address_symbol_dict[a] + ' ' for a in bucket.stacktrace).strip() | |
135 | 145 |
136 for policy in policy_list: | 146 for rule in rule_list: |
137 if bucket.mmap == policy.mmap and policy.condition.match(stacktrace): | 147 if bucket.mmap == rule.mmap and rule.condition.match(stacktrace): |
138 bucket.component = policy.name | 148 bucket.component_cache = rule.name |
139 return policy.name | 149 return rule.name |
140 | 150 |
141 assert False | 151 assert False |
142 | 152 |
143 | 153 |
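For reference, a minimal sketch of the matching step performed by get_component: Rule.__init__ anchors the policy pattern with r'\Z', so a rule fires only when its pattern matches the entire symbolized stacktrace. The pattern and symbol names below are hypothetical.

    import re

    # As compiled in Rule.__init__: pattern + r'\Z'.
    condition = re.compile(r'.*WebCore::.*' + r'\Z')
    # get_component joins the bucket's symbolized frames with spaces.
    stacktrace = 'malloc WebCore::StringImpl::create WebCore::Document::write'
    print(bool(condition.match(stacktrace)))  # True: the bucket gets this rule's name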
144 class Bucket(object): | 154 class Bucket(object): |
155 """Represents a bucket, which is a unit of memory classification.""" | |
145 | 156 |
146 def __init__(self, stacktrace, mmap): | 157 def __init__(self, stacktrace, mmap): |
147 self.stacktrace = stacktrace | 158 self.stacktrace = stacktrace |
148 self.mmap = mmap | 159 self.mmap = mmap |
149 self.component = '' | 160 self.component_cache = '' |
161 | |
162 def clear_component_cache(self): | |
163 self.component_cache = '' | |
150 | 164 |
151 | 165 |
152 class Log(object): | 166 class Dump(object): |
167 """Represents one heap profile dump.""" | |
153 | 168 |
154 """A class representing one dumped log data.""" | 169 def __init__(self, dump_path): |
155 def __init__(self, log_path): | 170 self.dump_path = dump_path |
156 self.log_path = log_path | 171 self.dump_lines = [ |
157 self.log_lines = [ | 172 l for l in open(self.dump_path, 'r') if l and not l.startswith('#')] |
158 l for l in open(self.log_path, 'r') if l and not l.startswith('#')] | 173 self.dump_version = '' |
159 self.log_version = '' | |
160 sys.stderr.write('Loading a dump: %s\n' % log_path) | |
161 self.stacktrace_lines = [] | 174 self.stacktrace_lines = [] |
162 self.counters = {} | 175 self.counters = {} |
163 self.log_time = os.stat(self.log_path).st_mtime | 176 self.dump_time = os.stat(self.dump_path).st_mtime |
164 | 177 |
165 def dump_stacktrace(buckets): | 178 def print_stacktrace(self, buckets, symbols): |
166 """Prints a given stacktrace. | 179 """Prints a given stacktrace. |
167 | 180 |
168 Args: | 181 Args: |
169 buckets: A dict mapping bucket ids and their corresponding Bucket | 182 buckets: A dict mapping bucket ids to Bucket objects. |
170 objects. | 183 symbols: A dict mapping runtime addresses to symbol names. |
171 """ | 184 """ |
172 for line in self.stacktrace_lines: | 185 for line in self.stacktrace_lines: |
173 words = line.split() | 186 words = line.split() |
174 bucket = buckets.get(int(words[BUCKET_ID])) | 187 bucket = buckets.get(int(words[BUCKET_ID])) |
175 if not bucket: | 188 if not bucket: |
176 continue | 189 continue |
177 for i in range(0, BUCKET_ID - 1): | 190 for i in range(0, BUCKET_ID - 1): |
178 sys.stdout.write(words[i] + ' ') | 191 sys.stdout.write(words[i] + ' ') |
179 for address in bucket.stacktrace: | 192 for address in bucket.stacktrace: |
180 sys.stdout.write((address_symbol_dict.get(address) or address) + ' ') | 193 sys.stdout.write((symbols.get(address) or address) + ' ') |
181 sys.stdout.write('\n') | 194 sys.stdout.write('\n') |
182 | 195 |
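For reference, a hypothetical DUMP_DEEP_5 stacktrace line and how its fields map to the index constants defined at the top of the script:

    # Index constants as defined at the top of this script.
    VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, BUCKET_ID = 0, 1, 2, 3, 5

    line = '1048576 524288 10 2 @ 42'  # hypothetical values
    words = line.split()
    assert words[BUCKET_ID - 1] == '@'        # separator verified by check_stacktrace_line
    committed = int(words[COMMITTED])         # 524288 bytes committed
    live = int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])  # 10 - 2 = 8 live chunks
    bucket_id = int(words[BUCKET_ID])         # 42, the key into the buckets dict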
183 @staticmethod | 196 @staticmethod |
184 def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets, | 197 def accumulate_size_for_pprof(stacktrace_lines, rule_list, buckets, |
185 component_name): | 198 component_name, symbols): |
186 """Accumulates size of committed chunks and the number of allocated chunks. | 199 """Accumulates size of committed chunks and the number of allocated chunks. |
187 | 200 |
188 Args: | 201 Args: |
189 stacktrace_lines: A list of strings which are valid as stacktraces. | 202 stacktrace_lines: A list of strings which are valid as stacktraces. |
190 policy_list: A list containing Policy objects. (Parsed policy data by | 203 rule_list: A list of Rule objects. |
191 parse_policy.) | 204 buckets: A dict mapping bucket ids to Bucket objects. |
192 buckets: A dict mapping bucket ids and their corresponding Bucket | |
193 objects. | |
194 component_name: A name of component for filtering. | 205 component_name: A name of component for filtering. |
206 symbols: A dict mapping runtime addresses to symbol names. | |
195 | 207 |
196 Returns: | 208 Returns: |
197 Two integers which are the accumulated size of committed regions and the | 209 Two integers which are the accumulated size of committed regions and the |
198 number of allocated chunks, respectively. | 210 number of allocated chunks, respectively. |
199 """ | 211 """ |
200 com_committed = 0 | 212 com_committed = 0 |
201 com_allocs = 0 | 213 com_allocs = 0 |
202 for line in stacktrace_lines: | 214 for line in stacktrace_lines: |
203 words = line.split() | 215 words = line.split() |
204 bucket = buckets.get(int(words[BUCKET_ID])) | 216 bucket = buckets.get(int(words[BUCKET_ID])) |
205 if (not bucket or | 217 if (not bucket or |
206 (component_name and | 218 (component_name and |
207 component_name != get_component(policy_list, bucket))): | 219 component_name != get_component(rule_list, bucket, symbols))): |
208 continue | 220 continue |
209 | 221 |
210 com_committed += int(words[COMMITTED]) | 222 com_committed += int(words[COMMITTED]) |
211 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) | 223 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) |
212 | 224 |
213 return com_committed, com_allocs | 225 return com_committed, com_allocs |
214 | 226 |
215 @staticmethod | 227 @staticmethod |
216 def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list, | 228 def print_stacktrace_lines_for_pprof(stacktrace_lines, rule_list, |
217 buckets, component_name): | 229 buckets, component_name, symbols): |
218 """Prints information of stacktrace lines for pprof. | 230 """Prints information of stacktrace lines for pprof. |
219 | 231 |
220 Args: | 232 Args: |
221 stacktrace_lines: A list of strings which are valid as stacktraces. | 233 stacktrace_lines: A list of strings which are valid as stacktraces. |
222 policy_list: A list containing Policy objects. (Parsed policy data by | 234 rule_list: A list of Rule objects. |
223 parse_policy.) | 235 buckets: A dict mapping bucket ids to Bucket objects. |
224 buckets: A dict mapping bucket ids and their corresponding Bucket | |
225 objects. | |
226 component_name: A name of component for filtering. | 236 component_name: A name of component for filtering. |
237 symbols: A dict mapping runtime addresses to symbol names. | |
227 """ | 238 """ |
228 for line in stacktrace_lines: | 239 for line in stacktrace_lines: |
229 words = line.split() | 240 words = line.split() |
230 bucket = buckets.get(int(words[BUCKET_ID])) | 241 bucket = buckets.get(int(words[BUCKET_ID])) |
231 if (not bucket or | 242 if (not bucket or |
232 (component_name and | 243 (component_name and |
233 component_name != get_component(policy_list, bucket))): | 244 component_name != get_component(rule_list, bucket, symbols))): |
234 continue | 245 continue |
235 | 246 |
236 sys.stdout.write('%6d: %8s [%6d: %8s] @' % ( | 247 sys.stdout.write('%6d: %8s [%6d: %8s] @' % ( |
237 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | 248 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), |
238 words[COMMITTED], | 249 words[COMMITTED], |
239 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | 250 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), |
240 words[COMMITTED])) | 251 words[COMMITTED])) |
241 for address in bucket.stacktrace: | 252 for address in bucket.stacktrace: |
242 sys.stdout.write(' ' + address) | 253 sys.stdout.write(' ' + address) |
243 sys.stdout.write('\n') | 254 sys.stdout.write('\n') |
244 | 255 |
245 def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name): | 256 def print_for_pprof( |
246 """Converts the log file so it can be processed by pprof. | 257 self, rule_list, buckets, maps_lines, component_name, symbols): |
258 """Converts the heap profile dump so it can be processed by pprof. | |
247 | 259 |
248 Args: | 260 Args: |
249 policy_list: A list containing Policy objects. (Parsed policy data by | 261 rule_list: A list of Rule objects. |
250 parse_policy.) | 262 buckets: A dict mapping bucket ids to Bucket objects. |
251 buckets: A dict mapping bucket ids and their corresponding Bucket | 263 maps_lines: A list of strings containing /proc/.../maps. |
252 objects. | |
253 mapping_lines: A list of strings containing /proc/.../maps. | |
254 component_name: A name of component for filtering. | 264 component_name: A name of component for filtering. |
265 symbols: A dict mapping runtime addresses to symbol names. | |
255 """ | 266 """ |
256 sys.stdout.write('heap profile: ') | 267 sys.stdout.write('heap profile: ') |
257 com_committed, com_allocs = self.accumulate_size_for_pprof( | 268 com_committed, com_allocs = self.accumulate_size_for_pprof( |
258 self.stacktrace_lines, policy_list, buckets, component_name) | 269 self.stacktrace_lines, rule_list, buckets, component_name, symbols) |
259 | 270 |
260 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( | 271 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( |
261 com_allocs, com_committed, com_allocs, com_committed)) | 272 com_allocs, com_committed, com_allocs, com_committed)) |
262 | 273 |
263 self.dump_stacktrace_lines_for_pprof( | 274 self.print_stacktrace_lines_for_pprof( |
264 self.stacktrace_lines, policy_list, buckets, component_name) | 275 self.stacktrace_lines, rule_list, buckets, component_name, symbols) |
265 | 276 |
266 sys.stdout.write('MAPPED_LIBRARIES:\n') | 277 sys.stdout.write('MAPPED_LIBRARIES:\n') |
267 for line in mapping_lines: | 278 for line in maps_lines: |
268 sys.stdout.write(line) | 279 sys.stdout.write(line) |
269 | 280 |
270 @staticmethod | 281 @staticmethod |
271 def check_stacktrace_line(stacktrace_line, buckets): | 282 def check_stacktrace_line(stacktrace_line, buckets, appeared_addresses): |
272 """Checks if a given stacktrace_line is valid as stacktrace. | 283 """Checks if a given stacktrace_line is valid as stacktrace. |
273 | 284 |
274 Args: | 285 Args: |
275 stacktrace_line: A string to be checked. | 286 stacktrace_line: A string to be checked. |
276 buckets: A dict mapping bucket ids and their corresponding Bucket | 287 buckets: A dict mapping bucket ids to Bucket objects. |
277 objects. | 288 appeared_addresses: A list where appeared addresses will be stored. |
278 | 289 |
279 Returns: | 290 Returns: |
280 True if the given stacktrace_line is valid. | 291 True if the given stacktrace_line is valid. |
281 """ | 292 """ |
282 words = stacktrace_line.split() | 293 words = stacktrace_line.split() |
283 if len(words) < BUCKET_ID + 1: | 294 if len(words) < BUCKET_ID + 1: |
284 return False | 295 return False |
285 if words[BUCKET_ID - 1] != '@': | 296 if words[BUCKET_ID - 1] != '@': |
286 return False | 297 return False |
287 bucket = buckets.get(int(words[BUCKET_ID])) | 298 bucket = buckets.get(int(words[BUCKET_ID])) |
(...skipping 10 matching lines...) |
298 A pair of an integer indicating a line number after skipped, and a | 309 A pair of an integer indicating a line number after skipped, and a |
299 boolean value which is True if found a line which skipping_condition | 310 boolean value which is True if found a line which skipping_condition |
300 is False for. | 311 is False for. |
301 """ | 312 """ |
302 while skipping_condition(line_number): | 313 while skipping_condition(line_number): |
303 line_number += 1 | 314 line_number += 1 |
304 if line_number >= max_line_number: | 315 if line_number >= max_line_number: |
305 return line_number, False | 316 return line_number, False |
306 return line_number, True | 317 return line_number, True |
307 | 318 |
308 def parse_stacktraces_while_valid(self, buckets, log_lines, line_number): | 319 def parse_stacktraces_while_valid( |
320 self, buckets, dump_lines, line_number, appeared_addresses): | |
309 """Parses stacktrace lines while the lines are valid. | 321 """Parses stacktrace lines while the lines are valid. |
310 | 322 |
311 Args: | 323 Args: |
312 buckets: A dict mapping bucket ids and their corresponding Bucket | 324 buckets: A dict mapping bucket ids to Bucket objects. |
313 objects. | 325 dump_lines: A list of lines to be parsed. |
314 log_lines: A list of lines to be parsed. | 326 line_number: A line number to start parsing in dump_lines. |
315 line_number: An integer representing the starting line number in | 327 appeared_addresses: A list where appeared addresses will be stored. |
316 log_lines. | |
317 | 328 |
318 Returns: | 329 Returns: |
319 A pair of a list of valid lines and an integer representing the last | 330 A pair of a list of valid lines and an integer representing the last |
320 line number in log_lines. | 331 line number in dump_lines. |
321 """ | 332 """ |
322 (line_number, _) = self.skip_lines_while( | 333 (line_number, _) = self.skip_lines_while( |
323 line_number, len(log_lines), | 334 line_number, len(dump_lines), |
324 lambda n: not log_lines[n].split()[0].isdigit()) | 335 lambda n: not dump_lines[n].split()[0].isdigit()) |
325 stacktrace_lines_start = line_number | 336 stacktrace_lines_start = line_number |
326 (line_number, _) = self.skip_lines_while( | 337 (line_number, _) = self.skip_lines_while( |
327 line_number, len(log_lines), | 338 line_number, len(dump_lines), |
328 lambda n: self.check_stacktrace_line(log_lines[n], buckets)) | 339 lambda n: self.check_stacktrace_line( |
329 return (log_lines[stacktrace_lines_start:line_number], line_number) | 340 dump_lines[n], buckets, appeared_addresses)) |
341 return (dump_lines[stacktrace_lines_start:line_number], line_number) | |
330 | 342 |
331 def parse_stacktraces(self, buckets, line_number): | 343 def parse_stacktraces(self, buckets, line_number, appeared_addresses): |
332 """Parses lines in self.log_lines as stacktrace. | 344 """Parses lines in self.dump_lines as stacktrace. |
333 | 345 |
334 Valid stacktrace lines are stored into self.stacktrace_lines. | 346 Valid stacktrace lines are stored into self.stacktrace_lines. |
335 | 347 |
336 Args: | 348 Args: |
337 buckets: A dict mapping bucket ids and their corresponding Bucket | 349 buckets: A dict mapping bucket ids to Bucket objects. |
338 objects. | 350 line_number: A line number to start parsing in dump_lines. |
339 line_number: An integer representing the starting line number in | 351 appeared_addresses: A list where appeared addresses will be stored. |
340 log_lines. | |
341 | 352 |
342 Raises: | 353 Raises: |
343 ParsingException for invalid dump versions. | 354 ParsingException for invalid dump versions. |
344 """ | 355 """ |
345 sys.stderr.write(' Version: %s\n' % self.log_version) | 356 if self.dump_version == DUMP_DEEP_5: |
346 | |
347 if self.log_version == DUMP_DEEP_5: | |
348 (self.stacktrace_lines, line_number) = ( | 357 (self.stacktrace_lines, line_number) = ( |
349 self.parse_stacktraces_while_valid( | 358 self.parse_stacktraces_while_valid( |
350 buckets, self.log_lines, line_number)) | 359 buckets, self.dump_lines, line_number, appeared_addresses)) |
351 | 360 |
352 elif self.log_version in DUMP_DEEP_OBSOLETE: | 361 elif self.dump_version in DUMP_DEEP_OBSOLETE: |
353 raise ObsoleteDumpVersionException(self.log_version) | 362 raise ObsoleteDumpVersionException(self.dump_version) |
354 | 363 |
355 else: | 364 else: |
356 raise InvalidDumpException('Invalid version: %s' % self.log_version) | 365 raise InvalidDumpException('Invalid version: %s' % self.dump_version) |
357 | 366 |
358 def parse_global_stats(self): | 367 def parse_global_stats(self): |
359 """Parses lines in self.log_lines as global stats.""" | 368 """Parses lines in self.dump_lines as global stats.""" |
360 (ln, _) = self.skip_lines_while( | 369 (ln, _) = self.skip_lines_while( |
361 0, len(self.log_lines), | 370 0, len(self.dump_lines), |
362 lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n') | 371 lambda n: self.dump_lines[n] != 'GLOBAL_STATS:\n') |
363 | 372 |
364 global_stat_names = [ | 373 global_stat_names = [ |
365 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', | 374 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', |
366 'nonprofiled-absent', 'nonprofiled-anonymous', | 375 'nonprofiled-absent', 'nonprofiled-anonymous', |
367 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', | 376 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', |
368 'nonprofiled-stack', 'nonprofiled-other', | 377 'nonprofiled-stack', 'nonprofiled-other', |
369 'profiled-mmap', 'profiled-malloc'] | 378 'profiled-mmap', 'profiled-malloc'] |
370 | 379 |
371 for prefix in global_stat_names: | 380 for prefix in global_stat_names: |
372 (ln, _) = self.skip_lines_while( | 381 (ln, _) = self.skip_lines_while( |
373 ln, len(self.log_lines), | 382 ln, len(self.dump_lines), |
374 lambda n: self.log_lines[n].split()[0] != prefix) | 383 lambda n: self.dump_lines[n].split()[0] != prefix) |
375 words = self.log_lines[ln].split() | 384 words = self.dump_lines[ln].split() |
376 self.counters[prefix + '_virtual'] = int(words[-2]) | 385 self.counters[prefix + '_virtual'] = int(words[-2]) |
377 self.counters[prefix + '_committed'] = int(words[-1]) | 386 self.counters[prefix + '_committed'] = int(words[-1]) |
378 | 387 |
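For reference, a hypothetical fragment of the GLOBAL_STATS section parsed above; for each name in global_stat_names, the last two columns of the matching row are stored as counters['<name>_virtual'] and counters['<name>_committed']:

    GLOBAL_STATS:
         total           223456256    89456640
         file-exec        34123776    12345344
         profiled-mmap    45678901    23456780
         profiled-malloc   5678912     4567040

(Only a few rows are shown; the full set of row names is listed in global_stat_names.)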
379 def parse_version(self): | 388 def parse_version(self): |
380 """Parses a version string in self.log_lines. | 389 """Parses a version string in self.dump_lines. |
381 | 390 |
382 Returns: | 391 Returns: |
383 A pair of (a string representing a version of the stacktrace dump, | 392 A pair of (a string representing a version of the stacktrace dump, |
384 and an integer indicating a line number next to the version string). | 393 and an integer indicating a line number next to the version string). |
385 | 394 |
386 Raises: | 395 Raises: |
387 ParsingException for invalid dump versions. | 396 ParsingException for invalid dump versions. |
388 """ | 397 """ |
389 version = '' | 398 version = '' |
390 | 399 |
391 # Skip until an identifiable line. | 400 # Skip until an identifiable line. |
392 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') | 401 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') |
393 if not self.log_lines: | 402 if not self.dump_lines: |
394 raise EmptyDumpException('Empty heap dump file.') | 403 raise EmptyDumpException('Empty heap dump file.') |
395 (ln, found) = self.skip_lines_while( | 404 (ln, found) = self.skip_lines_while( |
396 0, len(self.log_lines), | 405 0, len(self.dump_lines), |
397 lambda n: not self.log_lines[n].startswith(headers)) | 406 lambda n: not self.dump_lines[n].startswith(headers)) |
398 if not found: | 407 if not found: |
399 raise InvalidDumpException('No version header.') | 408 raise InvalidDumpException('No version header.') |
400 | 409 |
401 # Identify a version. | 410 # Identify a version. |
402 if self.log_lines[ln].startswith('heap profile: '): | 411 if self.dump_lines[ln].startswith('heap profile: '): |
403 version = self.log_lines[ln][13:].strip() | 412 version = self.dump_lines[ln][13:].strip() |
404 if version == DUMP_DEEP_5: | 413 if version == DUMP_DEEP_5: |
405 (ln, _) = self.skip_lines_while( | 414 (ln, _) = self.skip_lines_while( |
406 ln, len(self.log_lines), | 415 ln, len(self.dump_lines), |
407 lambda n: self.log_lines[n] != 'STACKTRACES:\n') | 416 lambda n: self.dump_lines[n] != 'STACKTRACES:\n') |
408 elif version in DUMP_DEEP_OBSOLETE: | 417 elif version in DUMP_DEEP_OBSOLETE: |
409 raise ObsoleteDumpVersionException(version) | 418 raise ObsoleteDumpVersionException(version) |
410 else: | 419 else: |
411 raise InvalidDumpException('Invalid version: %s' % version) | 420 raise InvalidDumpException('Invalid version: %s' % version) |
412 elif self.log_lines[ln] == 'STACKTRACES:\n': | 421 elif self.dump_lines[ln] == 'STACKTRACES:\n': |
413 raise ObsoleteDumpVersionException(DUMP_DEEP_1) | 422 raise ObsoleteDumpVersionException(DUMP_DEEP_1) |
414 elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n': | 423 elif self.dump_lines[ln] == 'MMAP_STACKTRACES:\n': |
415 raise ObsoleteDumpVersionException(DUMP_DEEP_2) | 424 raise ObsoleteDumpVersionException(DUMP_DEEP_2) |
416 | 425 |
417 return (version, ln) | 426 return (version, ln) |
418 | 427 |
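For reference, a DUMP_DEEP_5 dump begins with a header like this hypothetical snippet; parse_version takes the text after 'heap profile: ' as the version string and then skips ahead to the 'STACKTRACES:' line:

    heap profile: DUMP_DEEP_5
    GLOBAL_STATS:
        ... global stats rows ...
    STACKTRACES:
        ... stacktrace lines ...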
419 def parse_log(self, buckets): | 428 def parse_dump(self, buckets, appeared_addresses): |
420 self.log_version, ln = self.parse_version() | 429 self.dump_version, ln = self.parse_version() |
421 self.parse_global_stats() | 430 self.parse_global_stats() |
422 self.parse_stacktraces(buckets, ln) | 431 self.parse_stacktraces(buckets, ln, appeared_addresses) |
423 | 432 |
424 @staticmethod | 433 @staticmethod |
425 def accumulate_size_for_policy(stacktrace_lines, | 434 def accumulate_size_for_policy(stacktrace_lines, |
426 policy_list, buckets, sizes): | 435 rule_list, buckets, sizes, symbols): |
427 for line in stacktrace_lines: | 436 for line in stacktrace_lines: |
428 words = line.split() | 437 words = line.split() |
429 bucket = buckets.get(int(words[BUCKET_ID])) | 438 bucket = buckets.get(int(words[BUCKET_ID])) |
430 component_match = get_component(policy_list, bucket) | 439 component_match = get_component(rule_list, bucket, symbols) |
440 | |
M-A Ruel
2012/07/24 14:10:53
This new line is gratuitous. Intended?
Dai Mikurube (NOT FULLTIME)
2012/07/24 16:19:24
It was unintended. Thanks.
| |
431 sizes[component_match] += int(words[COMMITTED]) | 441 sizes[component_match] += int(words[COMMITTED]) |
432 | 442 |
433 if component_match.startswith('tc-'): | 443 if component_match.startswith('tc-'): |
434 sizes['tc-total-log'] += int(words[COMMITTED]) | 444 sizes['tc-total-log'] += int(words[COMMITTED]) |
435 elif component_match.startswith('mmap-'): | 445 elif component_match.startswith('mmap-'): |
436 sizes['mmap-total-log'] += int(words[COMMITTED]) | 446 sizes['mmap-total-log'] += int(words[COMMITTED]) |
437 else: | 447 else: |
438 sizes['other-total-log'] += int(words[COMMITTED]) | 448 sizes['other-total-log'] += int(words[COMMITTED]) |
439 | 449 |
440 def apply_policy(self, policy_list, buckets, first_log_time): | 450 def apply_policy( |
451 self, rule_list, buckets, first_dump_time, components, symbols): | |
441 """Aggregates the total memory size of each component. | 452 """Aggregates the total memory size of each component. |
442 | 453 |
443 Iterate through all stacktraces and attribute them to one of the components | 454 Iterate through all stacktraces and attribute them to one of the components |
444 based on the policy. It is important to apply the rules in the right order. | 455 based on the policy. It is important to apply the rules in the right order. |
445 | 456 |
446 Args: | 457 Args: |
447 policy_list: A list containing Policy objects. (Parsed policy data by | 458 rule_list: A list of Rule objects. |
448 parse_policy.) | 459 buckets: A dict mapping bucket ids to Bucket objects. |
449 buckets: A dict mapping bucket ids and their corresponding Bucket | 460 first_dump_time: An integer representing time when the first dump is |
450 objects. | |
451 first_log_time: An integer representing time when the first log is | |
452 dumped. | 461 dumped. |
462 components: A list of strings of component names. | |
463 symbols: A dict mapping runtime addresses to symbol names. | |
453 | 464 |
454 Returns: | 465 Returns: |
455 A dict mapping components and their corresponding sizes. | 466 A dict mapping components and their corresponding sizes. |
456 """ | 467 """ |
457 | 468 |
458 sys.stderr.write('apply policy:%s\n' % (self.log_path)) | 469 sys.stderr.write('Applying policy: "%s".\n' % self.dump_path) |
459 sizes = dict((c, 0) for c in components) | 470 sizes = dict((c, 0) for c in components) |
460 | 471 |
461 self.accumulate_size_for_policy(self.stacktrace_lines, | 472 self.accumulate_size_for_policy(self.stacktrace_lines, |
462 policy_list, buckets, sizes) | 473 rule_list, buckets, sizes, symbols) |
463 | 474 |
464 mmap_prefix = 'profiled-mmap' | 475 mmap_prefix = 'profiled-mmap' |
465 malloc_prefix = 'profiled-malloc' | 476 malloc_prefix = 'profiled-malloc' |
466 | 477 |
467 sizes['mmap-no-log'] = ( | 478 sizes['mmap-no-log'] = ( |
468 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log']) | 479 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log']) |
469 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix] | 480 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix] |
470 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix] | 481 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix] |
471 | 482 |
472 sizes['tc-no-log'] = ( | 483 sizes['tc-no-log'] = ( |
(...skipping 33 matching lines...) |
506 'nonprofiled-file-nonexec_committed', | 517 'nonprofiled-file-nonexec_committed', |
507 'nonprofiled-stack_committed', | 518 'nonprofiled-stack_committed', |
508 'nonprofiled-other_committed') | 519 'nonprofiled-other_committed') |
509 sizes['mustbezero'] = ( | 520 sizes['mustbezero'] = ( |
510 self.counters['total_committed'] - | 521 self.counters['total_committed'] - |
511 sum(self.counters[i] for i in removed)) | 522 sum(self.counters[i] for i in removed)) |
512 if 'total-exclude-profiler' in sizes: | 523 if 'total-exclude-profiler' in sizes: |
513 sizes['total-exclude-profiler'] = ( | 524 sizes['total-exclude-profiler'] = ( |
514 self.counters['total_committed'] - sizes['mmap-profiler']) | 525 self.counters['total_committed'] - sizes['mmap-profiler']) |
515 if 'hour' in sizes: | 526 if 'hour' in sizes: |
516 sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0 | 527 sizes['hour'] = (self.dump_time - first_dump_time) / 60.0 / 60.0 |
517 if 'minute' in sizes: | 528 if 'minute' in sizes: |
518 sizes['minute'] = (self.log_time - first_log_time) / 60.0 | 529 sizes['minute'] = (self.dump_time - first_dump_time) / 60.0 |
519 if 'second' in sizes: | 530 if 'second' in sizes: |
520 sizes['second'] = self.log_time - first_log_time | 531 sizes['second'] = self.dump_time - first_dump_time |
521 | 532 |
522 return sizes | 533 return sizes |
523 | 534 |
524 @staticmethod | 535 @staticmethod |
525 def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets, | 536 def accumulate_size_for_expand(stacktrace_lines, rule_list, buckets, |
526 component_name, depth, sizes): | 537 component_name, depth, sizes, symbols): |
527 for line in stacktrace_lines: | 538 for line in stacktrace_lines: |
528 words = line.split() | 539 words = line.split() |
529 bucket = buckets.get(int(words[BUCKET_ID])) | 540 bucket = buckets.get(int(words[BUCKET_ID])) |
530 component_match = get_component(policy_list, bucket) | 541 component_match = get_component(rule_list, bucket, symbols) |
531 if component_match == component_name: | 542 if component_match == component_name: |
532 stacktrace_sequence = '' | 543 stacktrace_sequence = '' |
533 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace), | 544 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace), |
534 1 + depth)]: | 545 1 + depth)]: |
535 stacktrace_sequence += address_symbol_dict[address] + ' ' | 546 stacktrace_sequence += symbols[address] + ' ' |
536 if not stacktrace_sequence in sizes: | 547 if not stacktrace_sequence in sizes: |
537 sizes[stacktrace_sequence] = 0 | 548 sizes[stacktrace_sequence] = 0 |
538 sizes[stacktrace_sequence] += int(words[COMMITTED]) | 549 sizes[stacktrace_sequence] += int(words[COMMITTED]) |
539 | 550 |
540 def expand(self, policy_list, buckets, component_name, depth): | 551 def expand(self, rule_list, buckets, component_name, depth, symbols): |
541 """Prints all stacktraces in a given component of given depth. | 552 """Prints all stacktraces in a given component of given depth. |
542 | 553 |
543 Args: | 554 Args: |
544 policy_list: A list containing Policy objects. (Parsed policy data by | 555 rule_list: A list of Rule objects. |
545 parse_policy.) | 556 buckets: A dict mapping bucket ids to Bucket objects. |
546 buckets: A dict mapping bucket ids and their corresponding Bucket | |
547 objects. | |
548 component_name: A name of component for filtering. | 557 component_name: A name of component for filtering. |
549 depth: An integer representing depth to be printed. | 558 depth: An integer representing depth to be printed. |
559 symbols: A dict mapping runtime addresses to symbol names. | |
550 """ | 560 """ |
551 sizes = {} | 561 sizes = {} |
552 | 562 |
553 self.accumulate_size_for_expand( | 563 self.accumulate_size_for_expand( |
554 self.stacktrace_lines, policy_list, buckets, component_name, | 564 self.stacktrace_lines, rule_list, buckets, component_name, |
555 depth, sizes) | 565 depth, sizes, symbols) |
556 | 566 |
557 sorted_sizes_list = sorted( | 567 sorted_sizes_list = sorted( |
558 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) | 568 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) |
559 total = 0 | 569 total = 0 |
560 for size_pair in sorted_sizes_list: | 570 for size_pair in sorted_sizes_list: |
561 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0])) | 571 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0])) |
562 total += size_pair[1] | 572 total += size_pair[1] |
563 sys.stderr.write('total: %d\n' % (total)) | 573 sys.stderr.write('total: %d\n' % (total)) |
564 | 574 |
565 | 575 |
566 def update_symbols(symbol_path, mapping_lines, maps_path): | 576 def update_symbols( |
577 symbol_path, maps_path, appeared_addresses, symbols): | |
567 """Updates address/symbol mapping on memory and in a .symbol cache file. | 578 """Updates address/symbol mapping on memory and in a .symbol cache file. |
568 | 579 |
569 It reads cached address/symbol mapping from a .symbol file if it exists. | 580 It reads cached address/symbol mapping from a .symbol file if it exists. |
570 Then, it resolves unresolved addresses from a Chrome binary with pprof. | 581 Then, it resolves unresolved addresses from a Chrome binary with pprof. |
571 Both mappings on memory and in a .symbol cache file are updated. | 582 Both mappings on memory and in a .symbol cache file are updated. |
572 | 583 |
573 Symbol files are formatted as follows: | 584 Symbol files are formatted as follows: |
574 <Address> <Symbol> | 585 <Address> <Symbol> |
575 <Address> <Symbol> | 586 <Address> <Symbol> |
576 <Address> <Symbol> | 587 <Address> <Symbol> |
577 ... | 588 ... |
578 | 589 |
579 Args: | 590 Args: |
580 symbol_path: A string representing a path for a .symbol file. | 591 symbol_path: A string representing a path for a .symbol file. |
581 mapping_lines: A list of strings containing /proc/.../maps. | |
582 maps_path: A string of the path of /proc/.../maps. | 592 maps_path: A string of the path of /proc/.../maps. |
593 appeared_addresses: A list of known addresses. | |
594 symbols: A dict mapping runtime addresses to symbol names. | |
583 """ | 595 """ |
584 with open(symbol_path, mode='a+') as symbol_f: | 596 with open(symbol_path, mode='a+') as symbol_f: |
585 symbol_lines = symbol_f.readlines() | 597 symbol_lines = symbol_f.readlines() |
586 if symbol_lines: | 598 if symbol_lines: |
587 for line in symbol_lines: | 599 for line in symbol_lines: |
588 items = line.split(None, 1) | 600 items = line.split(None, 1) |
589 address_symbol_dict[items[0]] = items[1].rstrip() | 601 if len(items) == 1: |
602 items.append('??') | |
603 symbols[items[0]] = items[1].rstrip() | |
604 if symbols: | |
605 sys.stderr.write(' Found %d symbols in cache.\n' % len(symbols)) | |
606 else: | |
607 sys.stderr.write(' No symbols found in cache.\n') | |
590 | 608 |
591 unresolved_addresses = sorted( | 609 unresolved_addresses = sorted( |
592 a for a in appeared_addresses if a not in address_symbol_dict) | 610 a for a in appeared_addresses if a not in symbols) |
593 | 611 |
594 if unresolved_addresses: | 612 if not unresolved_addresses: |
613 sys.stderr.write(' No need to resolve any more addresses.\n') | |
614 else: | |
615 sys.stderr.write(' %d addresses are unresolved.\n' % | |
616 len(unresolved_addresses)) | |
595 prepared_data_dir = tempfile.mkdtemp() | 617 prepared_data_dir = tempfile.mkdtemp() |
596 prepare_symbol_info(maps_path, prepared_data_dir) | 618 prepare_symbol_info(maps_path, prepared_data_dir) |
597 | 619 |
598 symbols = find_runtime_symbols_list( | 620 symbol_list = find_runtime_symbols_list( |
599 prepared_data_dir, unresolved_addresses) | 621 prepared_data_dir, unresolved_addresses) |
600 | 622 |
601 for address, symbol in zip(unresolved_addresses, symbols): | 623 for address, symbol in zip(unresolved_addresses, symbol_list): |
624 if not symbol: | |
625 symbol = '??' | |
602 stripped_symbol = symbol.strip() | 626 stripped_symbol = symbol.strip() |
603 address_symbol_dict[address] = stripped_symbol | 627 symbols[address] = stripped_symbol |
604 symbol_f.write('%s %s\n' % (address, stripped_symbol)) | 628 symbol_f.write('%s %s\n' % (address, stripped_symbol)) |
605 | 629 |
606 shutil.rmtree(prepared_data_dir) | 630 shutil.rmtree(prepared_data_dir) |
607 | 631 |
608 | 632 |
609 def parse_policy(policy_path): | 633 def parse_policy(policy_path): |
610 """Parses policy file. | 634 """Parses policy file. |
611 | 635 |
612 A policy file contains components' names and their | 636 A policy file contains components' names and their |
613 stacktrace patterns written as regular expressions. | 637 stacktrace patterns written as regular expressions. |
614 The patterns are matched against the symbols of | 638 The patterns are matched against the symbols of |
615 each stacktrace in the order written in the policy file. | 639 each stacktrace in the order written in the policy file. |
616 | 640 |
617 Args: | 641 Args: |
618 policy_path: A path for a policy file. | 642 policy_path: A path for a policy file. |
619 Returns: | 643 Returns: |
620 A list containing component's name and its regex object | 644 A tuple of (a list of Rule objects, a version string, and a list of component names). |
621 """ | 645 """ |
622 with open(policy_path, mode='r') as policy_f: | 646 with open(policy_path, mode='r') as policy_f: |
623 policy_lines = policy_f.readlines() | 647 policy_lines = policy_f.readlines() |
624 | 648 |
625 policy_version = POLICY_DEEP_1 | 649 policy_version = POLICY_DEEP_1 |
626 if policy_lines[0].startswith('heap profile policy: '): | 650 if policy_lines[0].startswith('heap profile policy: '): |
627 policy_version = policy_lines[0][21:].strip() | 651 policy_version = policy_lines[0][21:].strip() |
628 policy_lines.pop(0) | 652 policy_lines.pop(0) |
629 policy_list = [] | 653 rule_list = [] |
654 components = [] | |
630 | 655 |
631 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1: | 656 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1: |
632 sys.stderr.write(' heap profile policy version: %s\n' % policy_version) | |
633 for line in policy_lines: | 657 for line in policy_lines: |
634 if line[0] == '#': | 658 if line[0] == '#': |
635 continue | 659 continue |
636 | 660 |
637 if policy_version == POLICY_DEEP_2: | 661 if policy_version == POLICY_DEEP_2: |
638 (name, allocation_type, pattern) = line.strip().split(None, 2) | 662 (name, allocation_type, pattern) = line.strip().split(None, 2) |
639 mmap = False | 663 mmap = False |
640 if allocation_type == 'mmap': | 664 if allocation_type == 'mmap': |
641 mmap = True | 665 mmap = True |
642 elif policy_version == POLICY_DEEP_1: | 666 elif policy_version == POLICY_DEEP_1: |
643 name = line.split()[0] | 667 name = line.split()[0] |
644 pattern = line[len(name) : len(line)].strip() | 668 pattern = line[len(name) : len(line)].strip() |
645 mmap = False | 669 mmap = False |
646 | 670 |
647 if pattern != 'default': | 671 if pattern != 'default': |
648 policy_list.append(Policy(name, mmap, pattern)) | 672 rule_list.append(Rule(name, mmap, pattern)) |
649 if components.count(name) == 0: | 673 if components.count(name) == 0: |
650 components.append(name) | 674 components.append(name) |
651 | 675 |
652 else: | 676 else: |
653 sys.stderr.write(' invalid heap profile policy version: %s\n' % ( | 677 sys.stderr.write(' invalid heap profile policy version: %s\n' % ( |
654 policy_version)) | 678 policy_version)) |
655 | 679 |
656 return policy_list | 680 return rule_list, policy_version, components |
657 | 681 |
658 | 682 |
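For reference, a minimal sketch of a POLICY_DEEP_2 policy file as parsed above; component names and patterns are hypothetical. After the version header, each non-comment line is '<component-name> <allocation-type> <pattern>', and a 'default' pattern registers the component without adding a matching Rule:

    heap profile policy: POLICY_DEEP_2
    # comment lines start with '#'
    mmap-profiler mmap .*(ProfilerMalloc|MemoryRegionMap::).*
    tc-webkit malloc .*WebCore::.*
    other malloc default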
659 def main(): | 683 def find_prefix(path): |
660 if (len(sys.argv) < 4) or (not (sys.argv[1] in ['--csv', | 684 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path) |
661 '--json', | |
662 '--expand', | |
663 '--list', | |
664 '--stacktrace', | |
665 '--pprof'])): | |
666 sys.stderr.write("""Usage: | |
667 %s [options] <chrome-binary> <policy> <profile> [component-name] [depth] | |
668 | 685 |
669 Options: | |
670 --csv Output result in csv format | |
671 --json Output result in json format | |
672 --stacktrace Convert raw address to symbol names | |
673 --list Lists components and their sizes | |
674 --expand Show all stacktraces in the specified component | |
675 of given depth with their sizes | |
676 --pprof Format the profile file so it can be processed | |
677 by pprof | |
678 | 686 |
679 Examples: | 687 def load_buckets(prefix): |
680 dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv | |
681 dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json | |
682 dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap | |
683 dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4 | |
684 dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt | |
685 """ % (sys.argv[0])) | |
686 sys.exit(1) | |
687 | |
688 action = sys.argv[1] | |
689 chrome_path = sys.argv[2] | |
690 policy_path = sys.argv[3] | |
691 log_path = sys.argv[4] | |
692 | |
693 sys.stderr.write('parsing a policy file\n') | |
694 policy_list = parse_policy(policy_path) | |
695 | |
696 p = re.compile('\.[0-9][0-9][0-9][0-9]\.heap') | |
697 prefix = p.sub('', log_path) | |
698 symbol_path = prefix + '.symbols' | |
699 | |
700 sys.stderr.write('parsing the maps file\n') | |
701 maps_path = prefix + '.maps' | |
702 with open(maps_path, 'r') as maps_f: | |
703 maps_lines = maps_f.readlines() | |
704 | |
705 # Reading buckets | 688 # Reading buckets |
706 sys.stderr.write('parsing the bucket file\n') | 689 sys.stderr.write('Loading bucket files.\n') |
707 buckets = {} | 690 buckets = {} |
708 bucket_count = 0 | 691 bucket_count = 0 |
709 n = 0 | 692 n = 0 |
710 while True: | 693 while True: |
711 buckets_path = '%s.%04d.buckets' % (prefix, n) | 694 buckets_path = '%s.%04d.buckets' % (prefix, n) |
712 if not os.path.exists(buckets_path): | 695 if not os.path.exists(buckets_path): |
713 if n > 10: | 696 if n > 10: |
714 break | 697 break |
715 n += 1 | 698 n += 1 |
716 continue | 699 continue |
717 sys.stderr.write('reading buckets from %s\n' % (buckets_path)) | 700 sys.stderr.write(' %s\n' % buckets_path) |
718 with open(buckets_path, 'r') as buckets_f: | 701 with open(buckets_path, 'r') as buckets_f: |
719 for line in buckets_f: | 702 for line in buckets_f: |
720 words = line.split() | 703 words = line.split() |
721 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap') | 704 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap') |
722 n += 1 | 705 n += 1 |
723 | 706 |
724 log_path_list = [log_path] | 707 return buckets |
725 | 708 |
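For reference, each line of a .buckets file is read above as '<bucket-id> <allocation-type> <address> <address> ...'; the second token flags mmap-origin buckets (anything else is treated as malloc), and the remaining words become Bucket.stacktrace. Values below are hypothetical:

    42 malloc 0x7f3a9c021840 0x7f3a9bff1200 0x400f2e
    43 mmap 0x7f3a9c0aa000 0x7f3a9c021840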
726 if action in ('--csv', '--json'): | 709 |
727 # search for the sequence of files | 710 def determine_dump_path_list(dump_path, prefix): |
728 n = int(log_path[len(log_path) - 9 : len(log_path) - 5]) | 711 dump_path_list = [dump_path] |
729 n += 1 # skip current file | 712 |
730 while True: | 713 # search for the sequence of files |
731 p = '%s.%04d.heap' % (prefix, n) | 714 n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5]) |
732 if os.path.exists(p): | 715 n += 1 # skip current file |
733 log_path_list.append(p) | 716 while True: |
734 else: | 717 p = '%s.%04d.heap' % (prefix, n) |
735 break | 718 if os.path.exists(p): |
736 n += 1 | 719 dump_path_list.append(p) |
737 | |
738 logs = [] | |
739 for path in log_path_list: | |
740 new_log = Log(path) | |
741 sys.stderr.write('Parsing a dump: %s\n' % path) | |
742 try: | |
743 new_log.parse_log(buckets) | |
744 except EmptyDumpException: | |
745 sys.stderr.write(' WARNING: ignored an empty dump: %s\n' % path) | |
746 except ParsingException, e: | |
747 sys.stderr.write(' Error in parsing heap profile dump: %s\n' % e) | |
748 sys.exit(1) | |
749 else: | 720 else: |
750 logs.append(new_log) | 721 break |
751 | 722 n += 1 |
752 sys.stderr.write('getting symbols\n') | 723 |
753 update_symbols(symbol_path, maps_lines, maps_path) | 724 return dump_path_list |
754 | 725 |
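For reference, the file naming scheme assumed by find_prefix and determine_dump_path_list, using a hypothetical prefix:

    import re

    # find_prefix strips the '.NNNN.heap' tail from the first dump's path.
    print(re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', 'hprof.12345.0002.heap'))
    # -> hprof.12345
    # determine_dump_path_list then probes hprof.12345.0003.heap,
    # hprof.12345.0004.heap, ... and stops at the first missing file.
    # The companion files hprof.12345.NNNN.buckets, hprof.12345.maps and
    # hprof.12345.symbols are derived from the same prefix.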
755 # TODO(dmikurube): Many modes now. Split them into separate functions. | 726 |
756 if action == '--stacktrace': | 727 def load_single_dump(dump_path, buckets, appeared_addresses): |
757 logs[0].dump_stacktrace(buckets) | 728 new_dump = Dump(dump_path) |
758 | 729 try: |
759 elif action == '--csv': | 730 new_dump.parse_dump(buckets, appeared_addresses) |
760 sys.stdout.write(','.join(components)) | 731 except EmptyDumpException: |
761 sys.stdout.write('\n') | 732 sys.stderr.write('... ignored an empty dump') |
762 | 733 except ParsingException, e: |
763 for log in logs: | 734 sys.stderr.write('... error in parsing: %s' % e) |
764 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) | 735 sys.exit(1) |
736 else: | |
737 sys.stderr.write(' (version: %s)' % new_dump.dump_version) | |
738 | |
739 return new_dump | |
740 | |
741 | |
742 def load_dump(dump_path, buckets): | |
743 sys.stderr.write('Loading a heap dump file: "%s"' % dump_path) | |
744 appeared_addresses = set() | |
745 dump = load_single_dump(dump_path, buckets, appeared_addresses) | |
746 sys.stderr.write('.\n') | |
747 return dump, appeared_addresses | |
748 | |
749 | |
750 def load_dumps(dump_path_list, buckets): | |
751 sys.stderr.write('Loading heap dump files.\n') | |
752 appeared_addresses = set() | |
753 dumps = [] | |
754 for path in dump_path_list: | |
755 sys.stderr.write(' %s' % path) | |
756 dumps.append(load_single_dump(path, buckets, appeared_addresses)) | |
757 sys.stderr.write('\n') | |
758 return dumps, appeared_addresses | |
759 | |
760 | |
761 def load_and_update_symbol_cache(prefix, appeared_addresses): | |
762 maps_path = prefix + '.maps' | |
763 symbol_path = prefix + '.symbols' | |
764 sys.stderr.write('Loading and updating symbol cache: "%s".\n' % symbol_path) | |
765 symbols = {} | |
766 update_symbols(symbol_path, maps_path, appeared_addresses, symbols) | |
767 return symbols | |
768 | |
769 | |
770 def load_default_policies(): | |
771 with open(POLICIES_JSON_PATH, mode='r') as policies_f: | |
772 default_policies = json.load(policies_f) | |
773 return default_policies | |
774 | |
775 | |
776 def load_policy(policies_dict, policy_label): | |
777 policy_file = policies_dict[policy_label]['file'] | |
778 policy_path = os.path.join(os.path.dirname(__file__), policy_file) | |
779 rule_list, policy_version, components = parse_policy(policy_path) | |
780 sys.stderr.write(' %s: %s (version: %s)\n' % | |
781 (policy_label, policy_path, policy_version)) | |
782 return Policy(rule_list, policy_version, components) | |
783 | |
784 | |
785 def load_policies_dict(policies_dict): | |
786 sys.stderr.write('Loading policy files.\n') | |
787 policies = {} | |
788 for policy_label in policies_dict: | |
789 policies[policy_label] = load_policy(policies_dict, policy_label) | |
790 return policies | |
791 | |
792 | |
793 def load_policies(options_policy): | |
794 default_policies = load_default_policies() | |
795 if options_policy: | |
796 policy_labels = options_policy.split(',') | |
797 specified_policies = {} | |
798 for specified_policy in policy_labels: | |
799 if specified_policy in default_policies: | |
800 specified_policies[specified_policy] = ( | |
801 default_policies[specified_policy]) | |
802 policies = load_policies_dict(specified_policies) | |
803 else: | |
804 policies = load_policies_dict(default_policies) | |
805 return policies | |
806 | |
807 | |
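For reference, load_policy reads policies_dict[policy_label]['file'] for each label in policies.json, so that file is expected to map policy labels to objects with at least a 'file' entry naming a policy file next to this script. A minimal hypothetical sketch (labels and file names invented):

    {
      "l0": { "file": "policy.l0.txt" },
      "l1": { "file": "policy.l1.txt" }
    }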
808 def do_stacktrace(sys_argv): | |
809 parser = OptionParser(usage='Usage: %prog stacktrace <dump>') | |
810 options, args = parser.parse_args(sys_argv) | |
811 | |
812 if len(args) < 2: | |
813 parser.error('needs 1 argument.') | |
814 | |
815 dump_path = args[1] | |
816 | |
817 prefix = find_prefix(dump_path) | |
818 buckets = load_buckets(prefix) | |
819 dump, appeared_addresses = load_dump(dump_path, buckets) | |
820 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) | |
821 | |
822 dump.print_stacktrace(buckets, symbols) | |
823 | |
824 return 0 | |
825 | |
826 | |
827 def do_csv(sys_argv): | |
828 parser = OptionParser('Usage: %prog csv [-p POLICY] <first-dump>') | |
829 parser.add_option('-p', '--policy', type='string', dest='policy', | |
830 help='profile with POLICY', metavar='POLICY') | |
831 options, args = parser.parse_args(sys_argv) | |
832 | |
833 if len(args) < 2: | |
M-A Ruel
2012/07/24 14:10:53
what with 10 args?
Dai Mikurube (NOT FULLTIME)
2012/07/24 14:53:45
It just ignores extra args. Should it warn or abort?
M-A Ruel
2012/07/24 14:57:59
Please abort. Unless it is necessary to ignore dur
Dai Mikurube (NOT FULLTIME)
2012/07/24 16:19:24
Done.
| |
834 parser.error('needs 1 argument.') | |
835 | |
836 dump_path = args[1] | |
837 | |
838 prefix = find_prefix(dump_path) | |
839 buckets = load_buckets(prefix) | |
840 dumps, appeared_addresses = load_dumps( | |
841 determine_dump_path_list(dump_path, prefix), buckets) | |
842 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) | |
843 policies = load_policies(options.policy) | |
844 | |
845 max_components = 0 | |
846 for policy in policies: | |
847 max_components = max(max_components, len(policies[policy].components)) | |
848 | |
849 for policy in sorted(policies): | |
850 rule_list = policies[policy].rules | |
851 components = policies[policy].components | |
852 | |
853 if len(policies) > 1: | |
854 sys.stdout.write('%s%s\n' % (policy, ',' * (max_components - 1))) | |
855 sys.stdout.write('%s%s\n' % ( | |
856 ','.join(components), ',' * (max_components - len(components)))) | |
857 | |
858 for dump in dumps: | |
859 component_sizes = dump.apply_policy( | |
860 rule_list, buckets, dumps[0].dump_time, components, symbols) | |
765 s = [] | 861 s = [] |
766 for c in components: | 862 for c in components: |
767 if c in ('hour', 'minute', 'second'): | 863 if c in ('hour', 'minute', 'second'): |
768 s.append('%05.5f' % (component_sizes[c])) | 864 s.append('%05.5f' % (component_sizes[c])) |
769 else: | 865 else: |
770 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) | 866 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) |
771 sys.stdout.write(','.join(s)) | 867 sys.stdout.write('%s%s\n' % ( |
772 sys.stdout.write('\n') | 868 ','.join(s), ',' * (max_components - len(components)))) |
773 | 869 |
774 elif action == '--json': | 870 for bucket in buckets.itervalues(): |
775 json_base = { | 871 bucket.clear_component_cache() |
776 'version': 'JSON_DEEP_1', | 872 |
873 return 0 | |
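For reference, a sketch of the CSV emitted above for two hypothetical policies with three and two components; every row is padded with trailing commas to max_components columns, and the policy-name rows ('l0,,' and 'l1,,') are written only when more than one policy is selected:

    l0,,
    second,mmap-profiler,tc-webkit
    0.00000,0.12345,12.34567
    l1,,
    second,mmap-total-record,
    30.00000,45.67890,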
874 | |
875 | |
876 def do_json(sys_argv): | |
877 parser = OptionParser('Usage: %prog json [-p POLICY] <first-dump>') | |
878 parser.add_option('-p', '--policy', type='string', dest='policy', | |
879 help='profile with POLICY', metavar='POLICY') | |
880 options, args = parser.parse_args(sys_argv) | |
881 | |
882 if len(args) < 2: | |
883 parser.error('needs 1 argument.') | |
884 | |
885 dump_path = args[1] | |
886 | |
887 prefix = find_prefix(dump_path) | |
888 buckets = load_buckets(prefix) | |
889 dumps, appeared_addresses = load_dumps( | |
890 determine_dump_path_list(dump_path, prefix), buckets) | |
891 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) | |
892 policies = load_policies(options.policy) | |
893 | |
894 json_base = { | |
895 'version': 'JSON_DEEP_2', | |
896 'policies': {}, | |
897 } | |
898 | |
899 for policy in sorted(policies): | |
900 rule_list = policies[policy].rules | |
901 components = policies[policy].components | |
902 | |
903 json_base['policies'][policy] = { | |
777 'legends': components, | 904 'legends': components, |
778 'snapshots': [], | 905 'snapshots': [], |
779 } | 906 } |
780 for log in logs: | 907 |
781 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) | 908 for dump in dumps: |
782 component_sizes['log_path'] = log.log_path | 909 component_sizes = dump.apply_policy( |
783 component_sizes['log_time'] = datetime.fromtimestamp( | 910 rule_list, buckets, dumps[0].dump_time, components, symbols) |
784 log.log_time).strftime('%Y-%m-%d %H:%M:%S') | 911 component_sizes['dump_path'] = dump.dump_path |
785 json_base['snapshots'].append(component_sizes) | 912 component_sizes['dump_time'] = datetime.fromtimestamp( |
786 json.dump(json_base, sys.stdout, indent=2, sort_keys=True) | 913 dump.dump_time).strftime('%Y-%m-%d %H:%M:%S') |
787 | 914 json_base['policies'][policy]['snapshots'].append(component_sizes) |
788 elif action == '--list': | 915 |
789 component_sizes = logs[0].apply_policy( | 916 for bucket in buckets.itervalues(): |
790 policy_list, buckets, logs[0].log_time) | 917 bucket.clear_component_cache() |
918 | |
919 json.dump(json_base, sys.stdout, indent=2, sort_keys=True) | |
920 | |
921 return 0 | |
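For reference, a sketch of the JSON_DEEP_2 layout produced above, with a hypothetical policy label, dump and values; each snapshot is the component-size dict from apply_policy plus the dump_path and dump_time fields:

    {
      "policies": {
        "l0": {
          "legends": ["second", "tc-webkit"],
          "snapshots": [
            {
              "dump_path": "hprof.12345.0001.heap",
              "dump_time": "2012-07-24 14:10:53",
              "second": 0.0,
              "tc-webkit": 12942336
            }
          ]
        }
      },
      "version": "JSON_DEEP_2"
    }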
922 | |
923 | |
924 def do_list(sys_argv): | |
925 parser = OptionParser('Usage: %prog list [-p POLICY] <first-dump>') | |
926 parser.add_option('-p', '--policy', type='string', dest='policy', | |
927 help='profile with POLICY', metavar='POLICY') | |
928 options, args = parser.parse_args(sys_argv) | |
929 | |
930 if len(args) < 2: | |
931 parser.error('needs 1 argument.') | |
932 | |
933 dump_path = args[1] | |
934 | |
935 prefix = find_prefix(dump_path) | |
936 buckets = load_buckets(prefix) | |
937 dumps, appeared_addresses = load_dumps( | |
938 determine_dump_path_list(dump_path, prefix), buckets) | |
939 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) | |
940 policies = load_policies(options.policy) | |
941 | |
942 for policy in sorted(policies): | |
943 rule_list = policies[policy].rules | |
944 components = policies[policy].components | |
945 | |
946 component_sizes = dumps[0].apply_policy( | |
947 rule_list, buckets, dumps[0].dump_time, components, symbols) | |
948 sys.stdout.write('%s:\n' % policy) | |
791 for c in components: | 949 for c in components: |
792 if c in ['hour', 'minute', 'second']: | 950 if c in ['hour', 'minute', 'second']: |
793 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c])) | 951 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c])) |
794 else: | 952 else: |
795 sys.stdout.write('%30s %10.3f\n' % ( | 953 sys.stdout.write('%30s %10.3f\n' % ( |
796 c, component_sizes[c] / 1024.0 / 1024.0)) | 954 c, component_sizes[c] / 1024.0 / 1024.0)) |
797 | 955 |
798 elif action == '--expand': | 956 for bucket in buckets.itervalues(): |
799 component_name = sys.argv[5] | 957 bucket.clear_component_cache() |
800 depth = sys.argv[6] | |
801 logs[0].expand(policy_list, buckets, component_name, int(depth)) | |
802 | 958 |
803 elif action == '--pprof': | 959 return 0 |
804 if len(sys.argv) > 5: | 960 |
805 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5]) | 961 |
806 else: | 962 def do_expand(sys_argv): |
807 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None) | 963 parser = OptionParser( |
964 'Usage: %prog expand <dump> <policy> <component> <depth>') | |
965 options, args = parser.parse_args(sys_argv) | |
966 | |
967 if len(args) < 5: | |
968 parser.error('needs 4 arguments.') | |
969 | |
970 dump_path = args[1] | |
971 target_policy = args[2] | |
972 component_name = args[3] | |
973 depth = args[4] | |
974 | |
975 prefix = find_prefix(dump_path) | |
976 buckets = load_buckets(prefix) | |
977 dump, appeared_addresses = load_dump(dump_path, buckets) | |
978 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) | |
979 policies = load_policies(target_policy) | |
980 | |
981 rule_list = policies[target_policy].rules | |
982 | |
983 dump.expand(rule_list, buckets, component_name, int(depth), symbols) | |
984 | |
985 return 0 | |
986 | |
987 | |
988 def do_pprof(sys_argv): | |
989 parser = OptionParser( | |
990 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') | |
991 parser.add_option('-c', '--component', type='string', dest='component', | |
992 help='restrict to COMPONENT', metavar='COMPONENT') | |
993 options, args = parser.parse_args(sys_argv) | |
994 | |
995 if len(args) < 3: | |
996 parser.error('needs 2 arguments.') | |
997 | |
998 dump_path = args[1] | |
999 target_policy = args[2] | |
1000 component = options.component | |
1001 | |
1002 prefix = find_prefix(dump_path) | |
1003 buckets = load_buckets(prefix) | |
1004 dump, appeared_addresses = load_dump(dump_path, buckets) | |
1005 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) | |
1006 policies = load_policies(target_policy) | |
1007 | |
1008 rule_list = policies[target_policy].rules | |
1009 | |
1010 with open(prefix + '.maps', 'r') as maps_f: | |
1011 maps_lines = maps_f.readlines() | |
1012 dump.print_for_pprof(rule_list, buckets, maps_lines, component, symbols) | |
1013 | |
1014 return 0 | |
1015 | |
1016 | |
1017 def main(): | |
1018 COMMANDS = { | |
1019 'csv': do_csv, | |
1020 'expand': do_expand, | |
1021 'json': do_json, | |
1022 'list': do_list, | |
1023 'pprof': do_pprof, | |
1024 'stacktrace': do_stacktrace, | |
1025 } | |
1026 | |
1027 # TODO(dmikurube): Remove this message after a while. | |
1028 if len(sys.argv) >= 2 and sys.argv[1].startswith('--'): | |
1029 sys.stderr.write(""" | |
1030 **************** NOTICE!! **************** | |
1031 The command line format has changed. | |
1032 Please look at the description below. | |
1033 ****************************************** | |
1034 | |
1035 """) | |
1036 | |
1037 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS): | |
1038 sys.stderr.write("""Usage: %s <command> [options] [<args>] | |
1039 | |
1040 Commands: | |
1041 csv Classify memory usage in CSV | |
1042 expand Show all stacktraces contained in the specified component | |
1043 json Classify memory usage in JSON | |
1044 list Classify memory usage in simple listing format | |
1045 pprof Format the profile dump so that it can be processed by pprof | |
1046 stacktrace Convert runtime addresses to symbol names | |
1047 | |
1048 Quick Reference: | |
1049 dmprof csv [-p POLICY] <first-dump> | |
1050 dmprof expand <dump> <policy> <component> <depth> | |
1051 dmprof json [-p POLICY] <first-dump> | |
1052 dmprof list [-p POLICY] <first-dump> | |
1053 dmprof pprof [-c COMPONENT] <dump> <policy> | |
1054 dmprof stacktrace <dump> | |
1055 """ % (sys.argv[0])) | |
1056 sys.exit(1) | |
1057 action = sys.argv.pop(1) | |
1058 | |
1059 return COMMANDS[action](sys.argv) | |
808 | 1060 |
809 | 1061 |
810 if __name__ == '__main__': | 1062 if __name__ == '__main__': |
811 sys.exit(main()) | 1063 sys.exit(main()) |