OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """The deep heap profiler script for Chrome.""" | 6 """The deep heap profiler script for Chrome.""" |
7 | 7 |
8 from datetime import datetime | 8 from datetime import datetime |
9 import json | 9 import json |
| 10 import optparse |
10 import os | 11 import os |
11 import re | 12 import re |
12 import shutil | 13 import shutil |
13 import subprocess | 14 import subprocess |
14 import sys | 15 import sys |
15 import tempfile | 16 import tempfile |
16 | 17 |
17 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( | 18 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( |
18 os.path.dirname(os.path.abspath(__file__)), | 19 os.path.dirname(os.path.abspath(__file__)), |
19 os.pardir, | 20 os.pardir, |
20 'find_runtime_symbols') | 21 'find_runtime_symbols') |
21 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) | 22 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) |
22 | 23 |
23 from prepare_symbol_info import prepare_symbol_info | 24 from prepare_symbol_info import prepare_symbol_info |
24 from find_runtime_symbols import find_runtime_symbols_list | 25 from find_runtime_symbols import find_runtime_symbols_list |
25 | 26 |
26 BUCKET_ID = 5 | 27 BUCKET_ID = 5 |
27 VIRTUAL = 0 | 28 VIRTUAL = 0 |
28 COMMITTED = 1 | 29 COMMITTED = 1 |
29 ALLOC_COUNT = 2 | 30 ALLOC_COUNT = 2 |
30 FREE_COUNT = 3 | 31 FREE_COUNT = 3 |
31 NULL_REGEX = re.compile('') | 32 NULL_REGEX = re.compile('') |
32 | 33 |
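The index constants above address the whitespace-separated columns of a stacktrace line in a dump. A minimal sketch of the assumed layout (the line content below is hypothetical):

    line = '4096 4096 10 2 @ 123'   # virtual committed allocs frees @ bucket_id
    words = line.split()
    committed = int(words[COMMITTED])   # -> 4096
    bucket_id = int(words[BUCKET_ID])   # -> 123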
| 34 POLICIES_JSON_PATH = os.path.join( |
| 35 os.path.dirname(os.path.abspath(__file__)), |
| 36 'policies.json') |
| 37 |
33 # Heap Profile Dump versions | 38 # Heap Profile Dump versions |
34 | 39 |
35 # DUMP_DEEP_1 is OBSOLETE. | 40 # DUMP_DEEP_1 is OBSOLETE. |
36 # DUMP_DEEP_1 DOES NOT distinguish mmap regions and malloc chunks. | 41 # DUMP_DEEP_1 DOES NOT distinguish mmap regions and malloc chunks. |
37 # Their stacktraces DO contain mmap* or tc-* at their tops. | 42 # Their stacktraces DO contain mmap* or tc-* at their tops. |
38 # They should be processed by POLICY_DEEP_1. | 43 # They should be processed by POLICY_DEEP_1. |
39 DUMP_DEEP_1 = 'DUMP_DEEP_1' | 44 DUMP_DEEP_1 = 'DUMP_DEEP_1' |
40 | 45 |
41 # DUMP_DEEP_2 is OBSOLETE. | 46 # DUMP_DEEP_2 is OBSOLETE. |
42 # DUMP_DEEP_2 DOES distinguish mmap regions and malloc chunks. | 47 # DUMP_DEEP_2 DOES distinguish mmap regions and malloc chunks. |
(...skipping 22 matching lines...) |
65 # Heap Profile Policy versions | 70 # Heap Profile Policy versions |
66 | 71 |
67 # POLICY_DEEP_1 DOES NOT include allocation_type columns. | 72 # POLICY_DEEP_1 DOES NOT include allocation_type columns. |
68 # mmap regions are distinguished with mmap frames in the pattern column. | 73 # mmap regions are distinguished with mmap frames in the pattern column. |
69 POLICY_DEEP_1 = 'POLICY_DEEP_1' | 74 POLICY_DEEP_1 = 'POLICY_DEEP_1' |
70 | 75 |
71 # POLICY_DEEP_2 DOES include allocation_type columns. | 76 # POLICY_DEEP_2 DOES include allocation_type columns. |
72 # mmap regions are distinguished with the allocation_type column. | 77 # mmap regions are distinguished with the allocation_type column. |
73 POLICY_DEEP_2 = 'POLICY_DEEP_2' | 78 POLICY_DEEP_2 = 'POLICY_DEEP_2' |
74 | 79 |
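For reference, a hedged sketch of the two policy-line formats described above; component names and patterns here are hypothetical:

    # POLICY_DEEP_1: <component> <pattern>
    tc-webkit       .*WebCore.*
    # POLICY_DEEP_2: <component> <allocation_type> <pattern>
    mmap-v8         mmap    .*v8::.*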
75 # TODO(dmikurube): Avoid global variables. | |
76 address_symbol_dict = {} | |
77 appeared_addresses = set() | |
78 components = [] | |
79 | |
80 | 80 |
81 class EmptyDumpException(Exception): | 81 class EmptyDumpException(Exception): |
82 def __init__(self, value): | 82 def __init__(self, value): |
83 self.value = value | 83 self.value = value |
84 def __str__(self): | 84 def __str__(self): |
85 return repr(self.value) | 85 return repr(self.value) |
86 | 86 |
87 | 87 |
88 class ParsingException(Exception): | 88 class ParsingException(Exception): |
89 def __init__(self, value): | 89 def __init__(self, value): |
90 self.value = value | 90 self.value = value |
91 def __str__(self): | 91 def __str__(self): |
92 return repr(self.value) | 92 return repr(self.value) |
93 | 93 |
94 | 94 |
95 class InvalidDumpException(ParsingException): | 95 class InvalidDumpException(ParsingException): |
96 def __init__(self, value): | 96 def __init__(self, value): |
97 self.value = value | 97 self.value = value |
98 def __str__(self): | 98 def __str__(self): |
99 return "invalid heap profile dump: %s" % repr(self.value) | 99 return "invalid heap profile dump: %s" % repr(self.value) |
100 | 100 |
101 | 101 |
102 class ObsoleteDumpVersionException(ParsingException): | 102 class ObsoleteDumpVersionException(ParsingException): |
103 def __init__(self, value): | 103 def __init__(self, value): |
104 self.value = value | 104 self.value = value |
105 def __str__(self): | 105 def __str__(self): |
106 return "obsolete heap profile dump version: %s" % repr(self.value) | 106 return "obsolete heap profile dump version: %s" % repr(self.value) |
107 | 107 |
108 | 108 |
109 class Policy(object): | 109 class Rule(object): |
| 110 """Represents one matching rule in a policy file.""" |
110 | 111 |
111 def __init__(self, name, mmap, pattern): | 112 def __init__(self, name, mmap, pattern): |
112 self.name = name | 113 self.name = name |
113 self.mmap = mmap | 114 self.mmap = mmap |
114 self.condition = re.compile(pattern + r'\Z') | 115 self.condition = re.compile(pattern + r'\Z') |
115 | 116 |
116 | 117 |
117 def get_component(policy_list, bucket): | 118 class Policy(object): |
| 119 """Represents a policy, a content of a policy file.""" |
| 120 |
| 121 def __init__(self, rules, version, components): |
| 122 self.rules = rules |
| 123 self.version = version |
| 124 self.components = components |
| 125 |
| 126 def append_rule(self, rule): |
| 127 self.rules.append(rule) |
| 128 |
| 129 |
| 130 def get_component(rule_list, bucket, symbols): |
118 """Returns a component name which a given bucket belongs to. | 131 """Returns a component name which a given bucket belongs to. |
119 | 132 |
120 Args: | 133 Args: |
121 policy_list: A list containing Policy objects. (Parsed policy data by | 134 rule_list: A list of Rule objects. |
122 parse_policy.) | |
123 bucket: A Bucket object to be searched for. | 135 bucket: A Bucket object to be searched for. |
| 136 symbols: A dict mapping runtime addresses to symbol names. |
124 | 137 |
125 Returns: | 138 Returns: |
126 A string representing a component name. | 139 A string representing a component name. |
127 """ | 140 """ |
128 if not bucket: | 141 if not bucket: |
129 return 'no-bucket' | 142 return 'no-bucket' |
130 if bucket.component: | 143 if bucket.component_cache: |
131 return bucket.component | 144 return bucket.component_cache |
132 | 145 |
133 stacktrace = ''.join( | 146 stacktrace = ''.join(symbols[a] + ' ' for a in bucket.stacktrace).strip() |
134 address_symbol_dict[a] + ' ' for a in bucket.stacktrace).strip() | |
135 | 147 |
136 for policy in policy_list: | 148 for rule in rule_list: |
137 if bucket.mmap == policy.mmap and policy.condition.match(stacktrace): | 149 if bucket.mmap == rule.mmap and rule.condition.match(stacktrace): |
138 bucket.component = policy.name | 150 bucket.component_cache = rule.name |
139 return policy.name | 151 return rule.name |
140 | 152 |
141 assert False | 153 assert False |
142 | 154 |
143 | 155 |
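A hedged sketch of how a rule's anchored pattern matches the space-joined, symbolized stacktrace; the pattern and symbols below are hypothetical:

    import re
    condition = re.compile(r'tc-.* WebCore::.*' + r'\Z')   # as in Rule.__init__
    stacktrace = 'tc-new WebCore::Node::create'            # joined from symbols
    assert condition.match(stacktrace)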
144 class Bucket(object): | 156 class Bucket(object): |
| 157 """Represents a bucket, which is a unit of memory classification.""" |
145 | 158 |
146 def __init__(self, stacktrace, mmap): | 159 def __init__(self, stacktrace, mmap): |
147 self.stacktrace = stacktrace | 160 self.stacktrace = stacktrace |
148 self.mmap = mmap | 161 self.mmap = mmap |
149 self.component = '' | 162 self.component_cache = '' |
| 163 |
| 164 def clear_component_cache(self): |
| 165 self.component_cache = '' |
150 | 166 |
151 | 167 |
152 class Log(object): | 168 class Dump(object): |
| 169 """Represents one heap profile dump.""" |
153 | 170 |
154 """A class representing one dumped log data.""" | 171 def __init__(self, dump_path): |
155 def __init__(self, log_path): | 172 self.dump_path = dump_path |
156 self.log_path = log_path | 173 self.dump_lines = [ |
157 self.log_lines = [ | 174 l for l in open(self.dump_path, 'r') if l and not l.startswith('#')] |
158 l for l in open(self.log_path, 'r') if l and not l.startswith('#')] | 175 self.dump_version = '' |
159 self.log_version = '' | |
160 sys.stderr.write('Loading a dump: %s\n' % log_path) | |
161 self.stacktrace_lines = [] | 176 self.stacktrace_lines = [] |
162 self.counters = {} | 177 self.counters = {} |
163 self.log_time = os.stat(self.log_path).st_mtime | 178 self.dump_time = os.stat(self.dump_path).st_mtime |
164 | 179 |
165 def dump_stacktrace(buckets): | 180 def print_stacktrace(self, buckets, symbols): |
166 """Prints a given stacktrace. | 181 """Prints a given stacktrace. |
167 | 182 |
168 Args: | 183 Args: |
169 buckets: A dict mapping bucket ids and their corresponding Bucket | 184 buckets: A dict mapping bucket ids to Bucket objects. |
170 objects. | 185 symbols: A dict mapping runtime addresses to symbol names. |
171 """ | 186 """ |
172 for line in self.stacktrace_lines: | 187 for line in self.stacktrace_lines: |
173 words = line.split() | 188 words = line.split() |
174 bucket = buckets.get(int(words[BUCKET_ID])) | 189 bucket = buckets.get(int(words[BUCKET_ID])) |
175 if not bucket: | 190 if not bucket: |
176 continue | 191 continue |
177 for i in range(0, BUCKET_ID - 1): | 192 for i in range(0, BUCKET_ID - 1): |
178 sys.stdout.write(words[i] + ' ') | 193 sys.stdout.write(words[i] + ' ') |
179 for address in bucket.stacktrace: | 194 for address in bucket.stacktrace: |
180 sys.stdout.write((address_symbol_dict.get(address) or address) + ' ') | 195 sys.stdout.write((symbols.get(address) or address) + ' ') |
181 sys.stdout.write('\n') | 196 sys.stdout.write('\n') |
182 | 197 |
183 @staticmethod | 198 @staticmethod |
184 def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets, | 199 def accumulate_size_for_pprof(stacktrace_lines, rule_list, buckets, |
185 component_name): | 200 component_name, symbols): |
186 """Accumulates size of committed chunks and the number of allocated chunks. | 201 """Accumulates size of committed chunks and the number of allocated chunks. |
187 | 202 |
188 Args: | 203 Args: |
189 stacktrace_lines: A list of strings which are valid as stacktraces. | 204 stacktrace_lines: A list of strings which are valid as stacktraces. |
190 policy_list: A list containing Policy objects. (Parsed policy data by | 205 rule_list: A list of Rule objects. |
191 parse_policy.) | 206 buckets: A dict mapping bucket ids to Bucket objects. |
192 buckets: A dict mapping bucket ids and their corresponding Bucket | |
193 objects. | |
194 component_name: The name of a component for filtering. | 207 component_name: The name of a component for filtering. |
| 208 symbols: A dict mapping runtime addresses to symbol names. |
195 | 209 |
196 Returns: | 210 Returns: |
197 Two integers which are the accumulated size of committed regions and the | 211 Two integers which are the accumulated size of committed regions and the |
198 number of allocated chunks, respectively. | 212 number of allocated chunks, respectively. |
199 """ | 213 """ |
200 com_committed = 0 | 214 com_committed = 0 |
201 com_allocs = 0 | 215 com_allocs = 0 |
202 for line in stacktrace_lines: | 216 for line in stacktrace_lines: |
203 words = line.split() | 217 words = line.split() |
204 bucket = buckets.get(int(words[BUCKET_ID])) | 218 bucket = buckets.get(int(words[BUCKET_ID])) |
205 if (not bucket or | 219 if (not bucket or |
206 (component_name and | 220 (component_name and |
207 component_name != get_component(policy_list, bucket))): | 221 component_name != get_component(rule_list, bucket, symbols))): |
208 continue | 222 continue |
209 | 223 |
210 com_committed += int(words[COMMITTED]) | 224 com_committed += int(words[COMMITTED]) |
211 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) | 225 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) |
212 | 226 |
213 return com_committed, com_allocs | 227 return com_committed, com_allocs |
214 | 228 |
215 @staticmethod | 229 @staticmethod |
216 def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list, | 230 def print_stacktrace_lines_for_pprof(stacktrace_lines, rule_list, |
217 buckets, component_name): | 231 buckets, component_name, symbols): |
218 """Prints information of stacktrace lines for pprof. | 232 """Prints information of stacktrace lines for pprof. |
219 | 233 |
220 Args: | 234 Args: |
221 stacktrace_lines: A list of strings which are valid as stacktraces. | 235 stacktrace_lines: A list of strings which are valid as stacktraces. |
222 policy_list: A list containing Policy objects. (Parsed policy data by | 236 rule_list: A list of Rule objects. |
223 parse_policy.) | 237 buckets: A dict mapping bucket ids to Bucket objects. |
224 buckets: A dict mapping bucket ids and their corresponding Bucket | |
225 objects. | |
226 component_name: The name of a component for filtering. | 238 component_name: The name of a component for filtering. |
| 239 symbols: A dict mapping runtime addresses to symbol names. |
227 """ | 240 """ |
228 for line in stacktrace_lines: | 241 for line in stacktrace_lines: |
229 words = line.split() | 242 words = line.split() |
230 bucket = buckets.get(int(words[BUCKET_ID])) | 243 bucket = buckets.get(int(words[BUCKET_ID])) |
231 if (not bucket or | 244 if (not bucket or |
232 (component_name and | 245 (component_name and |
233 component_name != get_component(policy_list, bucket))): | 246 component_name != get_component(rule_list, bucket, symbols))): |
234 continue | 247 continue |
235 | 248 |
236 sys.stdout.write('%6d: %8s [%6d: %8s] @' % ( | 249 sys.stdout.write('%6d: %8s [%6d: %8s] @' % ( |
237 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | 250 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), |
238 words[COMMITTED], | 251 words[COMMITTED], |
239 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | 252 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), |
240 words[COMMITTED])) | 253 words[COMMITTED])) |
241 for address in bucket.stacktrace: | 254 for address in bucket.stacktrace: |
242 sys.stdout.write(' ' + address) | 255 sys.stdout.write(' ' + address) |
243 sys.stdout.write('\n') | 256 sys.stdout.write('\n') |
244 | 257 |
245 def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name): | 258 def print_for_pprof( |
246 """Converts the log file so it can be processed by pprof. | 259 self, rule_list, buckets, maps_lines, component_name, symbols): |
| 260 """Converts the heap profile dump so it can be processed by pprof. |
247 | 261 |
248 Args: | 262 Args: |
249 policy_list: A list containing Policy objects. (Parsed policy data by | 263 rule_list: A list of Rule objects. |
250 parse_policy.) | 264 buckets: A dict mapping bucket ids to Bucket objects. |
251 buckets: A dict mapping bucket ids and their corresponding Bucket | 265 maps_lines: A list of strings containing /proc/.../maps. |
252 objects. | |
253 mapping_lines: A list of strings containing /proc/.../maps. | |
254 component_name: The name of a component for filtering. | 266 component_name: The name of a component for filtering. |
| 267 symbols: A dict mapping runtime addresses to symbol names. |
255 """ | 268 """ |
256 sys.stdout.write('heap profile: ') | 269 sys.stdout.write('heap profile: ') |
257 com_committed, com_allocs = self.accumulate_size_for_pprof( | 270 com_committed, com_allocs = self.accumulate_size_for_pprof( |
258 self.stacktrace_lines, policy_list, buckets, component_name) | 271 self.stacktrace_lines, rule_list, buckets, component_name, symbols) |
259 | 272 |
260 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( | 273 sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( |
261 com_allocs, com_committed, com_allocs, com_committed)) | 274 com_allocs, com_committed, com_allocs, com_committed)) |
262 | 275 |
263 self.dump_stacktrace_lines_for_pprof( | 276 self.print_stacktrace_lines_for_pprof( |
264 self.stacktrace_lines, policy_list, buckets, component_name) | 277 self.stacktrace_lines, rule_list, buckets, component_name, symbols) |
265 | 278 |
266 sys.stdout.write('MAPPED_LIBRARIES:\n') | 279 sys.stdout.write('MAPPED_LIBRARIES:\n') |
267 for line in mapping_lines: | 280 for line in maps_lines: |
268 sys.stdout.write(line) | 281 sys.stdout.write(line) |
269 | 282 |
270 @staticmethod | 283 @staticmethod |
271 def check_stacktrace_line(stacktrace_line, buckets): | 284 def check_stacktrace_line(stacktrace_line, buckets, appeared_addresses): |
272 """Checks if a given stacktrace_line is valid as stacktrace. | 285 """Checks if a given stacktrace_line is valid as stacktrace. |
273 | 286 |
274 Args: | 287 Args: |
275 stacktrace_line: A string to be checked. | 288 stacktrace_line: A string to be checked. |
276 buckets: A dict mapping bucket ids and their corresponding Bucket | 289 buckets: A dict mapping bucket ids to Bucket objects. |
277 objects. | 290 appeared_addresses: A set to which appeared addresses are added. |
278 | 291 |
279 Returns: | 292 Returns: |
280 True if the given stacktrace_line is valid. | 293 True if the given stacktrace_line is valid. |
281 """ | 294 """ |
282 words = stacktrace_line.split() | 295 words = stacktrace_line.split() |
283 if len(words) < BUCKET_ID + 1: | 296 if len(words) < BUCKET_ID + 1: |
284 return False | 297 return False |
285 if words[BUCKET_ID - 1] != '@': | 298 if words[BUCKET_ID - 1] != '@': |
286 return False | 299 return False |
287 bucket = buckets.get(int(words[BUCKET_ID])) | 300 bucket = buckets.get(int(words[BUCKET_ID])) |
(...skipping 10 matching lines...) |
298 A pair of an integer indicating a line number after skipped, and a | 311 A pair of an integer indicating a line number after skipped, and a |
299 boolean value which is True if found a line which skipping_condition | 312 boolean value which is True if found a line which skipping_condition |
300 is False for. | 313 is False for. |
301 """ | 314 """ |
302 while skipping_condition(line_number): | 315 while skipping_condition(line_number): |
303 line_number += 1 | 316 line_number += 1 |
304 if line_number >= max_line_number: | 317 if line_number >= max_line_number: |
305 return line_number, False | 318 return line_number, False |
306 return line_number, True | 319 return line_number, True |
307 | 320 |
308 def parse_stacktraces_while_valid(self, buckets, log_lines, line_number): | 321 def parse_stacktraces_while_valid( |
| 322 self, buckets, dump_lines, line_number, appeared_addresses): |
309 """Parses stacktrace lines while the lines are valid. | 323 """Parses stacktrace lines while the lines are valid. |
310 | 324 |
311 Args: | 325 Args: |
312 buckets: A dict mapping bucket ids and their corresponding Bucket | 326 buckets: A dict mapping bucket ids to Bucket objects. |
313 objects. | 327 dump_lines: A list of lines to be parsed. |
314 log_lines: A list of lines to be parsed. | 328 line_number: A line number to start parsing in dump_lines. |
315 line_number: An integer representing the starting line number in | 329 appeared_addresses: A set to which appeared addresses are added. |
316 log_lines. | |
317 | 330 |
318 Returns: | 331 Returns: |
319 A pair of a list of valid lines and an integer representing the last | 332 A pair of a list of valid lines and an integer representing the last |
320 line number in log_lines. | 333 line number in dump_lines. |
321 """ | 334 """ |
322 (line_number, _) = self.skip_lines_while( | 335 (line_number, _) = self.skip_lines_while( |
323 line_number, len(log_lines), | 336 line_number, len(dump_lines), |
324 lambda n: not log_lines[n].split()[0].isdigit()) | 337 lambda n: not dump_lines[n].split()[0].isdigit()) |
325 stacktrace_lines_start = line_number | 338 stacktrace_lines_start = line_number |
326 (line_number, _) = self.skip_lines_while( | 339 (line_number, _) = self.skip_lines_while( |
327 line_number, len(log_lines), | 340 line_number, len(dump_lines), |
328 lambda n: self.check_stacktrace_line(log_lines[n], buckets)) | 341 lambda n: self.check_stacktrace_line( |
329 return (log_lines[stacktrace_lines_start:line_number], line_number) | 342 dump_lines[n], buckets, appeared_addresses)) |
| 343 return (dump_lines[stacktrace_lines_start:line_number], line_number) |
330 | 344 |
331 def parse_stacktraces(self, buckets, line_number): | 345 def parse_stacktraces(self, buckets, line_number, appeared_addresses): |
332 """Parses lines in self.log_lines as stacktrace. | 346 """Parses lines in self.dump_lines as stacktrace. |
333 | 347 |
334 Valid stacktrace lines are stored in self.stacktrace_lines. | 348 Valid stacktrace lines are stored in self.stacktrace_lines. |
335 | 349 |
336 Args: | 350 Args: |
337 buckets: A dict mapping bucket ids and their corresponding Bucket | 351 buckets: A dict mapping bucket ids to Bucket objects. |
338 objects. | 352 line_number: A line number to start parsing in dump_lines. |
339 line_number: An integer representing the starting line number in | 353 appeared_addresses: A set to which appeared addresses are added. |
340 log_lines. | |
341 | 354 |
342 Raises: | 355 Raises: |
343 ParsingException for invalid dump versions. | 356 ParsingException for invalid dump versions. |
344 """ | 357 """ |
345 sys.stderr.write(' Version: %s\n' % self.log_version) | 358 if self.dump_version == DUMP_DEEP_5: |
346 | |
347 if self.log_version == DUMP_DEEP_5: | |
348 (self.stacktrace_lines, line_number) = ( | 359 (self.stacktrace_lines, line_number) = ( |
349 self.parse_stacktraces_while_valid( | 360 self.parse_stacktraces_while_valid( |
350 buckets, self.log_lines, line_number)) | 361 buckets, self.dump_lines, line_number, appeared_addresses)) |
351 | 362 |
352 elif self.log_version in DUMP_DEEP_OBSOLETE: | 363 elif self.dump_version in DUMP_DEEP_OBSOLETE: |
353 raise ObsoleteDumpVersionException(self.log_version) | 364 raise ObsoleteDumpVersionException(self.dump_version) |
354 | 365 |
355 else: | 366 else: |
356 raise InvalidDumpException('Invalid version: %s' % self.log_version) | 367 raise InvalidDumpException('Invalid version: %s' % self.dump_version) |
357 | 368 |
358 def parse_global_stats(self): | 369 def parse_global_stats(self): |
359 """Parses lines in self.log_lines as global stats.""" | 370 """Parses lines in self.dump_lines as global stats.""" |
360 (ln, _) = self.skip_lines_while( | 371 (ln, _) = self.skip_lines_while( |
361 0, len(self.log_lines), | 372 0, len(self.dump_lines), |
362 lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n') | 373 lambda n: self.dump_lines[n] != 'GLOBAL_STATS:\n') |
363 | 374 |
364 global_stat_names = [ | 375 global_stat_names = [ |
365 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', | 376 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', |
366 'nonprofiled-absent', 'nonprofiled-anonymous', | 377 'nonprofiled-absent', 'nonprofiled-anonymous', |
367 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', | 378 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', |
368 'nonprofiled-stack', 'nonprofiled-other', | 379 'nonprofiled-stack', 'nonprofiled-other', |
369 'profiled-mmap', 'profiled-malloc'] | 380 'profiled-mmap', 'profiled-malloc'] |
370 | 381 |
371 for prefix in global_stat_names: | 382 for prefix in global_stat_names: |
372 (ln, _) = self.skip_lines_while( | 383 (ln, _) = self.skip_lines_while( |
373 ln, len(self.log_lines), | 384 ln, len(self.dump_lines), |
374 lambda n: self.log_lines[n].split()[0] != prefix) | 385 lambda n: self.dump_lines[n].split()[0] != prefix) |
375 words = self.log_lines[ln].split() | 386 words = self.dump_lines[ln].split() |
376 self.counters[prefix + '_virtual'] = int(words[-2]) | 387 self.counters[prefix + '_virtual'] = int(words[-2]) |
377 self.counters[prefix + '_committed'] = int(words[-1]) | 388 self.counters[prefix + '_committed'] = int(words[-1]) |
378 | 389 |
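A hedged sketch of the GLOBAL_STATS block this method consumes; only the last two columns (virtual and committed bytes) are read, and all figures below are hypothetical:

    GLOBAL_STATS:
         total            1048576000  524288000
         file-exec          81920000   40960000
         ...
         profiled-malloc    10485760    5242880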
379 def parse_version(self): | 390 def parse_version(self): |
380 """Parses a version string in self.log_lines. | 391 """Parses a version string in self.dump_lines. |
381 | 392 |
382 Returns: | 393 Returns: |
383 A pair of (a string representing a version of the stacktrace dump, | 394 A pair of (a string representing a version of the stacktrace dump, |
384 and an integer indicating a line number next to the version string). | 395 and an integer indicating a line number next to the version string). |
385 | 396 |
386 Raises: | 397 Raises: |
387 ParsingException for invalid dump versions. | 398 ParsingException for invalid dump versions. |
388 """ | 399 """ |
389 version = '' | 400 version = '' |
390 | 401 |
391 # Skip until an identifiable line. | 402 # Skip until an identifiable line. |
392 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') | 403 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') |
393 if not self.log_lines: | 404 if not self.dump_lines: |
394 raise EmptyDumpException('Empty heap dump file.') | 405 raise EmptyDumpException('Empty heap dump file.') |
395 (ln, found) = self.skip_lines_while( | 406 (ln, found) = self.skip_lines_while( |
396 0, len(self.log_lines), | 407 0, len(self.dump_lines), |
397 lambda n: not self.log_lines[n].startswith(headers)) | 408 lambda n: not self.dump_lines[n].startswith(headers)) |
398 if not found: | 409 if not found: |
399 raise InvalidDumpException('No version header.') | 410 raise InvalidDumpException('No version header.') |
400 | 411 |
401 # Identify a version. | 412 # Identify a version. |
402 if self.log_lines[ln].startswith('heap profile: '): | 413 if self.dump_lines[ln].startswith('heap profile: '): |
403 version = self.log_lines[ln][13:].strip() | 414 version = self.dump_lines[ln][13:].strip() |
404 if version == DUMP_DEEP_5: | 415 if version == DUMP_DEEP_5: |
405 (ln, _) = self.skip_lines_while( | 416 (ln, _) = self.skip_lines_while( |
406 ln, len(self.log_lines), | 417 ln, len(self.dump_lines), |
407 lambda n: self.log_lines[n] != 'STACKTRACES:\n') | 418 lambda n: self.dump_lines[n] != 'STACKTRACES:\n') |
408 elif version in DUMP_DEEP_OBSOLETE: | 419 elif version in DUMP_DEEP_OBSOLETE: |
409 raise ObsoleteDumpVersionException(version) | 420 raise ObsoleteDumpVersionException(version) |
410 else: | 421 else: |
411 raise InvalidDumpException('Invalid version: %s' % version) | 422 raise InvalidDumpException('Invalid version: %s' % version) |
412 elif self.log_lines[ln] == 'STACKTRACES:\n': | 423 elif self.dump_lines[ln] == 'STACKTRACES:\n': |
413 raise ObsoleteDumpVersionException(DUMP_DEEP_1) | 424 raise ObsoleteDumpVersionException(DUMP_DEEP_1) |
414 elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n': | 425 elif self.dump_lines[ln] == 'MMAP_STACKTRACES:\n': |
415 raise ObsoleteDumpVersionException(DUMP_DEEP_2) | 426 raise ObsoleteDumpVersionException(DUMP_DEEP_2) |
416 | 427 |
417 return (version, ln) | 428 return (version, ln) |
418 | 429 |
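A hedged example of the header this method recognizes for the current format; everything after the version line is elided:

    heap profile: DUMP_DEEP_5
    GLOBAL_STATS:
    ...
    STACKTRACES: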
419 def parse_log(self, buckets): | 430 def parse_dump(self, buckets, appeared_addresses): |
420 self.log_version, ln = self.parse_version() | 431 self.dump_version, ln = self.parse_version() |
421 self.parse_global_stats() | 432 self.parse_global_stats() |
422 self.parse_stacktraces(buckets, ln) | 433 self.parse_stacktraces(buckets, ln, appeared_addresses) |
423 | 434 |
424 @staticmethod | 435 @staticmethod |
425 def accumulate_size_for_policy(stacktrace_lines, | 436 def accumulate_size_for_policy(stacktrace_lines, |
426 policy_list, buckets, sizes): | 437 rule_list, buckets, sizes, symbols): |
427 for line in stacktrace_lines: | 438 for line in stacktrace_lines: |
428 words = line.split() | 439 words = line.split() |
429 bucket = buckets.get(int(words[BUCKET_ID])) | 440 bucket = buckets.get(int(words[BUCKET_ID])) |
430 component_match = get_component(policy_list, bucket) | 441 component_match = get_component(rule_list, bucket, symbols) |
431 sizes[component_match] += int(words[COMMITTED]) | 442 sizes[component_match] += int(words[COMMITTED]) |
432 | 443 |
433 if component_match.startswith('tc-'): | 444 if component_match.startswith('tc-'): |
434 sizes['tc-total-log'] += int(words[COMMITTED]) | 445 sizes['tc-total-log'] += int(words[COMMITTED]) |
435 elif component_match.startswith('mmap-'): | 446 elif component_match.startswith('mmap-'): |
436 sizes['mmap-total-log'] += int(words[COMMITTED]) | 447 sizes['mmap-total-log'] += int(words[COMMITTED]) |
437 else: | 448 else: |
438 sizes['other-total-log'] += int(words[COMMITTED]) | 449 sizes['other-total-log'] += int(words[COMMITTED]) |
439 | 450 |
440 def apply_policy(self, policy_list, buckets, first_log_time): | 451 def apply_policy( |
| 452 self, rule_list, buckets, first_dump_time, components, symbols): |
441 """Aggregates the total memory size of each component. | 453 """Aggregates the total memory size of each component. |
442 | 454 |
443 Iterates through all stacktraces and attributes them to one of the components | 455 Iterates through all stacktraces and attributes them to one of the components |
444 based on the policy. It is important to apply the policy in the right order. | 456 based on the policy. It is important to apply the policy in the right order. |
445 | 457 |
446 Args: | 458 Args: |
447 policy_list: A list containing Policy objects. (Parsed policy data by | 459 rule_list: A list of Rule objects. |
448 parse_policy.) | 460 buckets: A dict mapping bucket ids to Bucket objects. |
449 buckets: A dict mapping bucket ids and their corresponding Bucket | 461 first_dump_time: An integer representing time when the first dump is |
450 objects. | |
451 first_log_time: An integer representing time when the first log is | |
452 dumped. | 462 dumped. |
| 463 components: A list of strings of component names. |
| 464 symbols: A dict mapping runtime addresses to symbol names. |
453 | 465 |
454 Returns: | 466 Returns: |
455 A dict mapping component names to their sizes. | 467 A dict mapping component names to their sizes. |
456 """ | 468 """ |
457 | 469 |
458 sys.stderr.write('apply policy:%s\n' % (self.log_path)) | 470 sys.stderr.write('Applying policy: "%s".\n' % self.dump_path) |
459 sizes = dict((c, 0) for c in components) | 471 sizes = dict((c, 0) for c in components) |
460 | 472 |
461 self.accumulate_size_for_policy(self.stacktrace_lines, | 473 self.accumulate_size_for_policy(self.stacktrace_lines, |
462 policy_list, buckets, sizes) | 474 rule_list, buckets, sizes, symbols) |
463 | 475 |
464 mmap_prefix = 'profiled-mmap' | 476 mmap_prefix = 'profiled-mmap' |
465 malloc_prefix = 'profiled-malloc' | 477 malloc_prefix = 'profiled-malloc' |
466 | 478 |
467 sizes['mmap-no-log'] = ( | 479 sizes['mmap-no-log'] = ( |
468 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log']) | 480 self.counters['%s_committed' % mmap_prefix] - sizes['mmap-total-log']) |
469 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix] | 481 sizes['mmap-total-record'] = self.counters['%s_committed' % mmap_prefix] |
470 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix] | 482 sizes['mmap-total-record-vm'] = self.counters['%s_virtual' % mmap_prefix] |
471 | 483 |
472 sizes['tc-no-log'] = ( | 484 sizes['tc-no-log'] = ( |
(...skipping 34 matching lines...) |
507 'nonprofiled-stack_committed', | 519 'nonprofiled-stack_committed', |
508 'nonprofiled-other_committed') | 520 'nonprofiled-other_committed') |
509 sizes['mustbezero'] = ( | 521 sizes['mustbezero'] = ( |
510 self.counters['total_committed'] - | 522 self.counters['total_committed'] - |
511 sum(self.counters[i] for i in removed)) | 523 sum(self.counters[i] for i in removed)) |
512 if 'total-exclude-profiler' in sizes: | 524 if 'total-exclude-profiler' in sizes: |
513 sizes['total-exclude-profiler'] = ( | 525 sizes['total-exclude-profiler'] = ( |
514 self.counters['total_committed'] - | 526 self.counters['total_committed'] - |
515 (sizes['mmap-profiler'] + sizes['mmap-allocated-type'])) | 527 (sizes['mmap-profiler'] + sizes['mmap-allocated-type'])) |
516 if 'hour' in sizes: | 528 if 'hour' in sizes: |
517 sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0 | 529 sizes['hour'] = (self.dump_time - first_dump_time) / 60.0 / 60.0 |
518 if 'minute' in sizes: | 530 if 'minute' in sizes: |
519 sizes['minute'] = (self.log_time - first_log_time) / 60.0 | 531 sizes['minute'] = (self.dump_time - first_dump_time) / 60.0 |
520 if 'second' in sizes: | 532 if 'second' in sizes: |
521 sizes['second'] = self.log_time - first_log_time | 533 sizes['second'] = self.dump_time - first_dump_time |
522 | 534 |
523 return sizes | 535 return sizes |
524 | 536 |
525 @staticmethod | 537 @staticmethod |
526 def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets, | 538 def accumulate_size_for_expand(stacktrace_lines, rule_list, buckets, |
527 component_name, depth, sizes): | 539 component_name, depth, sizes, symbols): |
528 for line in stacktrace_lines: | 540 for line in stacktrace_lines: |
529 words = line.split() | 541 words = line.split() |
530 bucket = buckets.get(int(words[BUCKET_ID])) | 542 bucket = buckets.get(int(words[BUCKET_ID])) |
531 component_match = get_component(policy_list, bucket) | 543 component_match = get_component(rule_list, bucket, symbols) |
532 if component_match == component_name: | 544 if component_match == component_name: |
533 stacktrace_sequence = '' | 545 stacktrace_sequence = '' |
534 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace), | 546 for address in bucket.stacktrace[0 : min(len(bucket.stacktrace), |
535 1 + depth)]: | 547 1 + depth)]: |
536 stacktrace_sequence += address_symbol_dict[address] + ' ' | 548 stacktrace_sequence += symbols[address] + ' ' |
537 if not stacktrace_sequence in sizes: | 549 if not stacktrace_sequence in sizes: |
538 sizes[stacktrace_sequence] = 0 | 550 sizes[stacktrace_sequence] = 0 |
539 sizes[stacktrace_sequence] += int(words[COMMITTED]) | 551 sizes[stacktrace_sequence] += int(words[COMMITTED]) |
540 | 552 |
541 def expand(self, policy_list, buckets, component_name, depth): | 553 def expand(self, rule_list, buckets, component_name, depth, symbols): |
542 """Prints all stacktraces in a given component of given depth. | 554 """Prints all stacktraces in a given component of given depth. |
543 | 555 |
544 Args: | 556 Args: |
545 policy_list: A list containing Policy objects. (Parsed policy data by | 557 rule_list: A list of Rule objects. |
546 parse_policy.) | 558 buckets: A dict mapping bucket ids to Bucket objects. |
547 buckets: A dict mapping bucket ids and their corresponding Bucket | |
548 objects. | |
549 component_name: The name of a component for filtering. | 559 component_name: The name of a component for filtering. |
550 depth: An integer representing depth to be printed. | 560 depth: An integer representing depth to be printed. |
| 561 symbols: A dict mapping runtime addresses to symbol names. |
551 """ | 562 """ |
552 sizes = {} | 563 sizes = {} |
553 | 564 |
554 self.accumulate_size_for_expand( | 565 self.accumulate_size_for_expand( |
555 self.stacktrace_lines, policy_list, buckets, component_name, | 566 self.stacktrace_lines, rule_list, buckets, component_name, |
556 depth, sizes) | 567 depth, sizes, symbols) |
557 | 568 |
558 sorted_sizes_list = sorted( | 569 sorted_sizes_list = sorted( |
559 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) | 570 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) |
560 total = 0 | 571 total = 0 |
561 for size_pair in sorted_sizes_list: | 572 for size_pair in sorted_sizes_list: |
562 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0])) | 573 sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0])) |
563 total += size_pair[1] | 574 total += size_pair[1] |
564 sys.stderr.write('total: %d\n' % (total)) | 575 sys.stderr.write('total: %d\n' % (total)) |
565 | 576 |
566 | 577 |
567 def update_symbols(symbol_path, mapping_lines, maps_path): | 578 def update_symbols( |
| 579 symbol_path, maps_path, appeared_addresses, symbols): |
568 """Updates address/symbol mapping on memory and in a .symbol cache file. | 580 """Updates address/symbol mapping on memory and in a .symbol cache file. |
569 | 581 |
570 It reads cached address/symbol mapping from a .symbol file if it exists. | 582 It reads cached address/symbol mapping from a .symbol file if it exists. |
571 Then, it resolves unresolved addresses from a Chrome binary with pprof. | 583 Then, it resolves unresolved addresses from a Chrome binary with pprof. |
572 Both mappings on memory and in a .symbol cache file are updated. | 584 Both mappings on memory and in a .symbol cache file are updated. |
573 | 585 |
574 Symbol files are formatted as follows: | 586 Symbol files are formatted as follows: |
575 <Address> <Symbol> | 587 <Address> <Symbol> |
576 <Address> <Symbol> | 588 <Address> <Symbol> |
577 <Address> <Symbol> | 589 <Address> <Symbol> |
578 ... | 590 ... |
579 | 591 |
580 Args: | 592 Args: |
581 symbol_path: A string representing a path to a .symbols cache file. | 593 symbol_path: A string representing a path to a .symbols cache file. |
582 mapping_lines: A list of strings containing /proc/.../maps. | |
583 maps_path: A string of the path of /proc/.../maps. | 594 maps_path: A string of the path of /proc/.../maps. |
| 595 appeared_addresses: A set of known addresses. |
| 596 symbols: A dict mapping runtime addresses to symbol names. |
584 """ | 597 """ |
585 with open(symbol_path, mode='a+') as symbol_f: | 598 with open(symbol_path, mode='a+') as symbol_f: |
586 symbol_lines = symbol_f.readlines() | 599 symbol_lines = symbol_f.readlines() |
587 if symbol_lines: | 600 if symbol_lines: |
588 for line in symbol_lines: | 601 for line in symbol_lines: |
589 items = line.split(None, 1) | 602 items = line.split(None, 1) |
590 address_symbol_dict[items[0]] = items[1].rstrip() | 603 if len(items) == 1: |
| 604 items.append('??') |
| 605 symbols[items[0]] = items[1].rstrip() |
| 606 if symbols: |
| 607 sys.stderr.write(' Found %d symbols in cache.\n' % len(symbols)) |
| 608 else: |
| 609 sys.stderr.write(' No symbols found in cache.\n') |
591 | 610 |
592 unresolved_addresses = sorted( | 611 unresolved_addresses = sorted( |
593 a for a in appeared_addresses if a not in address_symbol_dict) | 612 a for a in appeared_addresses if a not in symbols) |
594 | 613 |
595 if unresolved_addresses: | 614 if not unresolved_addresses: |
| 615 sys.stderr.write(' No need to resolve any more addresses.\n') |
| 616 else: |
| 617 sys.stderr.write(' %d addresses are unresolved.\n' % |
| 618 len(unresolved_addresses)) |
596 prepared_data_dir = tempfile.mkdtemp() | 619 prepared_data_dir = tempfile.mkdtemp() |
597 try: | 620 try: |
598 prepare_symbol_info(maps_path, prepared_data_dir) | 621 prepare_symbol_info(maps_path, prepared_data_dir) |
599 | 622 |
600 symbols = find_runtime_symbols_list( | 623 symbol_list = find_runtime_symbols_list( |
601 prepared_data_dir, unresolved_addresses) | 624 prepared_data_dir, unresolved_addresses) |
602 | 625 |
603 for address, symbol in zip(unresolved_addresses, symbols): | 626 for address, symbol in zip(unresolved_addresses, symbol_list): |
| 627 if not symbol: |
| 628 symbol = '??' |
604 stripped_symbol = symbol.strip() | 629 stripped_symbol = symbol.strip() |
605 address_symbol_dict[address] = stripped_symbol | 630 symbols[address] = stripped_symbol |
606 symbol_f.write('%s %s\n' % (address, stripped_symbol)) | 631 symbol_f.write('%s %s\n' % (address, stripped_symbol)) |
607 finally: | 632 finally: |
608 shutil.rmtree(prepared_data_dir) | 633 shutil.rmtree(prepared_data_dir) |
609 | 634 |
610 | 635 |
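A hedged sketch of the .symbols cache format assumed above; addresses and names are hypothetical, and a bare address line is read back as '??':

    0x7f85cc39d000 WebCore::Node::create
    0x7f85cc3a1120 v8::internal::Heap::AllocateRaw
    0x7f85cc3b0008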
611 def parse_policy(policy_path): | 636 def parse_policy(policy_path): |
612 """Parses policy file. | 637 """Parses policy file. |
613 | 638 |
614 A policy file contains component names and their | 639 A policy file contains component names and their |
615 stacktrace patterns written as regular expressions. | 640 stacktrace patterns written as regular expressions. |
616 Those patterns are matched against the symbols of | 641 Those patterns are matched against the symbols of |
617 each stacktrace in the order written in the policy file. | 642 each stacktrace in the order written in the policy file. |
618 | 643 |
619 Args: | 644 Args: |
620 policy_path: A path for a policy file. | 645 policy_path: A path for a policy file. |
621 Returns: | 646 Returns: |
622 A list of Policy objects, each pairing a component name with a regex. | 647 A tuple of (a list of Rule objects, the policy version, and the component names). |
623 """ | 648 """ |
624 with open(policy_path, mode='r') as policy_f: | 649 with open(policy_path, mode='r') as policy_f: |
625 policy_lines = policy_f.readlines() | 650 policy_lines = policy_f.readlines() |
626 | 651 |
627 policy_version = POLICY_DEEP_1 | 652 policy_version = POLICY_DEEP_1 |
628 if policy_lines[0].startswith('heap profile policy: '): | 653 if policy_lines[0].startswith('heap profile policy: '): |
629 policy_version = policy_lines[0][21:].strip() | 654 policy_version = policy_lines[0][21:].strip() |
630 policy_lines.pop(0) | 655 policy_lines.pop(0) |
631 policy_list = [] | 656 rule_list = [] |
| 657 components = [] |
632 | 658 |
633 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1: | 659 if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1: |
634 sys.stderr.write(' heap profile policy version: %s\n' % policy_version) | |
635 for line in policy_lines: | 660 for line in policy_lines: |
636 if line[0] == '#': | 661 if line[0] == '#': |
637 continue | 662 continue |
638 | 663 |
639 if policy_version == POLICY_DEEP_2: | 664 if policy_version == POLICY_DEEP_2: |
640 (name, allocation_type, pattern) = line.strip().split(None, 2) | 665 (name, allocation_type, pattern) = line.strip().split(None, 2) |
641 mmap = False | 666 mmap = False |
642 if allocation_type == 'mmap': | 667 if allocation_type == 'mmap': |
643 mmap = True | 668 mmap = True |
644 elif policy_version == POLICY_DEEP_1: | 669 elif policy_version == POLICY_DEEP_1: |
645 name = line.split()[0] | 670 name = line.split()[0] |
646 pattern = line[len(name) : len(line)].strip() | 671 pattern = line[len(name) : len(line)].strip() |
647 mmap = False | 672 mmap = False |
648 | 673 |
649 if pattern != 'default': | 674 if pattern != 'default': |
650 policy_list.append(Policy(name, mmap, pattern)) | 675 rule_list.append(Rule(name, mmap, pattern)) |
651 if components.count(name) == 0: | 676 if components.count(name) == 0: |
652 components.append(name) | 677 components.append(name) |
653 | 678 |
654 else: | 679 else: |
655 sys.stderr.write(' invalid heap profile policy version: %s\n' % ( | 680 sys.stderr.write(' invalid heap profile policy version: %s\n' % ( |
656 policy_version)) | 681 policy_version)) |
657 | 682 |
658 return policy_list | 683 return rule_list, policy_version, components |
659 | 684 |
660 | 685 |
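A hedged usage sketch; the policy path and its contents are hypothetical:

    rule_list, policy_version, components = parse_policy('policy.browser.txt')
    # policy_version -> 'POLICY_DEEP_2'
    # components     -> ['tc-webkit', 'mmap-v8', ...] in file order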
661 def main(): | 686 def find_prefix(path): |
662 if (len(sys.argv) < 4) or (not (sys.argv[1] in ['--csv', | 687 return re.sub(r'\.[0-9][0-9][0-9][0-9]\.heap', '', path) |
663 '--json', | |
664 '--expand', | |
665 '--list', | |
666 '--stacktrace', | |
667 '--pprof'])): | |
668 sys.stderr.write("""Usage: | |
669 %s [options] <chrome-binary> <policy> <profile> [component-name] [depth] | |
670 | 688 |
671 Options: | |
672 --csv Output result in csv format | |
673 --json Output result in json format | |
674 --stacktrace Convert raw address to symbol names | |
675 --list Lists components and their sizes | |
676 --expand Show all stacktraces in the specified component | |
677 of given depth with their sizes | |
678 --pprof Format the profile file so it can be processed | |
679 by pprof | |
680 | 689 |
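A quick example of the prefix derivation; the dump name is hypothetical:

    find_prefix('hprof.12345.0002.heap')   # -> 'hprof.12345'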
681 Examples: | 690 def load_buckets(prefix): |
682 dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv | |
683 dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json | |
684 dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap | |
685 dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4 | |
686 dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt | |
687 """ % (sys.argv[0])) | |
688 sys.exit(1) | |
689 | |
690 action = sys.argv[1] | |
691 chrome_path = sys.argv[2] | |
692 policy_path = sys.argv[3] | |
693 log_path = sys.argv[4] | |
694 | |
695 sys.stderr.write('parsing a policy file\n') | |
696 policy_list = parse_policy(policy_path) | |
697 | |
698 p = re.compile('\.[0-9][0-9][0-9][0-9]\.heap') | |
699 prefix = p.sub('', log_path) | |
700 symbol_path = prefix + '.symbols' | |
701 | |
702 sys.stderr.write('parsing the maps file\n') | |
703 maps_path = prefix + '.maps' | |
704 with open(maps_path, 'r') as maps_f: | |
705 maps_lines = maps_f.readlines() | |
706 | |
707 # Reading buckets | 691 # Reading buckets |
708 sys.stderr.write('parsing the bucket file\n') | 692 sys.stderr.write('Loading bucket files.\n') |
709 buckets = {} | 693 buckets = {} |
710 bucket_count = 0 | 694 bucket_count = 0 |
711 n = 0 | 695 n = 0 |
712 while True: | 696 while True: |
713 buckets_path = '%s.%04d.buckets' % (prefix, n) | 697 buckets_path = '%s.%04d.buckets' % (prefix, n) |
714 if not os.path.exists(buckets_path): | 698 if not os.path.exists(buckets_path): |
715 if n > 10: | 699 if n > 10: |
716 break | 700 break |
717 n += 1 | 701 n += 1 |
718 continue | 702 continue |
719 sys.stderr.write('reading buckets from %s\n' % (buckets_path)) | 703 sys.stderr.write(' %s\n' % buckets_path) |
720 with open(buckets_path, 'r') as buckets_f: | 704 with open(buckets_path, 'r') as buckets_f: |
721 for line in buckets_f: | 705 for line in buckets_f: |
722 words = line.split() | 706 words = line.split() |
723 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap') | 707 buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap') |
724 n += 1 | 708 n += 1 |
725 | 709 |
726 log_path_list = [log_path] | 710 return buckets |
727 | 711 |
728 if action in ('--csv', '--json'): | 712 |
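A hedged sketch of one .buckets line and how it maps to the Bucket class above; the bucket id and addresses are hypothetical:

    line = '123 malloc 0x7f85cc39d000 0x7f85cc3a1120'
    words = line.split()
    bucket = Bucket(words[2:], words[1] == 'mmap')
    # bucket.stacktrace -> ['0x7f85cc39d000', '0x7f85cc3a1120']; bucket.mmap -> False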
729 # search for the sequence of files | 713 def determine_dump_path_list(dump_path, prefix): |
730 n = int(log_path[len(log_path) - 9 : len(log_path) - 5]) | 714 dump_path_list = [dump_path] |
731 n += 1 # skip current file | 715 |
732 while True: | 716 # search for the sequence of files |
733 p = '%s.%04d.heap' % (prefix, n) | 717 n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5]) |
734 if os.path.exists(p): | 718 n += 1 # skip current file |
735 log_path_list.append(p) | 719 while True: |
736 else: | 720 p = '%s.%04d.heap' % (prefix, n) |
737 break | 721 if os.path.exists(p): |
738 n += 1 | 722 dump_path_list.append(p) |
739 | |
740 logs = [] | |
741 for path in log_path_list: | |
742 new_log = Log(path) | |
743 sys.stderr.write('Parsing a dump: %s\n' % path) | |
744 try: | |
745 new_log.parse_log(buckets) | |
746 except EmptyDumpException: | |
747 sys.stderr.write(' WARNING: ignored an empty dump: %s\n' % path) | |
748 except ParsingException, e: | |
749 sys.stderr.write(' Error in parsing heap profile dump: %s\n' % e) | |
750 sys.exit(1) | |
751 else: | 723 else: |
752 logs.append(new_log) | 724 break |
753 | 725 n += 1 |
754 sys.stderr.write('getting symbols\n') | 726 |
755 update_symbols(symbol_path, maps_lines, maps_path) | 727 return dump_path_list |
756 | 728 |
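A hedged example of the sequence discovery, assuming hypothetical files hprof.0002.heap and hprof.0003.heap exist but hprof.0004.heap does not:

    determine_dump_path_list('hprof.0002.heap', 'hprof')
    # -> ['hprof.0002.heap', 'hprof.0003.heap']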
757 # TODO(dmikurube): Many modes now. Split them into separete functions. | 729 |
758 if action == '--stacktrace': | 730 def load_single_dump(dump_path, buckets, appeared_addresses): |
759 logs[0].dump_stacktrace(buckets) | 731 new_dump = Dump(dump_path) |
760 | 732 try: |
761 elif action == '--csv': | 733 new_dump.parse_dump(buckets, appeared_addresses) |
762 sys.stdout.write(','.join(components)) | 734 except EmptyDumpException: |
763 sys.stdout.write('\n') | 735 sys.stderr.write('... ignored an empty dump') |
764 | 736 except ParsingException, e: |
765 for log in logs: | 737 sys.stderr.write('... error in parsing: %s' % e) |
766 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) | 738 sys.exit(1) |
| 739 else: |
| 740 sys.stderr.write(' (version: %s)' % new_dump.dump_version) |
| 741 |
| 742 return new_dump |
| 743 |
| 744 |
| 745 def load_dump(dump_path, buckets): |
| 746 sys.stderr.write('Loading a heap dump file: "%s"' % dump_path) |
| 747 appeared_addresses = set() |
| 748 dump = load_single_dump(dump_path, buckets, appeared_addresses) |
| 749 sys.stderr.write('.\n') |
| 750 return dump, appeared_addresses |
| 751 |
| 752 |
| 753 def load_dumps(dump_path_list, buckets): |
| 754 sys.stderr.write('Loading heap dump files.\n') |
| 755 appeared_addresses = set() |
| 756 dumps = [] |
| 757 for path in dump_path_list: |
| 758 sys.stderr.write(' %s' % path) |
| 759 dumps.append(load_single_dump(path, buckets, appeared_addresses)) |
| 760 sys.stderr.write('\n') |
| 761 return dumps, appeared_addresses |
| 762 |
| 763 |
| 764 def load_and_update_symbol_cache(prefix, appeared_addresses): |
| 765 maps_path = prefix + '.maps' |
| 766 symbol_path = prefix + '.symbols' |
| 767 sys.stderr.write('Loading and updating symbol cache: "%s".\n' % symbol_path) |
| 768 symbols = {} |
| 769 update_symbols(symbol_path, maps_path, appeared_addresses, symbols) |
| 770 return symbols |
| 771 |
| 772 |
| 773 def load_default_policies(): |
| 774 with open(POLICIES_JSON_PATH, mode='r') as policies_f: |
| 775 default_policies = json.load(policies_f) |
| 776 return default_policies |
| 777 |
| 778 |
| 779 def load_policy(policies_dict, policy_label): |
| 780 policy_file = policies_dict[policy_label]['file'] |
| 781 policy_path = os.path.join(os.path.dirname(__file__), policy_file) |
| 782 rule_list, policy_version, components = parse_policy(policy_path) |
| 783 sys.stderr.write(' %s: %s (version: %s)\n' % |
| 784 (policy_label, policy_path, policy_version)) |
| 785 return Policy(rule_list, policy_version, components) |
| 786 |
| 787 |
| 788 def load_policies_dict(policies_dict): |
| 789 sys.stderr.write('Loading policy files.\n') |
| 790 policies = {} |
| 791 for policy_label in policies_dict: |
| 792 policies[policy_label] = load_policy(policies_dict, policy_label) |
| 793 return policies |
| 794 |
| 795 |
| 796 def load_policies(options_policy): |
| 797 default_policies = load_default_policies() |
| 798 if options_policy: |
| 799 policy_labels = options_policy.split(',') |
| 800 specified_policies = {} |
| 801 for specified_policy in policy_labels: |
| 802 if specified_policy in default_policies: |
| 803 specified_policies[specified_policy] = ( |
| 804 default_policies[specified_policy]) |
| 805 policies = load_policies_dict(specified_policies) |
| 806 else: |
| 807 policies = load_policies_dict(default_policies) |
| 808 return policies |
| 809 |
| 810 |
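A hedged sketch of the policies.json structure that load_policy expects, inferred from its use of the 'file' key; labels and file names are hypothetical:

    {
      "l0": { "file": "policy.l0.txt" },
      "t0": { "file": "policy.t0.txt" }
    }

    policies = load_policies('l0')   # loads only the "l0" policy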
| 811 def do_stacktrace(sys_argv): |
| 812 parser = optparse.OptionParser(usage='Usage: %prog stacktrace <dump>') |
| 813 options, args = parser.parse_args(sys_argv) |
| 814 |
| 815 if len(args) != 2: |
| 816 parser.error('needs 1 argument.') |
| 817 return 1 |
| 818 |
| 819 dump_path = args[1] |
| 820 |
| 821 prefix = find_prefix(dump_path) |
| 822 buckets = load_buckets(prefix) |
| 823 dump, appeared_addresses = load_dump(dump_path, buckets) |
| 824 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
| 825 |
| 826 dump.print_stacktrace(buckets, symbols) |
| 827 |
| 828 return 0 |
| 829 |
| 830 |
| 831 def do_csv(sys_argv): |
| 832 parser = optparse.OptionParser('Usage: %prog csv [-p POLICY] <first-dump>') |
| 833 parser.add_option('-p', '--policy', type='string', dest='policy', |
| 834 help='profile with POLICY', metavar='POLICY') |
| 835 options, args = parser.parse_args(sys_argv) |
| 836 |
| 837 if len(args) != 2: |
| 838 parser.error('needs 1 argument.') |
| 839 return 1 |
| 840 |
| 841 dump_path = args[1] |
| 842 |
| 843 prefix = find_prefix(dump_path) |
| 844 buckets = load_buckets(prefix) |
| 845 dumps, appeared_addresses = load_dumps( |
| 846 determine_dump_path_list(dump_path, prefix), buckets) |
| 847 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
| 848 policies = load_policies(options.policy) |
| 849 |
| 850 max_components = 0 |
| 851 for policy in policies: |
| 852 max_components = max(max_components, len(policies[policy].components)) |
| 853 |
| 854 for policy in sorted(policies): |
| 855 rule_list = policies[policy].rules |
| 856 components = policies[policy].components |
| 857 |
| 858 if len(policies) > 1: |
| 859 sys.stdout.write('%s%s\n' % (policy, ',' * (max_components - 1))) |
| 860 sys.stdout.write('%s%s\n' % ( |
| 861 ','.join(components), ',' * (max_components - len(components)))) |
| 862 |
| 863 for dump in dumps: |
| 864 component_sizes = dump.apply_policy( |
| 865 rule_list, buckets, dumps[0].dump_time, components, symbols) |
767 s = [] | 866 s = [] |
768 for c in components: | 867 for c in components: |
769 if c in ('hour', 'minute', 'second'): | 868 if c in ('hour', 'minute', 'second'): |
770 s.append('%05.5f' % (component_sizes[c])) | 869 s.append('%05.5f' % (component_sizes[c])) |
771 else: | 870 else: |
772 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) | 871 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) |
773 sys.stdout.write(','.join(s)) | 872 sys.stdout.write('%s%s\n' % ( |
774 sys.stdout.write('\n') | 873 ','.join(s), ',' * (max_components - len(components)))) |
775 | 874 |
776 elif action == '--json': | 875 for bucket in buckets.itervalues(): |
777 json_base = { | 876 bucket.clear_component_cache() |
778 'version': 'JSON_DEEP_1', | 877 |
| 878 return 0 |
| 879 |
| 880 |
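A hedged sketch of the CSV shape for two policies whose legends have three and two components; rows are padded with trailing commas to max_components fields, and all names and numbers below are hypothetical:

    policy_a,,
    tc-webkit,mmap-v8,other
    12.50000,3.25000,0.75000
    policy_b,,
    total,unprofiled,
    16.50000,2.00000,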
| 881 def do_json(sys_argv): |
| 882 parser = optparse.OptionParser('Usage: %prog json [-p POLICY] <first-dump>') |
| 883 parser.add_option('-p', '--policy', type='string', dest='policy', |
| 884 help='profile with POLICY', metavar='POLICY') |
| 885 options, args = parser.parse_args(sys_argv) |
| 886 |
| 887 if len(args) != 2: |
| 888 parser.error('needs 1 argument.') |
| 889 return 1 |
| 890 |
| 891 dump_path = args[1] |
| 892 |
| 893 prefix = find_prefix(dump_path) |
| 894 buckets = load_buckets(prefix) |
| 895 dumps, appeared_addresses = load_dumps( |
| 896 determine_dump_path_list(dump_path, prefix), buckets) |
| 897 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
| 898 policies = load_policies(options.policy) |
| 899 |
| 900 json_base = { |
| 901 'version': 'JSON_DEEP_2', |
| 902 'policies': {}, |
| 903 } |
| 904 |
| 905 for policy in sorted(policies): |
| 906 rule_list = policies[policy].rules |
| 907 components = policies[policy].components |
| 908 |
| 909 json_base['policies'][policy] = { |
779 'legends': components, | 910 'legends': components, |
780 'snapshots': [], | 911 'snapshots': [], |
781 } | 912 } |
782 for log in logs: | 913 |
783 component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) | 914 for dump in dumps: |
784 component_sizes['log_path'] = log.log_path | 915 component_sizes = dump.apply_policy( |
785 component_sizes['log_time'] = datetime.fromtimestamp( | 916 rule_list, buckets, dumps[0].dump_time, components, symbols) |
786 log.log_time).strftime('%Y-%m-%d %H:%M:%S') | 917 component_sizes['dump_path'] = dump.dump_path |
787 json_base['snapshots'].append(component_sizes) | 918 component_sizes['dump_time'] = datetime.fromtimestamp( |
788 json.dump(json_base, sys.stdout, indent=2, sort_keys=True) | 919 dump.dump_time).strftime('%Y-%m-%d %H:%M:%S') |
789 | 920 json_base['policies'][policy]['snapshots'].append(component_sizes) |
790 elif action == '--list': | 921 |
791 component_sizes = logs[0].apply_policy( | 922 for bucket in buckets.itervalues(): |
792 policy_list, buckets, logs[0].log_time) | 923 bucket.clear_component_cache() |
| 924 |
| 925 json.dump(json_base, sys.stdout, indent=2, sort_keys=True) |
| 926 |
| 927 return 0 |
| 928 |
| 929 |
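A hedged sketch of the JSON_DEEP_2 shape emitted above; labels, components, and values are hypothetical, and each snapshot also carries the other size counters computed by apply_policy:

    {
      "version": "JSON_DEEP_2",
      "policies": {
        "l0": {
          "legends": ["tc-webkit", "mmap-v8"],
          "snapshots": [
            { "tc-webkit": 13107200, "mmap-v8": 3407872,
              "dump_path": "hprof.0002.heap",
              "dump_time": "2012-06-15 12:34:56" }
          ]
        }
      }
    }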
| 930 def do_list(sys_argv): |
| 931 parser = optparse.OptionParser('Usage: %prog list [-p POLICY] <first-dump>') |
| 932 parser.add_option('-p', '--policy', type='string', dest='policy', |
| 933 help='profile with POLICY', metavar='POLICY') |
| 934 options, args = parser.parse_args(sys_argv) |
| 935 |
| 936 if len(args) != 2: |
| 937 parser.error('needs 1 argument.') |
| 938 return 1 |
| 939 |
| 940 dump_path = args[1] |
| 941 |
| 942 prefix = find_prefix(dump_path) |
| 943 buckets = load_buckets(prefix) |
| 944 dumps, appeared_addresses = load_dumps( |
| 945 determine_dump_path_list(dump_path, prefix), buckets) |
| 946 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
| 947 policies = load_policies(options.policy) |
| 948 |
| 949 for policy in sorted(policies): |
| 950 rule_list = policies[policy].rules |
| 951 components = policies[policy].components |
| 952 |
| 953 component_sizes = dumps[0].apply_policy( |
| 954 rule_list, buckets, dumps[0].dump_time, components, symbols) |
| 955 sys.stdout.write('%s:\n' % policy) |
793 for c in components: | 956 for c in components: |
794 if c in ['hour', 'minute', 'second']: | 957 if c in ['hour', 'minute', 'second']: |
795 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c])) | 958 sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c])) |
796 else: | 959 else: |
797 sys.stdout.write('%30s %10.3f\n' % ( | 960 sys.stdout.write('%30s %10.3f\n' % ( |
798 c, component_sizes[c] / 1024.0 / 1024.0)) | 961 c, component_sizes[c] / 1024.0 / 1024.0)) |
799 | 962 |
800 elif action == '--expand': | 963 for bucket in buckets.itervalues(): |
801 component_name = sys.argv[5] | 964 bucket.clear_component_cache() |
802 depth = sys.argv[6] | |
803 logs[0].expand(policy_list, buckets, component_name, int(depth)) | |
804 | 965 |
805 elif action == '--pprof': | 966 return 0 |
806 if len(sys.argv) > 5: | 967 |
807 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5]) | 968 |
808 else: | 969 def do_expand(sys_argv): |
809 logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None) | 970 parser = optparse.OptionParser( |
| 971 'Usage: %prog expand <dump> <policy> <component> <depth>') |
| 972 options, args = parser.parse_args(sys_argv) |
| 973 |
| 974 if len(args) != 5: |
| 975 parser.error('needs 4 arguments.') |
| 976 return 1 |
| 977 |
| 978 dump_path = args[1] |
| 979 target_policy = args[2] |
| 980 component_name = args[3] |
| 981 depth = args[4] |
| 982 |
| 983 prefix = find_prefix(dump_path) |
| 984 buckets = load_buckets(prefix) |
| 985 dump, appeared_addresses = load_dump(dump_path, buckets) |
| 986 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
| 987 policies = load_policies(target_policy) |
| 988 |
| 989 rule_list = policies[target_policy].rules |
| 990 |
| 991 dump.expand(rule_list, buckets, component_name, int(depth), symbols) |
| 992 |
| 993 return 0 |
| 994 |
| 995 |
| 996 def do_pprof(sys_argv): |
| 997 parser = optparse.OptionParser( |
| 998 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') |
| 999 parser.add_option('-c', '--component', type='string', dest='component', |
| 1000 help='restrict to COMPONENT', metavar='COMPONENT') |
| 1001 options, args = parser.parse_args(sys_argv) |
| 1002 |
| 1003 if len(args) != 3: |
| 1004 parser.error('needs 2 arguments.') |
| 1005 return 1 |
| 1006 |
| 1007 dump_path = args[1] |
| 1008 target_policy = args[2] |
| 1009 component = options.component |
| 1010 |
| 1011 prefix = find_prefix(dump_path) |
| 1012 buckets = load_buckets(prefix) |
| 1013 dump, appeared_addresses = load_dump(dump_path, buckets) |
| 1014 symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
| 1015 policies = load_policies(target_policy) |
| 1016 |
| 1017 rule_list = policies[target_policy].rules |
| 1018 |
| 1019 with open(prefix + '.maps', 'r') as maps_f: |
| 1020 maps_lines = maps_f.readlines() |
| 1021 dump.print_for_pprof(rule_list, buckets, maps_lines, component, symbols) |
| 1022 |
| 1023 return 0 |
| 1024 |
| 1025 |
| 1026 def main(): |
| 1027 COMMANDS = { |
| 1028 'csv': do_csv, |
| 1029 'expand': do_expand, |
| 1030 'json': do_json, |
| 1031 'list': do_list, |
| 1032 'pprof': do_pprof, |
| 1033 'stacktrace': do_stacktrace, |
| 1034 } |
| 1035 |
| 1036 # TODO(dmikurube): Remove this message after a while. |
| 1037 if len(sys.argv) >= 2 and sys.argv[1].startswith('--'): |
| 1038 sys.stderr.write(""" |
| 1039 **************** NOTICE!! **************** |
| 1040 The command line format has changed. |
| 1041 Please look at the description below. |
| 1042 ****************************************** |
| 1043 |
| 1044 """) |
| 1045 |
| 1046 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS): |
| 1047 sys.stderr.write("""Usage: %s <command> [options] [<args>] |
| 1048 |
| 1049 Commands: |
| 1050 csv Classify memory usage in CSV |
| 1051 expand Show all stacktraces contained in the specified component |
| 1052 json Classify memory usage in JSON |
| 1053 list Classify memory usage in simple listing format |
| 1054 pprof Format the profile dump so that it can be processed by pprof |
| 1055 stacktrace Convert runtime addresses to symbol names |
| 1056 |
| 1057 Quick Reference: |
| 1058 dmprof csv [-p POLICY] <first-dump> |
| 1059 dmprof expand <dump> <policy> <component> <depth> |
| 1060 dmprof json [-p POLICY] <first-dump> |
| 1061 dmprof list [-p POLICY] <first-dump> |
| 1062 dmprof pprof [-c COMPONENT] <dump> <policy> |
| 1063 dmprof stacktrace <dump> |
| 1064 """ % (sys.argv[0])) |
| 1065 sys.exit(1) |
| 1066 action = sys.argv.pop(1) |
| 1067 |
| 1068 return COMMANDS[action](sys.argv) |
810 | 1069 |
811 | 1070 |
812 if __name__ == '__main__': | 1071 if __name__ == '__main__': |
813 sys.exit(main()) | 1072 sys.exit(main()) |