OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python |
| 2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 3 |
| 4 import sys |
| 5 import subprocess |
| 6 import re |
| 7 import os |
| 8 |
# Column layout of a STACKTRACES data line in a heap dump:
#   "<virtual> <committed> <alloc_count> <free_count> @ <bucket_id>"
# (see Log.parse_stacktraces: words[BUCKET_ID - 1] must be '@').
BUCKET_ID = 5
VIRTUAL = 0
COMMITTED = 1
ALLOC_COUNT = 2
FREE_COUNT = 3
# Maps a raw address (hex string) to its symbol name.  Addresses are
# registered with '' by Log.parse_stacktraces and resolved later by
# get_symbols().
addr_symbol_dict = dict()
# Ordered list of component names, populated by parse_policy(); the
# order matters for CSV column output.
components = list()
# Command-line action flag (e.g. '--csv'); assigned from sys.argv below.
action = ''
| 17 |
def get_val(l):
  """Sort key for (label, size) pairs: returns the size element."""
  size = l[1]
  return size
| 20 |
| 21 |
def get_component(policy, bucket):
  """Returns the component name that |policy| assigns to |bucket|.

  Symbolizes the bucket's stacktrace via the module-global
  addr_symbol_dict and matches it against each policy pattern in order.
  The result is cached on the bucket so the (expensive) match runs at
  most once per bucket.

  Args:
    policy: list of [name, compiled regex] pairs, in priority order.
    bucket: a Bucket, or None for entries without a stacktrace.
  Returns:
    The matched component name, or 'no-bucket' when bucket is None.
  """
  if bucket is None:
    return 'no-bucket'
  if bucket.component != '':
    return bucket.component

  # The symbolized stacktrace is the same for every policy entry, so
  # build it once instead of rebuilding it inside the matching loop
  # (the original recomputed it per policy entry).
  st = ' '.join(addr_symbol_dict[addr] for addr in bucket.stacktrace)
  st = st.strip()
  for name, condition in policy:
    if condition.match(st):
      bucket.component = name
      return name

  # Policies are expected to end with a catch-all pattern; reaching
  # here means the policy file is incomplete.
  assert False
| 37 |
class Bucket(object):
  """One aggregation bucket of the heap profile.

  Attributes:
    stacktrace: list of raw return addresses (hex strings).
    component: component name cached by get_component(); '' until the
        policy match has been computed.
  """

  def __init__(self, st):
    self.stacktrace = st
    # Bug fix: the original assigned a *local* variable 'component'
    # here, so instances silently fell back on a class-level attribute.
    self.component = ''
| 45 |
class Log(object):
  """One heap-profiler dump file (*.heap) and the stats parsed from it.

  A dump has two sections: GLOBAL_STATS (overall virtual/committed byte
  counts per mapping category) and STACKTRACES (one data line per
  bucket: counters, an '@' separator, then the bucket id).
  """
  # Class-level defaults; instances overwrite them in the parsers below.
  log_path = ''
  log_lines = list()
  stacktrace_lines = list()
  total_committed = 0
  total_virtual = 0
  filemapped_committed = 0
  filemapped_virtual = 0
  anonymous_committed = 0
  anonymous_virtual = 0
  other_committed = 0
  other_virtual = 0
  mmap_committed = 0
  mmap_virtual = 0
  tcmalloc_committed = 0
  tcmalloc_virtual = 0
  # mtime of the dump file; used for the hour/minute/second columns.
  log_time = 0

  def __init__(self, log_path, buckets):
    # Reads the whole dump into memory and parses it immediately.
    self.log_path = log_path
    log = open(self.log_path, mode='r')
    self.log_lines = log.readlines();
    log.close()
    sys.stderr.write('parsing a log file:%s\n' % (log_path))
    self.parse_log(buckets)
    self.log_time = os.stat(self.log_path).st_mtime


  def dump_stacktrace(self, buckets):
    """Prints each stacktrace line with addresses replaced by symbols."""
    for l in self.stacktrace_lines:
      words = l.split()
      bucket = buckets[int(words[BUCKET_ID])]
      if bucket == None:
        continue
      # Copy the numeric columns (everything before the '@' separator).
      for i in range(0, BUCKET_ID - 1):
        sys.stdout.write(words[i] + ' ')
      for addr in bucket.stacktrace:
        # Fall back to the raw address when no symbol is known (yet).
        if addr_symbol_dict.has_key(addr):
          if addr_symbol_dict[addr] != '':
            sys.stdout.write(addr_symbol_dict[addr] + ' ')
          else:
            sys.stdout.write(addr + ' ')
        else:
          sys.stdout.write(addr + ' ')
      sys.stdout.write('\n')


  def dump_for_pprof(self, policy, buckets, mapping_lines, com):
    """ Convert the log file so it can be processed by pprof
    Args:
      com: component name for filtering
    """
    sys.stdout.write('heap profile: ')
    # First pass: accumulate totals for the header line.
    com_committed = 0
    com_allocs = 0
    for l in self.stacktrace_lines:
      words = l.split()
      bucket = buckets[int(words[BUCKET_ID])]
      if bucket == None:
        continue
      # A missing/empty component name means 'no filtering'.
      if com == None or com == '':
        pass
      elif com != get_component(policy, bucket):
        continue

      com_committed += int(words[COMMITTED])
      com_allocs += int(words[ALLOC_COUNT])-int(words[FREE_COUNT])

    sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (com_allocs,
                                                              com_committed,
                                                              com_allocs,
                                                              com_committed))

    # Second pass: one output line per bucket, followed by its raw
    # stacktrace addresses (pprof resolves them via MAPPED_LIBRARIES).
    for l in self.stacktrace_lines:
      words = l.split()
      bucket = buckets[int(words[BUCKET_ID])]
      if bucket == None:
        continue
      if com == None or com == '':
        pass
      elif com != get_component(policy, bucket):
        continue

      # NOTE(review): these accumulators are never read again after the
      # header above was printed — presumably leftover; confirm before
      # removing.
      com_committed += int(words[COMMITTED])
      com_allocs += int(words[ALLOC_COUNT])
      sys.stdout.write('%6d: %8s [%6d: %8s] @' % (int(words[ALLOC_COUNT])-
                                                  int(words[FREE_COUNT]),
                                                  words[COMMITTED],
                                                  int(words[ALLOC_COUNT])-
                                                  int(words[FREE_COUNT]),
                                                  words[COMMITTED]))
      for addr in bucket.stacktrace:
        sys.stdout.write(' ' + addr)
      sys.stdout.write('\n')

    sys.stdout.write('MAPPED_LIBRARIES:\n')
    for l in mapping_lines:
      sys.stdout.write(l)

  def parse_stacktraces(self, buckets):
    """Extracts the STACKTRACES section into self.stacktrace_lines.

    Also registers every address of each referenced bucket in
    addr_symbol_dict (with an empty symbol) so get_symbols() knows
    which addresses need resolving.
    """
    ln = 0
    while self.log_lines[ln] != "STACKTRACES:\n":
      ln += 1
    # Skip header lines until the first data line (starts with a digit).
    while self.log_lines[ln].split()[0].isdigit() == False:
      ln += 1
    lines_start = ln
    while ln < len(self.log_lines):
      words = self.log_lines[ln].split()
      # A data line has at least BUCKET_ID + 1 columns and an '@'
      # marker right before the bucket id; anything else ends the
      # section.
      if len(words) < BUCKET_ID + 1:
        break
      if words[BUCKET_ID - 1] != '@':
        break
      bucket = buckets[int(words[BUCKET_ID])]
      if bucket != None:
        for addr in bucket.stacktrace:
          addr_symbol_dict[addr] = ""
      ln += 1
    lines_end = ln
    self.stacktrace_lines = self.log_lines[lines_start:lines_end]

  def parse_global_stats(self):
    """Parses the GLOBAL_STATS section into *_virtual / *_committed.

    Each stat line is located by scanning forward for its keyword, so
    the stats are expected in this order: total, file, anonymous,
    other, mmap, tcmalloc.
    """
    ln = 0
    while self.log_lines[ln] != "GLOBAL_STATS:\n":
      ln += 1

    while self.log_lines[ln].split()[0] != "total":
      ln += 1
    words = self.log_lines[ln].split()
    self.total_virtual = int(words[1])
    self.total_committed = int(words[2])

    # 'file mapped' is two words, so the values shift one column right.
    while self.log_lines[ln].split()[0] != "file":
      ln += 1
    words = self.log_lines[ln].split()
    self.filemapped_virtual = int(words[2])
    self.filemapped_committed = int(words[3])

    while self.log_lines[ln].split()[0] != "anonymous":
      ln += 1
    words = self.log_lines[ln].split()
    self.anonymous_virtual = int(words[1])
    self.anonymous_committed = int(words[2])

    while self.log_lines[ln].split()[0] != "other":
      ln += 1
    words = self.log_lines[ln].split()
    self.other_virtual = int(words[1])
    self.other_committed = int(words[2])

    while self.log_lines[ln].split()[0] != "mmap":
      ln += 1
    words = self.log_lines[ln].split()
    self.mmap_virtual = int(words[1])
    self.mmap_committed = int(words[2])

    while self.log_lines[ln].split()[0] != "tcmalloc":
      ln += 1
    words = self.log_lines[ln].split()
    self.tcmalloc_virtual = int(words[1])
    self.tcmalloc_committed = int(words[2])

  def parse_log(self, buckets):
    # Parses both sections of the dump.
    self.parse_global_stats()
    self.parse_stacktraces(buckets)

  def apply_policy(self, policy, buckets):
    """ Aggregate the total memory size of each component

    Iterate through all stacktraces and attribute them
    to one of the components based on the policy.
    It is important to apply policy in right order.

    Returns:
      A dict mapping component name to its aggregated committed bytes
      (plus the derived/optional entries described below).
    """

    sys.stderr.write('apply policy:%s\n' % (self.log_path))
    sizes = dict()
    for c in components:
      sizes[c] = 0

    for l in self.stacktrace_lines:
      words = l.split()
      bucket = buckets[int(words[BUCKET_ID])]
      component_match = get_component(policy, bucket)
      sizes[component_match] += int(words[COMMITTED])

      # Roll each match up into one of three coarse subtotals; the
      # policy file must therefore declare 'tc-total-log',
      # 'mmap-total-log' and 'other-total-log'.
      if component_match[0:3] == 'tc-':
        sizes['tc-total-log'] += int(words[COMMITTED])
      elif component_match[0:5] == 'mmap-':
        sizes['mmap-total-log'] += int(words[COMMITTED])
      else:
        sizes['other-total-log'] += int(words[COMMITTED])

    # Derived entries combining logged subtotals with GLOBAL_STATS.
    # 'mmap-tcmalloc' must also be declared by the policy (accessed
    # unconditionally below).
    sizes['mmap-no-log'] = self.mmap_committed - sizes['mmap-total-log']
    sizes['mmap-total-record'] = self.mmap_committed
    sizes['mmap-total-record-vm'] = self.mmap_virtual

    sizes['tc-no-log'] = self.tcmalloc_committed - sizes['tc-total-log']
    sizes['tc-total-record'] = self.tcmalloc_committed
    sizes['tc-unused'] = sizes['mmap-tcmalloc'] - self.tcmalloc_committed
    sizes['tc-total'] = sizes['mmap-tcmalloc']

    # Optional entries: filled in only when the policy declares them.
    if sizes.has_key('total'):
      sizes['total'] = self.total_committed
    if sizes.has_key('filemapped'):
      sizes['filemapped'] = self.filemapped_committed
    if sizes.has_key('anonymous'):
      sizes['anonymous'] = self.anonymous_committed
    if sizes.has_key('other'):
      sizes['other'] = self.other_committed
    if sizes.has_key('total-vm'):
      sizes['total-vm'] = self.total_virtual
    if sizes.has_key('filemapped-vm'):
      sizes['filemapped-vm'] = self.filemapped_virtual
    if sizes.has_key('anonymous-vm'):
      sizes['anonymous-vm'] = self.anonymous_virtual
    if sizes.has_key('other-vm'):
      sizes['other-vm'] = self.other_virtual
    if sizes.has_key('unknown'):
      sizes['unknown'] = self.total_committed - self.mmap_committed
    if sizes.has_key('total-exclude-profiler'):
      sizes['total-exclude-profiler'] = self.total_committed - sizes['mmap-profiler']

    # Time-series columns relative to the first dump in the
    # module-level 'logs' list (defined in the script body below).
    if sizes.has_key('hour'):
      sizes['hour'] = (self.log_time - logs[0].log_time)/60.0/60.0
    if sizes.has_key('minute'):
      sizes['minute'] = (self.log_time - logs[0].log_time)/60.0
    if sizes.has_key('second'):
      sizes['second'] = self.log_time - logs[0].log_time

    return sizes

  def expand(self, policy, buckets, com, depth):
    """Prints the symbolized partial stacktraces of component |com|,
    truncated to |depth| frames, with their aggregated committed sizes
    (largest first)."""
    sizes = dict()

    for l in self.stacktrace_lines:
      words = l.split()
      bucket = buckets[int(words[BUCKET_ID])]
      component_match = get_component(policy, bucket)
      if component_match == com:
        # Aggregation key: symbolized frames 1..depth (frame 0 is
        # skipped).
        a = ''
        for addr in bucket.stacktrace[1 : min(len(bucket.stacktrace), 1 + depth)]:
          a += addr_symbol_dict[addr] + ' '
        if sizes.has_key(a) == False:
          sizes[a] = 0
        sizes[a] += int(words[COMMITTED])

    # Python 2: dict.items() returns a list, sortable in place.
    s = sizes.items()
    s.sort(key=get_val,reverse=True)
    total = 0
    for l in s:
      sys.stdout.write('%10d %s\n' % (l[1], l[0]))
      total += l[1]
    sys.stderr.write('total: %d\n' % (total))
| 296 |
| 297 |
def get_symbols(symbol_path, mapping_lines):
  """Fills addr_symbol_dict with symbol names for all known addresses.

  Uses a cache file at symbol_path: if it already has content it is
  loaded directly; otherwise 'pprof --symbols' is run against the
  binary at module-global chrome_path and the result is written back
  to the cache.

  Args:
    symbol_path: path of the symbol cache file (opened 'a+', so it is
        created if missing).
    mapping_lines: /proc-maps-style lines fed to pprof before the
        address list.
  """
  symbol_f = open(symbol_path, 'a+')
  symbol_lines = symbol_f.readlines()

  if(len(symbol_lines) == 0):
    # Cache miss: hand pprof the mappings followed by the sorted
    # address list via temp files, then read one symbol per address.
    pprof_in = open("/tmp/maps", 'w+')
    pprof_out = open("/tmp/symbols", 'w+')

    for l in mapping_lines:
      pprof_in.write(l)

    addr_list = addr_symbol_dict.keys()
    addr_list.sort()
    for key in addr_list:
      pprof_in.write(key + "\n")

    pprof_in.seek(0)

    # NOTE(review): subprocess's 'shell' parameter expects a boolean;
    # '/usr/bash' is merely truthy here, so the command runs through
    # the default shell — confirm this is intended.
    p = subprocess.Popen(
        'pprof --symbols %s' % (chrome_path),
        shell='/usr/bash', stdin=pprof_in, stdout=pprof_out)
    p.wait()

    pprof_out.seek(0)
    symbols = pprof_out.readlines()
    # pprof emits one symbol line per input address, in the same order.
    i = 0
    for key in addr_list:
      addr_symbol_dict[key] = symbols[i].strip()
      i += 1

    pprof_in.close()
    pprof_out.close()

    # Persist '<address> <symbol>' pairs so later runs skip pprof.
    for a in addr_symbol_dict.items():
      symbol_f.write(a[0] + ' ' + a[1] + '\n')
  else:
    # Cache hit: each line is '<address> <symbol>'.
    for l in symbol_lines:
      addr_symbol_dict[l.split()[0]] = l.split()[1]

  symbol_f.close()
| 338 |
| 339 |
def parse_policy(policy_path):
  """ Parses policy file

  A policy file contains component's names and their
  stacktrace pattern written in regular expression.
  Those patterns are matched against each symbols of
  each stacktraces in the order written in the policy file

  Each line is '<name> <pattern>'.  Lines whose name starts with '#'
  are comments; a pattern of 'default' declares the component without
  a regex.  Every parsed name is also appended (once) to the
  module-global 'components' list, preserving file order.

  Args:
    policy file path
  Returns:
    A list containing component's name and its regex object
  """
  # 'with' guarantees the file is closed (the original leaked the
  # handle).
  with open(policy_path, mode='r') as policy_f:
    policy_lines = policy_f.readlines()
  policy = list()
  for l in policy_lines:
    # Robustness fix: a blank line used to crash on l.split()[0].
    if not l.split():
      continue
    name = l.split()[0]
    if name[0] == '#':
      continue
    pattern = l[len(name) : len(l)].strip()
    if pattern != 'default':
      # \Z anchors the match at the end of the symbolized stacktrace.
      policy.append([name, re.compile(pattern + r'\Z')])
    if components.count(name) == 0:
      components.append(name)

  return policy
| 367 |
action = sys.argv[1]

# Validate the action flag before touching the remaining arguments.
if (action in ['--csv','--expand','--list','--stacktrace','--pprof']) == False:
  sys.stderr.write(
"""Usage:
%s [options] <chrome-binary-path> <policy-file> <profile> [component-name] [depth]

Options:
  --csv      Output result in csv format
  --stacktrace  Convert raw address to symbol names
  --list     Lists components and their sizes
  --expand   Show all stacktraces in the specified component
             of given depth with their sizes
  --pprof    Format the profile file so it can be processed by pprof

Examples:
  dmprof --csv out/Debug/chrome ./policy o1211/heap.hprof.01221.0001.heap > renderer.csv
  dmprof --list out/Debug/chrome ./policy o1211/heap.hprof.01221.0101.heap
  dmprof --expand out/Debug/chrome ./policy o1211/heap.hprof.01221.0101.heap tc-webkit 4
  dmprof --pprof out/Debug/chrome ./policy o1211/heap.hprof.01221.0101.heap > for_pprof


""" % (sys.argv[0]))
  sys.exit(1)

chrome_path = sys.argv[2]
policy_path = sys.argv[3]
log_path = sys.argv[4]

sys.stderr.write('parsing a policy file\n')
policy = parse_policy(policy_path)

# Dump files are named '<prefix>.NNNN.heap'; sibling files (.symbols,
# .maps, .NNNN.buckets) share the same prefix.
p = re.compile('\.[0-9][0-9][0-9][0-9]\.heap')
prefix = p.sub('',log_path)
symbol_path = prefix + '.symbols'

sys.stderr.write('parsing the maps file\n')
maps_path = prefix + '.maps'
maps_f = open(maps_path, mode='r')
maps_lines = maps_f.readlines()

# Reading buckets
sys.stderr.write('parsing the bucket file\n')
# Sparse array indexed by bucket id as it appears in the dump files.
buckets = [None for i in range(0, 10000000)]
bucket_count = 0
#n = int(log_path[len(log_path) - 9 : len(log_path) - 5])
n = 0
while True:
  buckets_path = '%s.%04d.buckets'% (prefix, n)
  if os.path.exists(buckets_path) == False:
    # Tolerate gaps in the numbering, but give up after index 10.
    if n > 10:
      break
    else:
      n+=1
      continue
  sys.stderr.write('reading buckets from %s\n' % (buckets_path))
  buckets_f = open(buckets_path, mode='r')
  for l in buckets_f.readlines():
    # Each bucket line: '<bucket id> <addr> <addr> ...'
    words = l.split()
    st = list()
    for i in range(1, len(words)):
      st.append(words[i])
    buckets[int(words[0])] = Bucket(st)
    bucket_count+=1
  buckets_f.close()
  n+=1

sys.stderr.write('the number buckets: %d\n' % (bucket_count))

log_path_list = list()
log_path_list.append(log_path)

if action == '--csv':
  # search for the sequence of files
  # The dump's own sequence number is the 4 digits before '.heap'.
  n = int(log_path[len(log_path) - 9 : len(log_path) - 5])
  n += 1 # skip current file
  while True:
    p = '%s.%04d.heap'% (prefix, n)
    if os.path.exists(p):
      log_path_list.append(p)
    else:
      break
    n += 1

logs = list()
for path in log_path_list:
  logs.append(Log(path, buckets))

sys.stderr.write('getting symbols\n')
get_symbols(symbol_path, maps_lines)

if action == '--stacktrace':
  logs[0].dump_stacktrace(buckets)

elif action == '--csv':
  # Header row of component names, then one row of values per dump.
  sys.stdout.write(','.join(components))
  sys.stdout.write('\n')

  for log in logs:
    component_sizes = log.apply_policy(policy, buckets)
    s = list()
    for c in components:
      # Time columns are printed as-is; byte counts as megabytes.
      if c in ['hour', 'minute', 'second']:
        s.append('%05.5f' % (component_sizes[c]))
      else:
        s.append('%05.5f' % (component_sizes[c]/1024./1024.))
    sys.stdout.write(','.join(s))
    sys.stdout.write('\n')

elif action == '--list':
  component_sizes = logs[0].apply_policy(policy, buckets)
  for c in components:
    if c in ['hour', 'minute', 'second']:
      sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))
    else:
      sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]/1024./1024.))
elif action == '--expand':
  com_name = sys.argv[5]
  depth = sys.argv[6]
  logs[0].expand(policy, buckets, com_name, int(depth))
elif action == '--pprof':
  # Optional 5th argument restricts output to a single component.
  if len(sys.argv) > 5:
    logs[0].dump_for_pprof(policy, buckets, maps_lines, sys.argv[5])
  else:
    logs[0].dump_for_pprof(policy, buckets, maps_lines, None)
OLD | NEW |