Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(84)

Unified Diff: third_party/tcmalloc/chromium/src/dmprof

Issue 7865021: Deep-Memory-Profiler (DMP) implementation (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Fixed bugs Created 9 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/tcmalloc/chromium/src/dmprof
diff --git a/third_party/tcmalloc/chromium/src/dmprof b/third_party/tcmalloc/chromium/src/dmprof
new file mode 100755
index 0000000000000000000000000000000000000000..cfe7426ee4685ddbdca0a2914b3b2c5140cfcbab
--- /dev/null
+++ b/third_party/tcmalloc/chromium/src/dmprof
@@ -0,0 +1,492 @@
+#!/usr/bin/python
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
+
+import sys
+import subprocess
+import re
+import os
+
# Word indices within a STACKTRACES data line, whose layout is:
#   <virtual> <committed> <alloc_count> <free_count> @ <bucket_id> ...
BUCKET_ID = 5
VIRTUAL = 0
COMMITTED = 1
ALLOC_COUNT = 2
FREE_COUNT = 3
# Global symbol table: hex address string -> symbol name ('' = unresolved yet).
addr_symbol_dict = dict()
# Component names in the order they appear in the policy file.
components = list()
# Command-line action flag (e.g. '--csv'); assigned from sys.argv below.
action = ''
+
def get_val(l):
  """Sort-key helper: return the value of a (key, value) pair."""
  _, value = l
  return value
+
+
def get_component(policy, bucket):
  """Return the policy component name for |bucket|.

  The bucket's symbolized stacktrace is matched against each policy
  pattern in order; the first match wins and is cached on the bucket,
  so policy ordering matters.

  Args:
    policy: list of [name, compiled_regex] pairs, in priority order.
    bucket: a Bucket instance, or None for an unrecorded bucket id.
  Returns:
    The matched component name, or 'no-bucket' when bucket is None.
  Raises:
    AssertionError: if no rule matches (a policy file is expected to
        end with a catch-all rule).
  """
  if bucket is None:
    return 'no-bucket'
  if bucket.component != '':
    # Cached by a previous call; deterministic because policy order is fixed.
    return bucket.component

  # Build the symbolized stacktrace once (the original rebuilt it for
  # every policy rule), then try the rules in order.
  symbols = ' '.join(addr_symbol_dict[addr] for addr in bucket.stacktrace)
  for name, condition in policy:
    if condition.match(symbols):
      bucket.component = name
      return name

  assert False, 'no policy rule matched; policy needs a catch-all rule'
+
class Bucket(object):
  """One profiled allocation bucket: a stacktrace and its cached component.

  Attributes:
    stacktrace: list of frame addresses (hex strings).
    component: policy component name; '' until get_component() caches one.
  """

  def __init__(self, st):
    # Per-instance state.  The original declared class-level attributes
    # (a shared mutable list) and assigned a *local* 'component' here
    # instead of 'self.component' -- both fixed.
    self.stacktrace = st
    self.component = ''
+
class Log(object):
  """One dumped heap profile, parsed into global stats and stacktrace lines.

  Attributes:
    log_path: path of the parsed *.heap dump.
    log_lines: raw lines of the dump.
    stacktrace_lines: the data lines of the STACKTRACES section.
    total_/filemapped_/anonymous_/other_/mmap_/tcmalloc_ committed and
        virtual: sizes from the GLOBAL_STATS section, in bytes.
    log_time: mtime of the dump file (seconds since epoch).
  """

  def __init__(self, log_path, buckets):
    """Read and parse the profile dump at |log_path|.

    Args:
      log_path: path to a *.heap dump file.
      buckets: list mapping bucket id -> Bucket (or None).
    """
    self.log_path = log_path
    # Instance attributes.  The original declared these as class-level
    # attributes, which would share the mutable lists between instances.
    self.stacktrace_lines = []
    self.total_committed = 0
    self.total_virtual = 0
    self.filemapped_committed = 0
    self.filemapped_virtual = 0
    self.anonymous_committed = 0
    self.anonymous_virtual = 0
    self.other_committed = 0
    self.other_virtual = 0
    self.mmap_committed = 0
    self.mmap_virtual = 0
    self.tcmalloc_committed = 0
    self.tcmalloc_virtual = 0
    with open(self.log_path, mode='r') as log:
      self.log_lines = log.readlines()
    sys.stderr.write('parsing a log file:%s\n' % (log_path))
    self.parse_log(buckets)
    self.log_time = os.stat(self.log_path).st_mtime

  def dump_stacktrace(self, buckets):
    """Print stacktrace lines with addresses replaced by symbol names."""
    for line in self.stacktrace_lines:
      words = line.split()
      bucket = buckets[int(words[BUCKET_ID])]
      if bucket is None:
        continue
      for i in range(0, BUCKET_ID - 1):
        sys.stdout.write(words[i] + ' ')
      for addr in bucket.stacktrace:
        # Fall back to the raw address when no symbol is known.
        symbol = addr_symbol_dict.get(addr, '')
        sys.stdout.write((symbol if symbol != '' else addr) + ' ')
      sys.stdout.write('\n')

  def dump_for_pprof(self, policy, buckets, mapping_lines, com):
    """Convert the log so it can be processed by pprof.

    Args:
      policy: component matching rules (see parse_policy).
      buckets: bucket id -> Bucket mapping.
      mapping_lines: /proc/<pid>/maps lines, emitted as MAPPED_LIBRARIES.
      com: component name for filtering, or None/'' for everything.
    """
    def _selected(bucket):
      # A stacktrace line is kept when its bucket exists and either no
      # filter is given or its component matches the filter.
      if bucket is None:
        return False
      if com is None or com == '':
        return True
      return com == get_component(policy, bucket)

    sys.stdout.write('heap profile: ')
    com_committed = 0
    com_allocs = 0
    for line in self.stacktrace_lines:
      words = line.split()
      if not _selected(buckets[int(words[BUCKET_ID])]):
        continue
      com_committed += int(words[COMMITTED])
      com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])

    sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
        com_allocs, com_committed, com_allocs, com_committed))

    for line in self.stacktrace_lines:
      words = line.split()
      bucket = buckets[int(words[BUCKET_ID])]
      if not _selected(bucket):
        continue
      # NOTE: the original also kept accumulating com_committed /
      # com_allocs in this second pass; those sums were never read
      # again, so the dead accumulation is dropped.
      live = int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])
      sys.stdout.write('%6d: %8s [%6d: %8s] @' % (
          live, words[COMMITTED], live, words[COMMITTED]))
      for addr in bucket.stacktrace:
        sys.stdout.write(' ' + addr)
      sys.stdout.write('\n')

    sys.stdout.write('MAPPED_LIBRARIES:\n')
    for line in mapping_lines:
      sys.stdout.write(line)

  def parse_stacktraces(self, buckets):
    """Locate the STACKTRACES section and remember its data lines.

    Also registers every address of every referenced bucket in
    addr_symbol_dict (value '') so get_symbols() knows what to resolve.
    """
    ln = 0
    while self.log_lines[ln] != "STACKTRACES:\n":
      ln += 1
    # Skip header lines until the first data line (starts with a digit).
    while not self.log_lines[ln].split()[0].isdigit():
      ln += 1
    lines_start = ln
    while ln < len(self.log_lines):
      words = self.log_lines[ln].split()
      # Data lines look like: v c a f @ <bucket_id> -- stop at anything else.
      if len(words) < BUCKET_ID + 1 or words[BUCKET_ID - 1] != '@':
        break
      bucket = buckets[int(words[BUCKET_ID])]
      if bucket is not None:
        for addr in bucket.stacktrace:
          addr_symbol_dict[addr] = ""
      ln += 1
    self.stacktrace_lines = self.log_lines[lines_start:ln]

  def parse_global_stats(self):
    """Parse the GLOBAL_STATS section into the *_virtual/*_committed attrs."""
    ln = 0
    while self.log_lines[ln] != "GLOBAL_STATS:\n":
      ln += 1

    def _stats_after(keyword, virtual_idx, committed_idx):
      # Advance to the next line whose first word is |keyword| and
      # return its (virtual, committed) numbers.
      nonlocal ln
      while self.log_lines[ln].split()[0] != keyword:
        ln += 1
      words = self.log_lines[ln].split()
      return int(words[virtual_idx]), int(words[committed_idx])

    self.total_virtual, self.total_committed = _stats_after('total', 1, 2)
    # The 'file mapped' line has two leading words, so its numbers sit
    # one column further right than the other rows.
    self.filemapped_virtual, self.filemapped_committed = (
        _stats_after('file', 2, 3))
    self.anonymous_virtual, self.anonymous_committed = (
        _stats_after('anonymous', 1, 2))
    self.other_virtual, self.other_committed = _stats_after('other', 1, 2)
    self.mmap_virtual, self.mmap_committed = _stats_after('mmap', 1, 2)
    self.tcmalloc_virtual, self.tcmalloc_committed = (
        _stats_after('tcmalloc', 1, 2))

  def parse_log(self, buckets):
    """Parse both sections of the dump."""
    self.parse_global_stats()
    self.parse_stacktraces(buckets)

  def apply_policy(self, policy, buckets):
    """Aggregate the committed memory size of each component.

    Iterates through all stacktraces and attributes each to one of the
    components based on the policy.  It is important to apply the
    policy rules in order.

    Returns:
      dict mapping component name -> size in bytes, including the
      derived subtotal and pseudo components the policy declares.
    """
    sys.stderr.write('apply policy:%s\n' % (self.log_path))
    sizes = dict((c, 0) for c in components)

    for line in self.stacktrace_lines:
      words = line.split()
      bucket = buckets[int(words[BUCKET_ID])]
      component_match = get_component(policy, bucket)
      committed = int(words[COMMITTED])
      sizes[component_match] += committed

      # Roll each component up into its family subtotal.
      if component_match.startswith('tc-'):
        sizes['tc-total-log'] += committed
      elif component_match.startswith('mmap-'):
        sizes['mmap-total-log'] += committed
      else:
        sizes['other-total-log'] += committed

    sizes['mmap-no-log'] = self.mmap_committed - sizes['mmap-total-log']
    sizes['mmap-total-record'] = self.mmap_committed
    sizes['mmap-total-record-vm'] = self.mmap_virtual

    sizes['tc-no-log'] = self.tcmalloc_committed - sizes['tc-total-log']
    sizes['tc-total-record'] = self.tcmalloc_committed
    sizes['tc-unused'] = sizes['mmap-tcmalloc'] - self.tcmalloc_committed
    sizes['tc-total'] = sizes['mmap-tcmalloc']

    # Pseudo components copied from the global stats -- only filled in
    # when the policy file declared them.
    if 'total' in sizes:
      sizes['total'] = self.total_committed
    if 'filemapped' in sizes:
      sizes['filemapped'] = self.filemapped_committed
    if 'anonymous' in sizes:
      sizes['anonymous'] = self.anonymous_committed
    if 'other' in sizes:
      sizes['other'] = self.other_committed
    if 'total-vm' in sizes:
      sizes['total-vm'] = self.total_virtual
    if 'filemapped-vm' in sizes:
      sizes['filemapped-vm'] = self.filemapped_virtual
    if 'anonymous-vm' in sizes:
      sizes['anonymous-vm'] = self.anonymous_virtual
    if 'other-vm' in sizes:
      sizes['other-vm'] = self.other_virtual
    if 'unknown' in sizes:
      sizes['unknown'] = self.total_committed - self.mmap_committed
    if 'total-exclude-profiler' in sizes:
      sizes['total-exclude-profiler'] = (
          self.total_committed - sizes['mmap-profiler'])

    # Elapsed time since the first dump, for time-series CSV output.
    # NOTE(review): 'logs' is the module-level list built by the driver
    # code below; this assumes logs[0] is the earliest dump.
    if 'hour' in sizes:
      sizes['hour'] = (self.log_time - logs[0].log_time) / 60.0 / 60.0
    if 'minute' in sizes:
      sizes['minute'] = (self.log_time - logs[0].log_time) / 60.0
    if 'second' in sizes:
      sizes['second'] = self.log_time - logs[0].log_time

    return sizes

  def expand(self, policy, buckets, com, depth):
    """Print all partial stacktraces within component |com|, largest first.

    Args:
      policy: component matching rules.
      buckets: bucket id -> Bucket mapping.
      com: component name to expand.
      depth: number of frames to keep (the innermost frame is skipped).
    """
    sizes = dict()

    for line in self.stacktrace_lines:
      words = line.split()
      bucket = buckets[int(words[BUCKET_ID])]
      if get_component(policy, bucket) != com:
        continue
      # Slicing clamps automatically, so min(len, 1 + depth) is implicit.
      frames = bucket.stacktrace[1:1 + depth]
      key = ''.join(addr_symbol_dict[addr] + ' ' for addr in frames)
      sizes[key] = sizes.get(key, 0) + int(words[COMMITTED])

    total = 0
    for key, size in sorted(sizes.items(), key=get_val, reverse=True):
      sys.stdout.write('%10d %s\n' % (size, key))
      total += size
    sys.stderr.write('total: %d\n' % (total))
+
+
def get_symbols(symbol_path, mapping_lines):
  """Fill addr_symbol_dict for every collected address.

  Uses the cache file at |symbol_path| when it already has content;
  otherwise runs 'pprof --symbols' on the chrome binary (module-level
  chrome_path) and writes the results back to the cache.

  Args:
    symbol_path: path of the symbol cache file ('<prefix>.symbols').
    mapping_lines: /proc/<pid>/maps lines of the profiled process.
  """
  with open(symbol_path, 'a+') as symbol_f:
    # 'a+' may position the stream at EOF; read the cache from the start.
    symbol_f.seek(0)
    symbol_lines = symbol_f.readlines()

    if len(symbol_lines) == 0:
      # No cache yet: feed pprof the maps plus one address per line.
      with open("/tmp/maps", 'w+') as pprof_in, \
           open("/tmp/symbols", 'w+') as pprof_out:
        pprof_in.writelines(mapping_lines)

        addr_list = sorted(addr_symbol_dict.keys())
        for key in addr_list:
          pprof_in.write(key + "\n")
        pprof_in.seek(0)

        # The original passed shell='/usr/bash'; any non-empty string is
        # just a truthy shell=True.  Run without a shell, with an argv
        # list, instead.
        p = subprocess.Popen(['pprof', '--symbols', chrome_path],
                             stdin=pprof_in, stdout=pprof_out)
        p.wait()

        pprof_out.seek(0)
        symbols = pprof_out.readlines()
        # pprof emits one symbol line per input address, in order.
        for key, symbol in zip(addr_list, symbols):
          addr_symbol_dict[key] = symbol.strip()

      for addr, symbol in addr_symbol_dict.items():
        symbol_f.write(addr + ' ' + symbol + '\n')
    else:
      # Cache hit: each line is '<address> <symbol>'.  A line written for
      # an unresolved address has no symbol word; the original crashed
      # with IndexError on such lines.
      for l in symbol_lines:
        words = l.split()
        addr_symbol_dict[words[0]] = words[1] if len(words) > 1 else ''
+
+
def parse_policy(policy_path):
  """Parse a policy file.

  A policy file contains component names and their stacktrace patterns
  written as regular expressions.  The patterns are matched against the
  joined symbols of each stacktrace, in the order written in the policy
  file.  Lines starting with '#' are comments; the special pattern
  'default' declares a component name without adding a matching rule.

  Args:
    policy_path: path of the policy file.
  Returns:
    A list of [name, compiled_regex] pairs; each regex is anchored with
    \\Z so it must match the whole symbol string.
  """
  policy = list()
  with open(policy_path, mode='r') as policy_f:
    for l in policy_f:
      words = l.split()
      if not words:
        # Robustness: the original crashed (IndexError) on blank lines.
        continue
      name = words[0]
      if name.startswith('#'):
        continue
      pattern = l[len(name):].strip()
      if pattern != 'default':
        policy.append([name, re.compile(pattern + r'\Z')])
      if name not in components:
        components.append(name)

  return policy
+
action = sys.argv[1]

if action not in ('--csv', '--expand', '--list', '--stacktrace', '--pprof'):
  sys.stderr.write(
"""Usage:
%s [options] <chrome-binary-path> <policy-file> <profile> [component-name] [depth]

Options:
 --csv Output result in csv format
 --stacktrace Convert raw address to symbol names
 --list Lists components and their sizes
 --expand Show all stacktraces in the specified component
 of given depth with their sizes
 --pprof Format the profile file so it can be processed by pprof

Examples:
dmprof --csv out/Debug/chrome ./policy o1211/heap.hprof.01221.0001.heap > renderer.csv
dmprof --list out/Debug/chrome ./policy o1211/heap.hprof.01221.0101.heap
dmprof --expand out/Debug/chrome ./policy o1211/heap.hprof.01221.0101.heap tc-webkit 4
dmprof --pprof out/Debug/chrome ./policy o1211/heap.hprof.01221.0101.heap > for_pprof


""" % (sys.argv[0]))
  sys.exit(1)

# These stay module-level globals: get_symbols() reads chrome_path and
# Log.apply_policy() reads logs.
chrome_path = sys.argv[2]
policy_path = sys.argv[3]
log_path = sys.argv[4]

sys.stderr.write('parsing a policy file\n')
policy = parse_policy(policy_path)

# Derive sibling file names: '<prefix>.NNNN.heap' -> '<prefix>.symbols' etc.
p = re.compile(r'\.[0-9][0-9][0-9][0-9]\.heap')
prefix = p.sub('', log_path)
symbol_path = prefix + '.symbols'

sys.stderr.write('parsing the maps file\n')
maps_path = prefix + '.maps'
with open(maps_path, mode='r') as maps_f:
  maps_lines = maps_f.readlines()

# Reading buckets.  Bucket ids index directly into this sparse list.
sys.stderr.write('parsing the bucket file\n')
buckets = [None] * 10000000
bucket_count = 0
n = 0
while True:
  buckets_path = '%s.%04d.buckets' % (prefix, n)
  if not os.path.exists(buckets_path):
    # Tolerate gaps in the numbering; give up at the first missing file
    # whose index is above 10.
    if n > 10:
      break
    n += 1
    continue
  sys.stderr.write('reading buckets from %s\n' % (buckets_path))
  with open(buckets_path, mode='r') as buckets_f:
    # Each line: <bucket_id> <addr> <addr> ...
    for l in buckets_f:
      words = l.split()
      buckets[int(words[0])] = Bucket(words[1:])
      bucket_count += 1
  n += 1

sys.stderr.write('the number buckets: %d\n' % (bucket_count))

log_path_list = list()
log_path_list.append(log_path)

if action == '--csv':
  # Search for the rest of the dump sequence after the given file
  # ('<prefix>.NNNN.heap' with consecutive numbers).
  n = int(log_path[len(log_path) - 9:len(log_path) - 5])
  n += 1  # skip current file
  while True:
    p = '%s.%04d.heap' % (prefix, n)
    if not os.path.exists(p):
      break
    log_path_list.append(p)
    n += 1

logs = list()
for path in log_path_list:
  logs.append(Log(path, buckets))

sys.stderr.write('getting symbols\n')
get_symbols(symbol_path, maps_lines)

if action == '--stacktrace':
  logs[0].dump_stacktrace(buckets)

elif action == '--csv':
  sys.stdout.write(','.join(components))
  sys.stdout.write('\n')

  for log in logs:
    component_sizes = log.apply_policy(policy, buckets)
    s = list()
    for c in components:
      if c in ('hour', 'minute', 'second'):
        # Time pseudo-components are already in the right unit.
        s.append('%05.5f' % (component_sizes[c]))
      else:
        s.append('%05.5f' % (component_sizes[c] / 1024. / 1024.))
    sys.stdout.write(','.join(s))
    sys.stdout.write('\n')

elif action == '--list':
  component_sizes = logs[0].apply_policy(policy, buckets)
  for c in components:
    if c in ('hour', 'minute', 'second'):
      sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))
    else:
      sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c] / 1024. / 1024.))

elif action == '--expand':
  com_name = sys.argv[5]
  depth = sys.argv[6]
  logs[0].expand(policy, buckets, com_name, int(depth))

elif action == '--pprof':
  # Optional 6th argument filters the output to one component.
  com = sys.argv[5] if len(sys.argv) > 5 else None
  logs[0].dump_for_pprof(policy, buckets, maps_lines, com)
« no previous file with comments | « third_party/tcmalloc/chromium/src/deep-memory-profiler.cc ('k') | third_party/tcmalloc/chromium/src/gmail-send-self-test » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698