OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python |
| 2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 3 |
| 4 import sys |
| 5 import subprocess |
| 6 import re |
| 7 import os |
| 8 |
# Column layout of a STACKTRACES data line in a heap dump:
#   "<virtual> <committed> <alloc_count> <free_count> @ <bucket_id>"
# (see Log.parse_stacktraces: words[BUCKET_ID - 1] must be '@').
BUCKET_ID = 5
VIRTUAL = 0
COMMITTED = 1
ALLOC_COUNT = 2
FREE_COUNT = 3
# Maps a raw address (hex string) to its symbol name.  Addresses are
# registered with '' by Log.parse_stacktraces and resolved later by
# get_symbols().
addr_symbol_dict = dict()
# Ordered list of component names, populated by parse_policy(); the
# order matters for CSV column output.
components = list()
# Command-line action flag (e.g. '--csv'); assigned from sys.argv below.
action = ''
| 17 |
def get_val(l):
  """Sort key for (label, size) pairs: returns the size element."""
  size = l[1]
  return size
| 20 |
| 21 |
def get_component(policy, bucket):
  """Returns the component name that |policy| assigns to |bucket|.

  Symbolizes the bucket's stacktrace via the module-global
  addr_symbol_dict and matches it against each policy pattern in order.
  The result is cached on the bucket so the (expensive) match runs at
  most once per bucket.

  Args:
    policy: list of [name, compiled regex] pairs, in priority order.
    bucket: a Bucket, or None for entries without a stacktrace.
  Returns:
    The matched component name, or 'no-bucket' when bucket is None.
  """
  if bucket is None:
    return 'no-bucket'
  if bucket.component != '':
    return bucket.component

  # The symbolized stacktrace is the same for every policy entry, so
  # build it once instead of rebuilding it inside the matching loop
  # (the original recomputed it per policy entry).
  st = ' '.join(addr_symbol_dict[addr] for addr in bucket.stacktrace)
  st = st.strip()
  for name, condition in policy:
    if condition.match(st):
      bucket.component = name
      return name

  # Policies are expected to end with a catch-all pattern; reaching
  # here means the policy file is incomplete.
  assert False
| 37 |
class Bucket(object):
  """One aggregation bucket of the heap profile.

  Attributes:
    stacktrace: list of raw return addresses (hex strings).
    component: component name cached by get_component(); '' until the
        policy match has been computed.
  """

  def __init__(self, st):
    self.stacktrace = st
    # Bug fix: the original assigned a *local* variable 'component'
    # here, so instances silently fell back on a class-level attribute.
    self.component = ''
| 45 |
class Log(object):
  """One heap-profiler dump file (*.heap) and the stats parsed from it.

  A dump has two sections: GLOBAL_STATS (overall virtual/committed byte
  counts per mapping category) and STACKTRACES (one data line per
  bucket: counters, an '@' separator, then the bucket id).
  """
  # Class-level defaults; instances overwrite them in the parsers below.
  log_path = ''
  log_lines = list()
  stacktrace_lines = list()
  total_committed = 0
  total_virtual = 0
  filemapped_committed = 0
  filemapped_virtual = 0
  anonymous_committed = 0
  anonymous_virtual = 0
  other_committed = 0
  other_virtual = 0
  mmap_committed = 0
  mmap_virtual = 0
  tcmalloc_committed = 0
  tcmalloc_virtual = 0
  # mtime of the dump file; used for the hour/minute/second columns.
  log_time = 0

  def __init__(self, log_path, buckets):
    # Reads the whole dump into memory and parses it immediately.
    self.log_path = log_path
    log = open(self.log_path, mode='r')
    self.log_lines = log.readlines();
    log.close()
    sys.stderr.write('parsing a log file:%s\n' % (log_path))
    self.parse_log(buckets)
    self.log_time = os.stat(self.log_path).st_mtime


  def dump_stacktrace(self, buckets):
    """Prints each stacktrace line with addresses replaced by symbols."""
    for l in self.stacktrace_lines:
      words = l.split()
      bucket = buckets[int(words[BUCKET_ID])]
      if bucket == None:
        continue
      # Copy the numeric columns (everything before the '@' separator).
      for i in range(0, BUCKET_ID - 1):
        sys.stdout.write(words[i] + ' ')
      for addr in bucket.stacktrace:
        # Fall back to the raw address when no symbol is known (yet).
        if addr_symbol_dict.has_key(addr):
          if addr_symbol_dict[addr] != '':
            sys.stdout.write(addr_symbol_dict[addr] + ' ')
          else:
            sys.stdout.write(addr + ' ')
        else:
          sys.stdout.write(addr + ' ')
      sys.stdout.write('\n')


  def dump_for_pprof(self, policy, buckets, mapping_lines, com):
    """ Convert the log file so it can be processed by pprof
    Args:
      com: component name for filtering
    """
    sys.stdout.write('heap profile: ')
    # First pass: accumulate totals for the header line.
    com_committed = 0
    com_allocs = 0
    for l in self.stacktrace_lines:
      words = l.split()
      bucket = buckets[int(words[BUCKET_ID])]
      if bucket == None:
        continue
      # A missing/empty component name means 'no filtering'.
      if com == None or com == '':
        pass
      elif com != get_component(policy, bucket):
        continue

      com_committed += int(words[COMMITTED])
      com_allocs += int(words[ALLOC_COUNT])-int(words[FREE_COUNT])

    sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (com_allocs,
                                                              com_committed,
                                                              com_allocs,
                                                              com_committed))

    # Second pass: one output line per bucket, followed by its raw
    # stacktrace addresses (pprof resolves them via MAPPED_LIBRARIES).
    for l in self.stacktrace_lines:
      words = l.split()
      bucket = buckets[int(words[BUCKET_ID])]
      if bucket == None:
        continue
      if com == None or com == '':
        pass
      elif com != get_component(policy, bucket):
        continue

      # NOTE(review): these accumulators are never read again after the
      # header above was printed — presumably leftover; confirm before
      # removing.
      com_committed += int(words[COMMITTED])
      com_allocs += int(words[ALLOC_COUNT])
      sys.stdout.write('%6d: %8s [%6d: %8s] @' % (int(words[ALLOC_COUNT])-
                                                  int(words[FREE_COUNT]),
                                                  words[COMMITTED],
                                                  int(words[ALLOC_COUNT])-
                                                  int(words[FREE_COUNT]),
                                                  words[COMMITTED]))
      for addr in bucket.stacktrace:
        sys.stdout.write(' ' + addr)
      sys.stdout.write('\n')

    sys.stdout.write('MAPPED_LIBRARIES:\n')
    for l in mapping_lines:
      sys.stdout.write(l)

  def parse_stacktraces(self, buckets):
    """Extracts the STACKTRACES section into self.stacktrace_lines.

    Also registers every address of each referenced bucket in
    addr_symbol_dict (with an empty symbol) so get_symbols() knows
    which addresses need resolving.
    """
    ln = 0
    while self.log_lines[ln] != "STACKTRACES:\n":
      ln += 1
    # Skip header lines until the first data line (starts with a digit).
    while self.log_lines[ln].split()[0].isdigit() == False:
      ln += 1
    lines_start = ln
    while ln < len(self.log_lines):
      words = self.log_lines[ln].split()
      # A data line has at least BUCKET_ID + 1 columns and an '@'
      # marker right before the bucket id; anything else ends the
      # section.
      if len(words) < BUCKET_ID + 1:
        break
      if words[BUCKET_ID - 1] != '@':
        break
      bucket = buckets[int(words[BUCKET_ID])]
      if bucket != None:
        for addr in bucket.stacktrace:
          addr_symbol_dict[addr] = ""
      ln += 1
    lines_end = ln
    self.stacktrace_lines = self.log_lines[lines_start:lines_end]

  def parse_global_stats(self):
    """Parses the GLOBAL_STATS section into *_virtual / *_committed.

    Each stat line is located by scanning forward for its keyword, so
    the stats are expected in this order: total, file, anonymous,
    other, mmap, tcmalloc.
    """
    ln = 0
    while self.log_lines[ln] != "GLOBAL_STATS:\n":
      ln += 1

    while self.log_lines[ln].split()[0] != "total":
      ln += 1
    words = self.log_lines[ln].split()
    self.total_virtual = int(words[1])
    self.total_committed = int(words[2])

    # 'file mapped' is two words, so the values shift one column right.
    while self.log_lines[ln].split()[0] != "file":
      ln += 1
    words = self.log_lines[ln].split()
    self.filemapped_virtual = int(words[2])
    self.filemapped_committed = int(words[3])

    while self.log_lines[ln].split()[0] != "anonymous":
      ln += 1
    words = self.log_lines[ln].split()
    self.anonymous_virtual = int(words[1])
    self.anonymous_committed = int(words[2])

    while self.log_lines[ln].split()[0] != "other":
      ln += 1
    words = self.log_lines[ln].split()
    self.other_virtual = int(words[1])
    self.other_committed = int(words[2])

    while self.log_lines[ln].split()[0] != "mmap":
      ln += 1
    words = self.log_lines[ln].split()
    self.mmap_virtual = int(words[1])
    self.mmap_committed = int(words[2])

    while self.log_lines[ln].split()[0] != "tcmalloc":
      ln += 1
    words = self.log_lines[ln].split()
    self.tcmalloc_virtual = int(words[1])
    self.tcmalloc_committed = int(words[2])

  def parse_log(self, buckets):
    # Parses both sections of the dump.
    self.parse_global_stats()
    self.parse_stacktraces(buckets)

  def apply_policy(self, policy, buckets):
    """ Aggregate the total memory size of each component

    Iterate through all stacktraces and attribute them
    to one of the components based on the policy.
    It is important to apply policy in right order.

    Returns:
      A dict mapping component name to its aggregated committed bytes
      (plus the derived/optional entries described below).
    """

    sys.stderr.write('apply policy:%s\n' % (self.log_path))
    sizes = dict()
    for c in components:
      sizes[c] = 0

    for l in self.stacktrace_lines:
      words = l.split()
      bucket = buckets[int(words[BUCKET_ID])]
      component_match = get_component(policy, bucket)
      sizes[component_match] += int(words[COMMITTED])

      # Roll each match up into one of three coarse subtotals; the
      # policy file must therefore declare 'tc-total-log',
      # 'mmap-total-log' and 'other-total-log'.
      if component_match[0:3] == 'tc-':
        sizes['tc-total-log'] += int(words[COMMITTED])
      elif component_match[0:5] == 'mmap-':
        sizes['mmap-total-log'] += int(words[COMMITTED])
      else:
        sizes['other-total-log'] += int(words[COMMITTED])

    # Derived entries combining logged subtotals with GLOBAL_STATS.
    # 'mmap-tcmalloc' must also be declared by the policy (accessed
    # unconditionally below).
    sizes['mmap-no-log'] = self.mmap_committed - sizes['mmap-total-log']
    sizes['mmap-total-record'] = self.mmap_committed
    sizes['mmap-total-record-vm'] = self.mmap_virtual

    sizes['tc-no-log'] = self.tcmalloc_committed - sizes['tc-total-log']
    sizes['tc-total-record'] = self.tcmalloc_committed
    sizes['tc-unused'] = sizes['mmap-tcmalloc'] - self.tcmalloc_committed
    sizes['tc-total'] = sizes['mmap-tcmalloc']

    # Optional entries: filled in only when the policy declares them.
    if sizes.has_key('total'):
      sizes['total'] = self.total_committed
    if sizes.has_key('filemapped'):
      sizes['filemapped'] = self.filemapped_committed
    if sizes.has_key('anonymous'):
      sizes['anonymous'] = self.anonymous_committed
    if sizes.has_key('other'):
      sizes['other'] = self.other_committed
    if sizes.has_key('total-vm'):
      sizes['total-vm'] = self.total_virtual
    if sizes.has_key('filemapped-vm'):
      sizes['filemapped-vm'] = self.filemapped_virtual
    if sizes.has_key('anonymous-vm'):
      sizes['anonymous-vm'] = self.anonymous_virtual
    if sizes.has_key('other-vm'):
      sizes['other-vm'] = self.other_virtual
    if sizes.has_key('unknown'):
      sizes['unknown'] = self.total_committed - self.mmap_committed
    if sizes.has_key('total-exclude-profiler'):
      sizes['total-exclude-profiler'] = self.total_committed - sizes['mmap-profiler']

    # Time-series columns relative to the first dump in the
    # module-level 'logs' list (defined in the script body below).
    if sizes.has_key('hour'):
      sizes['hour'] = (self.log_time - logs[0].log_time)/60.0/60.0
    if sizes.has_key('minute'):
      sizes['minute'] = (self.log_time - logs[0].log_time)/60.0
    if sizes.has_key('second'):
      sizes['second'] = self.log_time - logs[0].log_time

    return sizes

  def expand(self, policy, buckets, com, depth):
    """Prints the symbolized partial stacktraces of component |com|,
    truncated to |depth| frames, with their aggregated committed sizes
    (largest first)."""
    sizes = dict()

    for l in self.stacktrace_lines:
      words = l.split()
      bucket = buckets[int(words[BUCKET_ID])]
      component_match = get_component(policy, bucket)
      if component_match == com:
        # Aggregation key: symbolized frames 1..depth (frame 0 is
        # skipped).
        a = ''
        for addr in bucket.stacktrace[1 : min(len(bucket.stacktrace), 1 + depth)]:
          a += addr_symbol_dict[addr] + ' '
        if sizes.has_key(a) == False:
          sizes[a] = 0
        sizes[a] += int(words[COMMITTED])

    # Python 2: dict.items() returns a list, sortable in place.
    s = sizes.items()
    s.sort(key=get_val,reverse=True)
    total = 0
    for l in s:
      sys.stdout.write('%10d %s\n' % (l[1], l[0]))
      total += l[1]
    sys.stderr.write('total: %d\n' % (total))
| 296 |
| 297 |
def get_symbols(symbol_path, mapping_lines):
  """Fills addr_symbol_dict with symbol names for all known addresses.

  Uses a cache file at symbol_path: if it already has content it is
  loaded directly; otherwise 'pprof --symbols' is run against the
  binary at module-global chrome_path and the result is written back
  to the cache.

  Args:
    symbol_path: path of the symbol cache file (opened 'a+', so it is
        created if missing).
    mapping_lines: /proc-maps-style lines fed to pprof before the
        address list.
  """
  symbol_f = open(symbol_path, 'a+')
  symbol_lines = symbol_f.readlines()

  if(len(symbol_lines) == 0):
    # Cache miss: hand pprof the mappings followed by the sorted
    # address list via temp files, then read one symbol per address.
    pprof_in = open("/tmp/maps", 'w+')
    pprof_out = open("/tmp/symbols", 'w+')

    for l in mapping_lines:
      pprof_in.write(l)

    addr_list = addr_symbol_dict.keys()
    addr_list.sort()
    for key in addr_list:
      pprof_in.write(key + "\n")

    pprof_in.seek(0)

    # NOTE(review): subprocess's 'shell' parameter expects a boolean;
    # '/usr/bash' is merely truthy here, so the command runs through
    # the default shell — confirm this is intended.
    p = subprocess.Popen(
        'pprof --symbols %s' % (chrome_path),
        shell='/usr/bash', stdin=pprof_in, stdout=pprof_out)
    p.wait()

    pprof_out.seek(0)
    symbols = pprof_out.readlines()
    # pprof emits one symbol line per input address, in the same order.
    i = 0
    for key in addr_list:
      addr_symbol_dict[key] = symbols[i].strip()
      i += 1

    pprof_in.close()
    pprof_out.close()

    # Persist '<address> <symbol>' pairs so later runs skip pprof.
    for a in addr_symbol_dict.items():
      symbol_f.write(a[0] + ' ' + a[1] + '\n')
  else:
    # Cache hit: each line is '<address> <symbol>'.
    for l in symbol_lines:
      addr_symbol_dict[l.split()[0]] = l.split()[1]

  symbol_f.close()
| 338 |
| 339 |
def parse_policy(policy_path):
  """ Parses policy file

  A policy file contains component's names and their
  stacktrace pattern written in regular expression.
  Those patterns are matched against each symbols of
  each stacktraces in the order written in the policy file

  Each line is '<name> <pattern>'.  Lines whose name starts with '#'
  are comments; a pattern of 'default' declares the component without
  a regex.  Every parsed name is also appended (once) to the
  module-global 'components' list, preserving file order.

  Args:
    policy file path
  Returns:
    A list containing component's name and its regex object
  """
  # 'with' guarantees the file is closed (the original leaked the
  # handle).
  with open(policy_path, mode='r') as policy_f:
    policy_lines = policy_f.readlines()
  policy = list()
  for l in policy_lines:
    # Robustness fix: a blank line used to crash on l.split()[0].
    if not l.split():
      continue
    name = l.split()[0]
    if name[0] == '#':
      continue
    pattern = l[len(name) : len(l)].strip()
    if pattern != 'default':
      # \Z anchors the match at the end of the symbolized stacktrace.
      policy.append([name, re.compile(pattern + r'\Z')])
    if components.count(name) == 0:
      components.append(name)

  return policy
| 367 |
action = sys.argv[1]

# Validate the action flag before touching the remaining arguments.
if (action in ['--csv','--expand','--list','--stacktrace','--pprof']) == False:
  sys.stderr.write(
"""Usage:
%s [options] <chrome-binary-path> <policy-file> <profile> [component-name] [depth]

Options:
  --csv      Output result in csv format
  --stacktrace  Convert raw address to symbol names
  --list     Lists components and their sizes
  --expand   Show all stacktraces in the specified component
             of given depth with their sizes
  --pprof    Format the profile file so it can be processed by pprof

Examples:
  dmprof --csv out/Debug/chrome ./policy o1211/heap.hprof.01221.0001.heap > renderer.csv
  dmprof --list out/Debug/chrome ./policy o1211/heap.hprof.01221.0101.heap
  dmprof --expand out/Debug/chrome ./policy o1211/heap.hprof.01221.0101.heap tc-webkit 4
  dmprof --pprof out/Debug/chrome ./policy o1211/heap.hprof.01221.0101.heap > for_pprof


""" % (sys.argv[0]))
  sys.exit(1)

chrome_path = sys.argv[2]
policy_path = sys.argv[3]
log_path = sys.argv[4]

sys.stderr.write('parsing a policy file\n')
policy = parse_policy(policy_path)

# Dump files are named '<prefix>.NNNN.heap'; sibling files (.symbols,
# .maps, .NNNN.buckets) share the same prefix.
p = re.compile('\.[0-9][0-9][0-9][0-9]\.heap')
prefix = p.sub('',log_path)
symbol_path = prefix + '.symbols'

sys.stderr.write('parsing the maps file\n')
maps_path = prefix + '.maps'
maps_f = open(maps_path, mode='r')
maps_lines = maps_f.readlines()

# Reading buckets
sys.stderr.write('parsing the bucket file\n')
# Sparse array indexed by bucket id as it appears in the dump files.
buckets = [None for i in range(0, 10000000)]
bucket_count = 0
#n = int(log_path[len(log_path) - 9 : len(log_path) - 5])
n = 0
while True:
  buckets_path = '%s.%04d.buckets'% (prefix, n)
  if os.path.exists(buckets_path) == False:
    # Tolerate gaps in the numbering, but give up after index 10.
    if n > 10:
      break
    else:
      n+=1
      continue
  sys.stderr.write('reading buckets from %s\n' % (buckets_path))
  buckets_f = open(buckets_path, mode='r')
  for l in buckets_f.readlines():
    # Each bucket line: '<bucket id> <addr> <addr> ...'
    words = l.split()
    st = list()
    for i in range(1, len(words)):
      st.append(words[i])
    buckets[int(words[0])] = Bucket(st)
    bucket_count+=1
  buckets_f.close()
  n+=1

sys.stderr.write('the number buckets: %d\n' % (bucket_count))

log_path_list = list()
log_path_list.append(log_path)

if action == '--csv':
  # search for the sequence of files
  # The dump's own sequence number is the 4 digits before '.heap'.
  n = int(log_path[len(log_path) - 9 : len(log_path) - 5])
  n += 1 # skip current file
  while True:
    p = '%s.%04d.heap'% (prefix, n)
    if os.path.exists(p):
      log_path_list.append(p)
    else:
      break
    n += 1

logs = list()
for path in log_path_list:
  logs.append(Log(path, buckets))

sys.stderr.write('getting symbols\n')
get_symbols(symbol_path, maps_lines)

if action == '--stacktrace':
  logs[0].dump_stacktrace(buckets)

elif action == '--csv':
  # Header row of component names, then one row of values per dump.
  sys.stdout.write(','.join(components))
  sys.stdout.write('\n')

  for log in logs:
    component_sizes = log.apply_policy(policy, buckets)
    s = list()
    for c in components:
      # Time columns are printed as-is; byte counts as megabytes.
      if c in ['hour', 'minute', 'second']:
        s.append('%05.5f' % (component_sizes[c]))
      else:
        s.append('%05.5f' % (component_sizes[c]/1024./1024.))
    sys.stdout.write(','.join(s))
    sys.stdout.write('\n')

elif action == '--list':
  component_sizes = logs[0].apply_policy(policy, buckets)
  for c in components:
    if c in ['hour', 'minute', 'second']:
      sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))
    else:
      sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]/1024./1024.))
elif action == '--expand':
  com_name = sys.argv[5]
  depth = sys.argv[6]
  logs[0].expand(policy, buckets, com_name, int(depth))
elif action == '--pprof':
  # Optional 5th argument restricts output to a single component.
  if len(sys.argv) > 5:
    logs[0].dump_for_pprof(policy, buckets, maps_lines, sys.argv[5])
  else:
    logs[0].dump_for_pprof(policy, buckets, maps_lines, None)
OLD | NEW |