Index: tools/deep_memory_profiler/dmprof |
diff --git a/tools/deep_memory_profiler/dmprof b/tools/deep_memory_profiler/dmprof |
index 759caa0deae0523d83763b62625d475a0df64f05..ea983f85411b6d6242d62105c45a63487a924be1 100755 |
--- a/tools/deep_memory_profiler/dmprof |
+++ b/tools/deep_memory_profiler/dmprof |
@@ -9,6 +9,7 @@ from datetime import datetime |
import json |
import os |
import re |
+from optparse import OptionParser |
M-A Ruel (2012/07/24 14:10:53): just import optparse
Dai Mikurube (NOT FULLTIME) (2012/07/24 16:19:24): Done.
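A minimal sketch of what the reviewer is asking for, assuming nothing else changes: import the module itself, so each call site spells out where OptionParser comes from.

    # Hypothetical illustration of the review comment, not part of this CL.
    import optparse

    parser = optparse.OptionParser(
        usage='Usage: %prog csv [-p POLICY] <first-dump>')
    parser.add_option('-p', '--policy', type='string', dest='policy',
                      help='profile with POLICY', metavar='POLICY')
    options, args = parser.parse_args()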
import shutil |
import subprocess |
import sys |
@@ -30,6 +31,8 @@ ALLOC_COUNT = 2 |
FREE_COUNT = 3 |
NULL_REGEX = re.compile('') |
+POLICIES_JSON_PATH = os.path.join(os.path.dirname(__file__), 'policies.json') |
M-A Ruel (2012/07/24 14:10:53): no
Dai Mikurube (NOT FULLTIME) (2012/07/24 14:53:45): Sorry, what do you mean by this?
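For context on what the constant does (the objection above is left unresolved in this thread): the path is anchored to the script's own directory instead of the current working directory, so dmprof finds policies.json wherever it is invoked from. A minimal sketch, mirroring load_default_policies later in this CL:

    import json
    import os

    POLICIES_JSON_PATH = os.path.join(os.path.dirname(__file__), 'policies.json')

    def load_default_policies():
        with open(POLICIES_JSON_PATH, mode='r') as policies_f:
            return json.load(policies_f)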
+ |
# Heap Profile Dump versions |
# DUMP_DEEP_1 is OBSOLETE. |
@@ -72,11 +75,6 @@ POLICY_DEEP_1 = 'POLICY_DEEP_1' |
# mmap regions are distinguished w/ the allocation_type column. |
POLICY_DEEP_2 = 'POLICY_DEEP_2' |
-# TODO(dmikurube): Avoid global variables. |
-address_symbol_dict = {} |
-appeared_addresses = set() |
-components = [] |
- |
class EmptyDumpException(Exception): |
def __init__(self, value): |
@@ -106,7 +104,8 @@ class ObsoleteDumpVersionException(ParsingException): |
return "obsolete heap profile dump version: %s" % repr(self.value) |
-class Policy(object): |
+class Rule(object): |
+ """Represents one matching rule in a policy file.""" |
def __init__(self, name, mmap, pattern): |
self.name = name |
@@ -114,60 +113,74 @@ class Policy(object): |
self.condition = re.compile(pattern + r'\Z') |
-def get_component(policy_list, bucket): |
+class Policy(object): |
+  """Represents a policy, the content of a policy file.""" |
+ |
+ def __init__(self, rules, version, components): |
+ self.rules = rules |
+ self.version = version |
+ self.components = components |
+ |
+ def append_rule(self, rule): |
+ self.rules.append(rule) |
+ |
+ |
+def get_component(rule_list, bucket, symbols): |
"""Returns a component name which a given bucket belongs to. |
Args: |
- policy_list: A list containing Policy objects. (Parsed policy data by |
- parse_policy.) |
+ rule_list: A list of Rule objects. |
bucket: A Bucket object to be searched for. |
+ symbols: A dict mapping runtime addresses to symbol names. |
Returns: |
A string representing a component name. |
""" |
if not bucket: |
return 'no-bucket' |
- if bucket.component: |
- return bucket.component |
+ if bucket.component_cache: |
+ return bucket.component_cache |
- stacktrace = ''.join( |
- address_symbol_dict[a] + ' ' for a in bucket.stacktrace).strip() |
+ stacktrace = ''.join(symbols[a] + ' ' for a in bucket.stacktrace).strip() |
- for policy in policy_list: |
- if bucket.mmap == policy.mmap and policy.condition.match(stacktrace): |
- bucket.component = policy.name |
- return policy.name |
+ for rule in rule_list: |
+ if bucket.mmap == rule.mmap and rule.condition.match(stacktrace): |
+ bucket.component_cache = rule.name |
+ return rule.name |
assert False |
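A standalone sketch of the matching semantics above, with a fabricated rule pattern: the bucket's stacktrace is symbolized, joined with single spaces, and every rule regexp is anchored with \Z in Rule.__init__, so match() only fires when the pattern covers the whole joined string.

    import re

    # Fabricated pattern; real ones live in the policy files.
    condition = re.compile('MemoryCache::.* WebCore::ResourceLoader' + r'\Z')

    print bool(condition.match('MemoryCache::add WebCore::ResourceLoader'))  # True
    # Without \Z, match() would accept this too; the anchor rejects it:
    print bool(condition.match('MemoryCache::add WebCore::ResourceLoader f'))  # False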
class Bucket(object): |
+ """Represents a bucket, which is a unit of memory classification.""" |
def __init__(self, stacktrace, mmap): |
self.stacktrace = stacktrace |
self.mmap = mmap |
- self.component = '' |
+ self.component_cache = '' |
+ |
+ def clear_component_cache(self): |
+ self.component_cache = '' |
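The cache memoizes get_component() per bucket, but the memo is only valid for one rule list. A hedged sketch of the invalidation pattern that the multi-policy commands below (do_csv, do_json, do_list) rely on:

    def apply_all_policies(dumps, policies, buckets, symbols):
        # Sketch only; mirrors the per-policy loops in do_csv/do_json/do_list.
        for label in sorted(policies):
            policy = policies[label]
            for dump in dumps:
                dump.apply_policy(policy.rules, buckets, dumps[0].dump_time,
                                  policy.components, symbols)
            # get_component() memoized one component per bucket for this
            # policy's rules; drop the memo before the next policy runs.
            for bucket in buckets.itervalues():
                bucket.clear_component_cache()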
-class Log(object): |
+class Dump(object): |
+ """Represents one heap profile dump.""" |
- """A class representing one dumped log data.""" |
- def __init__(self, log_path): |
- self.log_path = log_path |
- self.log_lines = [ |
- l for l in open(self.log_path, 'r') if l and not l.startswith('#')] |
- self.log_version = '' |
- sys.stderr.write('Loading a dump: %s\n' % log_path) |
+ def __init__(self, dump_path): |
+ self.dump_path = dump_path |
+ self.dump_lines = [ |
+ l for l in open(self.dump_path, 'r') if l and not l.startswith('#')] |
+ self.dump_version = '' |
self.stacktrace_lines = [] |
self.counters = {} |
- self.log_time = os.stat(self.log_path).st_mtime |
+ self.dump_time = os.stat(self.dump_path).st_mtime |
- def dump_stacktrace(buckets): |
+ def print_stacktrace(self, buckets, symbols): |
"""Prints a given stacktrace. |
Args: |
- buckets: A dict mapping bucket ids and their corresponding Bucket |
- objects. |
+ buckets: A dict mapping bucket ids to Bucket objects. |
+ symbols: A dict mapping runtime addresses to symbol names. |
""" |
for line in self.stacktrace_lines: |
words = line.split() |
@@ -177,21 +190,20 @@ class Log(object): |
for i in range(0, BUCKET_ID - 1): |
sys.stdout.write(words[i] + ' ') |
for address in bucket.stacktrace: |
- sys.stdout.write((address_symbol_dict.get(address) or address) + ' ') |
+ sys.stdout.write((symbols.get(address) or address) + ' ') |
sys.stdout.write('\n') |
@staticmethod |
- def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets, |
- component_name): |
+ def accumulate_size_for_pprof(stacktrace_lines, rule_list, buckets, |
+ component_name, symbols): |
"""Accumulates size of committed chunks and the number of allocated chunks. |
Args: |
stacktrace_lines: A list of strings which are valid as stacktraces. |
- policy_list: A list containing Policy objects. (Parsed policy data by |
- parse_policy.) |
- buckets: A dict mapping bucket ids and their corresponding Bucket |
- objects. |
+ rule_list: A list of Rule objects. |
+ buckets: A dict mapping bucket ids to Bucket objects. |
component_name: A name of a component for filtering. |
+ symbols: A dict mapping runtime addresses to symbol names. |
Returns: |
Two integers which are the accumulated size of committed regions and the |
@@ -204,7 +216,7 @@ class Log(object): |
bucket = buckets.get(int(words[BUCKET_ID])) |
if (not bucket or |
(component_name and |
- component_name != get_component(policy_list, bucket))): |
+ component_name != get_component(rule_list, bucket, symbols))): |
continue |
com_committed += int(words[COMMITTED]) |
@@ -213,24 +225,23 @@ class Log(object): |
return com_committed, com_allocs |
@staticmethod |
- def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list, |
- buckets, component_name): |
+ def print_stacktrace_lines_for_pprof(stacktrace_lines, rule_list, |
+ buckets, component_name, symbols): |
"""Prints information of stacktrace lines for pprof. |
Args: |
stacktrace_lines: A list of strings which are valid as stacktraces. |
- policy_list: A list containing Policy objects. (Parsed policy data by |
- parse_policy.) |
- buckets: A dict mapping bucket ids and their corresponding Bucket |
- objects. |
+ rule_list: A list of Rule objects. |
+ buckets: A dict mapping bucket ids to Bucket objects. |
component_name: A name of a component for filtering. |
+ symbols: A dict mapping runtime addresses to symbol names. |
""" |
for line in stacktrace_lines: |
words = line.split() |
bucket = buckets.get(int(words[BUCKET_ID])) |
if (not bucket or |
(component_name and |
- component_name != get_component(policy_list, bucket))): |
+ component_name != get_component(rule_list, bucket, symbols))): |
continue |
sys.stdout.write('%6d: %8s [%6d: %8s] @' % ( |
@@ -242,39 +253,39 @@ class Log(object): |
sys.stdout.write(' ' + address) |
sys.stdout.write('\n') |
- def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name): |
- """Converts the log file so it can be processed by pprof. |
+ def print_for_pprof( |
+ self, rule_list, buckets, maps_lines, component_name, symbols): |
+ """Converts the heap profile dump so it can be processed by pprof. |
Args: |
- policy_list: A list containing Policy objects. (Parsed policy data by |
- parse_policy.) |
- buckets: A dict mapping bucket ids and their corresponding Bucket |
- objects. |
- mapping_lines: A list of strings containing /proc/.../maps. |
+ rule_list: A list of Rule objects. |
+ buckets: A dict mapping bucket ids to Bucket objects. |
+ maps_lines: A list of strings containing /proc/.../maps. |
component_name: A name of a component for filtering. |
+ symbols: A dict mapping runtime addresses to symbol names. |
""" |
sys.stdout.write('heap profile: ') |
com_committed, com_allocs = self.accumulate_size_for_pprof( |
- self.stacktrace_lines, policy_list, buckets, component_name) |
+ self.stacktrace_lines, rule_list, buckets, component_name, symbols) |
sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( |
com_allocs, com_committed, com_allocs, com_committed)) |
- self.dump_stacktrace_lines_for_pprof( |
- self.stacktrace_lines, policy_list, buckets, component_name) |
+ self.print_stacktrace_lines_for_pprof( |
+ self.stacktrace_lines, rule_list, buckets, component_name, symbols) |
sys.stdout.write('MAPPED_LIBRARIES:\n') |
- for line in mapping_lines: |
+ for line in maps_lines: |
sys.stdout.write(line) |
@staticmethod |
- def check_stacktrace_line(stacktrace_line, buckets): |
+ def check_stacktrace_line(stacktrace_line, buckets, appeared_addresses): |
"""Checks if a given stacktrace_line is valid as stacktrace. |
Args: |
stacktrace_line: A string to be checked. |
- buckets: A dict mapping bucket ids and their corresponding Bucket |
- objects. |
+ buckets: A dict mapping bucket ids to Bucket objects. |
+      appeared_addresses: A set where appeared addresses are stored. |
Returns: |
True if the given stacktrace_line is valid. |
@@ -305,61 +316,59 @@ class Log(object): |
return line_number, False |
return line_number, True |
- def parse_stacktraces_while_valid(self, buckets, log_lines, line_number): |
+ def parse_stacktraces_while_valid( |
+ self, buckets, dump_lines, line_number, appeared_addresses): |
"""Parses stacktrace lines while the lines are valid. |
Args: |
- buckets: A dict mapping bucket ids and their corresponding Bucket |
- objects. |
- log_lines: A list of lines to be parsed. |
- line_number: An integer representing the starting line number in |
- log_lines. |
+ buckets: A dict mapping bucket ids to Bucket objects. |
+ dump_lines: A list of lines to be parsed. |
+ line_number: A line number to start parsing in dump_lines. |
+      appeared_addresses: A set where appeared addresses are stored. |
Returns: |
A pair of a list of valid lines and an integer representing the last |
- line number in log_lines. |
+ line number in dump_lines. |
""" |
(line_number, _) = self.skip_lines_while( |
- line_number, len(log_lines), |
- lambda n: not log_lines[n].split()[0].isdigit()) |
+ line_number, len(dump_lines), |
+ lambda n: not dump_lines[n].split()[0].isdigit()) |
stacktrace_lines_start = line_number |
(line_number, _) = self.skip_lines_while( |
- line_number, len(log_lines), |
- lambda n: self.check_stacktrace_line(log_lines[n], buckets)) |
- return (log_lines[stacktrace_lines_start:line_number], line_number) |
+ line_number, len(dump_lines), |
+ lambda n: self.check_stacktrace_line( |
+ dump_lines[n], buckets, appeared_addresses)) |
+ return (dump_lines[stacktrace_lines_start:line_number], line_number) |
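skip_lines_while is defined outside this hunk; judging from the call sites here and the (line_number, found) pairs it returns, it advances a cursor while a predicate holds. An assumed sketch, not the actual definition:

    def skip_lines_while(line_number, max_line_number, skipping_condition):
        # found is False when the scan runs off the end with the
        # condition still holding.
        while skipping_condition(line_number):
            line_number += 1
            if line_number >= max_line_number:
                return line_number, False
        return line_number, True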
- def parse_stacktraces(self, buckets, line_number): |
- """Parses lines in self.log_lines as stacktrace. |
+ def parse_stacktraces(self, buckets, line_number, appeared_addresses): |
+ """Parses lines in self.dump_lines as stacktrace. |
Valid stacktrace lines are stored into self.stacktrace_lines. |
Args: |
- buckets: A dict mapping bucket ids and their corresponding Bucket |
- objects. |
- line_number: An integer representing the starting line number in |
- log_lines. |
+ buckets: A dict mapping bucket ids to Bucket objects. |
+ line_number: A line number to start parsing in dump_lines. |
+      appeared_addresses: A set where appeared addresses are stored. |
Raises: |
ParsingException for invalid dump versions. |
""" |
- sys.stderr.write(' Version: %s\n' % self.log_version) |
- |
- if self.log_version == DUMP_DEEP_5: |
+ if self.dump_version == DUMP_DEEP_5: |
(self.stacktrace_lines, line_number) = ( |
self.parse_stacktraces_while_valid( |
- buckets, self.log_lines, line_number)) |
+ buckets, self.dump_lines, line_number, appeared_addresses)) |
- elif self.log_version in DUMP_DEEP_OBSOLETE: |
- raise ObsoleteDumpVersionException(self.log_version) |
+ elif self.dump_version in DUMP_DEEP_OBSOLETE: |
+ raise ObsoleteDumpVersionException(self.dump_version) |
else: |
- raise InvalidDumpException('Invalid version: %s' % self.log_version) |
+ raise InvalidDumpException('Invalid version: %s' % self.dump_version) |
def parse_global_stats(self): |
- """Parses lines in self.log_lines as global stats.""" |
+ """Parses lines in self.dump_lines as global stats.""" |
(ln, _) = self.skip_lines_while( |
- 0, len(self.log_lines), |
- lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n') |
+ 0, len(self.dump_lines), |
+ lambda n: self.dump_lines[n] != 'GLOBAL_STATS:\n') |
global_stat_names = [ |
'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', |
@@ -370,14 +379,14 @@ class Log(object): |
for prefix in global_stat_names: |
(ln, _) = self.skip_lines_while( |
- ln, len(self.log_lines), |
- lambda n: self.log_lines[n].split()[0] != prefix) |
- words = self.log_lines[ln].split() |
+ ln, len(self.dump_lines), |
+ lambda n: self.dump_lines[n].split()[0] != prefix) |
+ words = self.dump_lines[ln].split() |
self.counters[prefix + '_virtual'] = int(words[-2]) |
self.counters[prefix + '_committed'] = int(words[-1]) |
def parse_version(self): |
- """Parses a version string in self.log_lines. |
+ """Parses a version string in self.dump_lines. |
Returns: |
A pair of (a string representing a version of the stacktrace dump, |
@@ -390,44 +399,45 @@ class Log(object): |
# Skip until an identifiable line. |
headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') |
- if not self.log_lines: |
+ if not self.dump_lines: |
raise EmptyDumpException('Empty heap dump file.') |
(ln, found) = self.skip_lines_while( |
- 0, len(self.log_lines), |
- lambda n: not self.log_lines[n].startswith(headers)) |
+ 0, len(self.dump_lines), |
+ lambda n: not self.dump_lines[n].startswith(headers)) |
if not found: |
raise InvalidDumpException('No version header.') |
# Identify a version. |
- if self.log_lines[ln].startswith('heap profile: '): |
- version = self.log_lines[ln][13:].strip() |
+ if self.dump_lines[ln].startswith('heap profile: '): |
+ version = self.dump_lines[ln][13:].strip() |
if version == DUMP_DEEP_5: |
(ln, _) = self.skip_lines_while( |
- ln, len(self.log_lines), |
- lambda n: self.log_lines[n] != 'STACKTRACES:\n') |
+ ln, len(self.dump_lines), |
+ lambda n: self.dump_lines[n] != 'STACKTRACES:\n') |
elif version in DUMP_DEEP_OBSOLETE: |
raise ObsoleteDumpVersionException(version) |
else: |
raise InvalidDumpException('Invalid version: %s' % version) |
- elif self.log_lines[ln] == 'STACKTRACES:\n': |
+ elif self.dump_lines[ln] == 'STACKTRACES:\n': |
raise ObsoleteDumpVersionException(DUMP_DEEP_1) |
- elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n': |
+ elif self.dump_lines[ln] == 'MMAP_STACKTRACES:\n': |
raise ObsoleteDumpVersionException(DUMP_DEEP_2) |
return (version, ln) |
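A small sketch of the slicing in the 'heap profile: ' branch above: the prefix is 14 characters long, so [13:] keeps one leading space, which strip() then removes along with the newline.

    line = 'heap profile: DUMP_DEEP_5\n'   # fabricated header line
    if line.startswith('heap profile: '):
        version = line[13:].strip()        # ' DUMP_DEEP_5\n' -> 'DUMP_DEEP_5'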
- def parse_log(self, buckets): |
- self.log_version, ln = self.parse_version() |
+ def parse_dump(self, buckets, appeared_addresses): |
+ self.dump_version, ln = self.parse_version() |
self.parse_global_stats() |
- self.parse_stacktraces(buckets, ln) |
+ self.parse_stacktraces(buckets, ln, appeared_addresses) |
@staticmethod |
def accumulate_size_for_policy(stacktrace_lines, |
- policy_list, buckets, sizes): |
+ rule_list, buckets, sizes, symbols): |
for line in stacktrace_lines: |
words = line.split() |
bucket = buckets.get(int(words[BUCKET_ID])) |
- component_match = get_component(policy_list, bucket) |
+ component_match = get_component(rule_list, bucket, symbols) |
+ |
M-A Ruel (2012/07/24 14:10:53): This new line is gratuitous. Intended?
Dai Mikurube (NOT FULLTIME) (2012/07/24 16:19:24): It was unintended. Thanks.
sizes[component_match] += int(words[COMMITTED]) |
if component_match.startswith('tc-'): |
@@ -437,29 +447,30 @@ class Log(object): |
else: |
sizes['other-total-log'] += int(words[COMMITTED]) |
- def apply_policy(self, policy_list, buckets, first_log_time): |
+ def apply_policy( |
+ self, rule_list, buckets, first_dump_time, components, symbols): |
"""Aggregates the total memory size of each component. |
Iterate through all stacktraces and attribute them to one of the components |
based on the policy. It is important to apply the rules in the right order. |
Args: |
- policy_list: A list containing Policy objects. (Parsed policy data by |
- parse_policy.) |
- buckets: A dict mapping bucket ids and their corresponding Bucket |
- objects. |
- first_log_time: An integer representing time when the first log is |
+ rule_list: A list of Rule objects. |
+ buckets: A dict mapping bucket ids to Bucket objects. |
+    first_dump_time: An integer representing the time when the first dump is |
dumped. |
+ components: A list of strings of component names. |
+ symbols: A dict mapping runtime addresses to symbol names. |
Returns: |
A dict mapping components to their corresponding sizes. |
""" |
- sys.stderr.write('apply policy:%s\n' % (self.log_path)) |
+ sys.stderr.write('Applying policy: "%s".\n' % self.dump_path) |
sizes = dict((c, 0) for c in components) |
self.accumulate_size_for_policy(self.stacktrace_lines, |
- policy_list, buckets, sizes) |
+ rule_list, buckets, sizes, symbols) |
mmap_prefix = 'profiled-mmap' |
malloc_prefix = 'profiled-malloc' |
@@ -513,46 +524,45 @@ class Log(object): |
sizes['total-exclude-profiler'] = ( |
self.counters['total_committed'] - sizes['mmap-profiler']) |
if 'hour' in sizes: |
- sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0 |
+ sizes['hour'] = (self.dump_time - first_dump_time) / 60.0 / 60.0 |
if 'minute' in sizes: |
- sizes['minute'] = (self.log_time - first_log_time) / 60.0 |
+ sizes['minute'] = (self.dump_time - first_dump_time) / 60.0 |
if 'second' in sizes: |
- sizes['second'] = self.log_time - first_log_time |
+ sizes['second'] = self.dump_time - first_dump_time |
return sizes |
@staticmethod |
- def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets, |
- component_name, depth, sizes): |
+ def accumulate_size_for_expand(stacktrace_lines, rule_list, buckets, |
+ component_name, depth, sizes, symbols): |
for line in stacktrace_lines: |
words = line.split() |
bucket = buckets.get(int(words[BUCKET_ID])) |
- component_match = get_component(policy_list, bucket) |
+ component_match = get_component(rule_list, bucket, symbols) |
if component_match == component_name: |
stacktrace_sequence = '' |
for address in bucket.stacktrace[0 : min(len(bucket.stacktrace), |
1 + depth)]: |
- stacktrace_sequence += address_symbol_dict[address] + ' ' |
+ stacktrace_sequence += symbols[address] + ' ' |
if not stacktrace_sequence in sizes: |
sizes[stacktrace_sequence] = 0 |
sizes[stacktrace_sequence] += int(words[COMMITTED]) |
- def expand(self, policy_list, buckets, component_name, depth): |
+ def expand(self, rule_list, buckets, component_name, depth, symbols): |
"""Prints all stacktraces in a given component of given depth. |
Args: |
- policy_list: A list containing Policy objects. (Parsed policy data by |
- parse_policy.) |
- buckets: A dict mapping bucket ids and their corresponding Bucket |
- objects. |
+ rule_list: A list of Rule objects. |
+ buckets: A dict mapping bucket ids to Bucket objects. |
component_name: A name of a component for filtering. |
depth: An integer representing depth to be printed. |
+ symbols: A dict mapping runtime addresses to symbol names. |
""" |
sizes = {} |
self.accumulate_size_for_expand( |
- self.stacktrace_lines, policy_list, buckets, component_name, |
- depth, sizes) |
+ self.stacktrace_lines, rule_list, buckets, component_name, |
+ depth, sizes, symbols) |
sorted_sizes_list = sorted( |
sizes.iteritems(), key=(lambda x: x[1]), reverse=True) |
@@ -563,7 +573,8 @@ class Log(object): |
sys.stderr.write('total: %d\n' % (total)) |
-def update_symbols(symbol_path, mapping_lines, maps_path): |
+def update_symbols( |
+ symbol_path, maps_path, appeared_addresses, symbols): |
"""Updates address/symbol mapping on memory and in a .symbol cache file. |
It reads cached address/symbol mapping from a .symbol file if it exists. |
@@ -578,29 +589,42 @@ def update_symbols(symbol_path, mapping_lines, maps_path): |
Args: |
symbol_path: A string representing a path for a .symbol file. |
- mapping_lines: A list of strings containing /proc/.../maps. |
maps_path: A string of the path of /proc/.../maps. |
+    appeared_addresses: A set of known addresses. |
+ symbols: A dict mapping runtime addresses to symbol names. |
""" |
with open(symbol_path, mode='a+') as symbol_f: |
symbol_lines = symbol_f.readlines() |
if symbol_lines: |
for line in symbol_lines: |
items = line.split(None, 1) |
- address_symbol_dict[items[0]] = items[1].rstrip() |
+ if len(items) == 1: |
+ items.append('??') |
+ symbols[items[0]] = items[1].rstrip() |
+ if symbols: |
+ sys.stderr.write(' Found %d symbols in cache.\n' % len(symbols)) |
+ else: |
+ sys.stderr.write(' No symbols found in cache.\n') |
unresolved_addresses = sorted( |
- a for a in appeared_addresses if a not in address_symbol_dict) |
+ a for a in appeared_addresses if a not in symbols) |
- if unresolved_addresses: |
+ if not unresolved_addresses: |
+ sys.stderr.write(' No need to resolve any more addresses.\n') |
+ else: |
+ sys.stderr.write(' %d addresses are unresolved.\n' % |
+ len(unresolved_addresses)) |
prepared_data_dir = tempfile.mkdtemp() |
prepare_symbol_info(maps_path, prepared_data_dir) |
- symbols = find_runtime_symbols_list( |
+ symbol_list = find_runtime_symbols_list( |
prepared_data_dir, unresolved_addresses) |
- for address, symbol in zip(unresolved_addresses, symbols): |
+ for address, symbol in zip(unresolved_addresses, symbol_list): |
+ if not symbol: |
+ symbol = '??' |
stripped_symbol = symbol.strip() |
- address_symbol_dict[address] = stripped_symbol |
+ symbols[address] = stripped_symbol |
symbol_f.write('%s %s\n' % (address, stripped_symbol)) |
shutil.rmtree(prepared_data_dir) |
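For reference, the .symbols cache this function reads and appends holds one 'address symbol' pair per line, with '??' standing in for names that could not be resolved (the addresses below are fabricated):

    0x7f6e3c8a1b20 WebCore::ResourceLoader::didReceiveData
    0x7f6e3c8a1f00 ??

Renderers fall back to the raw address on a miss, as in print_stacktrace's symbols.get(address) or address.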
@@ -626,10 +650,10 @@ def parse_policy(policy_path): |
if policy_lines[0].startswith('heap profile policy: '): |
policy_version = policy_lines[0][21:].strip() |
policy_lines.pop(0) |
- policy_list = [] |
+ rule_list = [] |
+ components = [] |
if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1: |
- sys.stderr.write(' heap profile policy version: %s\n' % policy_version) |
for line in policy_lines: |
if line[0] == '#': |
continue |
@@ -645,7 +669,7 @@ def parse_policy(policy_path): |
mmap = False |
if pattern != 'default': |
- policy_list.append(Policy(name, mmap, pattern)) |
+ rule_list.append(Rule(name, mmap, pattern)) |
if components.count(name) == 0: |
components.append(name) |
@@ -653,57 +677,16 @@ def parse_policy(policy_path): |
sys.stderr.write(' invalid heap profile policy version: %s\n' % ( |
policy_version)) |
- return policy_list |
+ return rule_list, policy_version, components |
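A hedged sketch of a policy file as parse_policy consumes it; the version header, the comment handling, and the three whitespace-separated columns are visible in this hunk, but the mmap-flag tokens are parsed outside it, so the middle column is an assumption:

    heap profile policy: POLICY_DEEP_2
    # comment lines are skipped
    <component-name>  <mmap-flag>  <stacktrace-regexp or 'default'>

A 'default' pattern contributes only its component name; every other line becomes a Rule, and each distinct name is appended to components once.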
-def main(): |
- if (len(sys.argv) < 4) or (not (sys.argv[1] in ['--csv', |
- '--json', |
- '--expand', |
- '--list', |
- '--stacktrace', |
- '--pprof'])): |
- sys.stderr.write("""Usage: |
-%s [options] <chrome-binary> <policy> <profile> [component-name] [depth] |
- |
-Options: |
- --csv Output result in csv format |
- --json Output result in json format |
- --stacktrace Convert raw address to symbol names |
- --list Lists components and their sizes |
- --expand Show all stacktraces in the specified component |
- of given depth with their sizes |
- --pprof Format the profile file so it can be processed |
- by pprof |
- |
-Examples: |
- dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv |
- dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json |
- dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap |
- dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4 |
- dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt |
-""" % (sys.argv[0])) |
- sys.exit(1) |
- |
- action = sys.argv[1] |
- chrome_path = sys.argv[2] |
- policy_path = sys.argv[3] |
- log_path = sys.argv[4] |
- |
- sys.stderr.write('parsing a policy file\n') |
- policy_list = parse_policy(policy_path) |
- |
- p = re.compile('\.[0-9][0-9][0-9][0-9]\.heap') |
- prefix = p.sub('', log_path) |
- symbol_path = prefix + '.symbols' |
+def find_prefix(path): |
+  return re.sub(r'\.[0-9][0-9][0-9][0-9]\.heap', '', path) |
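Usage sketch, with a file name borrowed from the old usage examples: the '.NNNN.heap' suffix is stripped so sibling files can be derived from any dump in a sequence.

    prefix = find_prefix('hprof.12345.0004.heap')   # -> 'hprof.12345'
    maps_path = prefix + '.maps'                    # /proc/.../maps snapshot
    symbol_path = prefix + '.symbols'               # on-disk symbol cache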
- sys.stderr.write('parsing the maps file\n') |
- maps_path = prefix + '.maps' |
- with open(maps_path, 'r') as maps_f: |
- maps_lines = maps_f.readlines() |
+def load_buckets(prefix): |
# Reading buckets |
- sys.stderr.write('parsing the bucket file\n') |
+ sys.stderr.write('Loading bucket files.\n') |
buckets = {} |
bucket_count = 0 |
n = 0 |
@@ -714,80 +697,255 @@ Examples: |
break |
n += 1 |
continue |
- sys.stderr.write('reading buckets from %s\n' % (buckets_path)) |
+ sys.stderr.write(' %s\n' % buckets_path) |
with open(buckets_path, 'r') as buckets_f: |
for line in buckets_f: |
words = line.split() |
buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap') |
n += 1 |
- log_path_list = [log_path] |
+ return buckets |
- if action in ('--csv', '--json'): |
- # search for the sequence of files |
- n = int(log_path[len(log_path) - 9 : len(log_path) - 5]) |
- n += 1 # skip current file |
- while True: |
- p = '%s.%04d.heap' % (prefix, n) |
- if os.path.exists(p): |
- log_path_list.append(p) |
- else: |
- break |
- n += 1 |
- logs = [] |
- for path in log_path_list: |
- new_log = Log(path) |
- sys.stderr.write('Parsing a dump: %s\n' % path) |
- try: |
- new_log.parse_log(buckets) |
- except EmptyDumpException: |
- sys.stderr.write(' WARNING: ignored an empty dump: %s\n' % path) |
- except ParsingException, e: |
- sys.stderr.write(' Error in parsing heap profile dump: %s\n' % e) |
- sys.exit(1) |
+def determine_dump_path_list(dump_path, prefix): |
+ dump_path_list = [dump_path] |
+ |
+ # search for the sequence of files |
+ n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5]) |
+ n += 1 # skip current file |
+ while True: |
+ p = '%s.%04d.heap' % (prefix, n) |
+ if os.path.exists(p): |
+ dump_path_list.append(p) |
else: |
- logs.append(new_log) |
+ break |
+ n += 1 |
+ |
+ return dump_path_list |
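Behavior sketch with the same fabricated file names: the trailing four-digit index of the given dump is parsed, and strictly later dumps are appended until the first gap; dumps earlier in the sequence are deliberately left out.

    # 'hprof.12345.0004.heap' on disk along with .0005 and .0006:
    determine_dump_path_list('hprof.12345.0004.heap', 'hprof.12345')
    # -> ['hprof.12345.0004.heap', 'hprof.12345.0005.heap',
    #     'hprof.12345.0006.heap']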
+ |
+ |
+def load_single_dump(dump_path, buckets, appeared_addresses): |
+ new_dump = Dump(dump_path) |
+ try: |
+ new_dump.parse_dump(buckets, appeared_addresses) |
+ except EmptyDumpException: |
+ sys.stderr.write('... ignored an empty dump') |
+ except ParsingException, e: |
+ sys.stderr.write('... error in parsing: %s' % e) |
+ sys.exit(1) |
+ else: |
+ sys.stderr.write(' (version: %s)' % new_dump.dump_version) |
+ |
+ return new_dump |
+ |
+ |
+def load_dump(dump_path, buckets): |
+ sys.stderr.write('Loading a heap dump file: "%s"' % dump_path) |
+ appeared_addresses = set() |
+ dump = load_single_dump(dump_path, buckets, appeared_addresses) |
+ sys.stderr.write('.\n') |
+ return dump, appeared_addresses |
+ |
+ |
+def load_dumps(dump_path_list, buckets): |
+ sys.stderr.write('Loading heap dump files.\n') |
+ appeared_addresses = set() |
+ dumps = [] |
+ for path in dump_path_list: |
+ sys.stderr.write(' %s' % path) |
+ dumps.append(load_single_dump(path, buckets, appeared_addresses)) |
+ sys.stderr.write('\n') |
+ return dumps, appeared_addresses |
+ |
+ |
+def load_and_update_symbol_cache(prefix, appeared_addresses): |
+ maps_path = prefix + '.maps' |
+ symbol_path = prefix + '.symbols' |
+ sys.stderr.write('Loading and updating symbol cache: "%s".\n' % symbol_path) |
+ symbols = {} |
+ update_symbols(symbol_path, maps_path, appeared_addresses, symbols) |
+ return symbols |
+ |
+ |
+def load_default_policies(): |
+ with open(POLICIES_JSON_PATH, mode='r') as policies_f: |
+ default_policies = json.load(policies_f) |
+ return default_policies |
+ |
+ |
+def load_policy(policies_dict, policy_label): |
+ policy_file = policies_dict[policy_label]['file'] |
+ policy_path = os.path.join(os.path.dirname(__file__), policy_file) |
+ rule_list, policy_version, components = parse_policy(policy_path) |
+ sys.stderr.write(' %s: %s (version: %s)\n' % |
+ (policy_label, policy_path, policy_version)) |
+ return Policy(rule_list, policy_version, components) |
+ |
+ |
+def load_policies_dict(policies_dict): |
+ sys.stderr.write('Loading policy files.\n') |
+ policies = {} |
+ for policy_label in policies_dict: |
+ policies[policy_label] = load_policy(policies_dict, policy_label) |
+ return policies |
+ |
+ |
+def load_policies(options_policy): |
+ default_policies = load_default_policies() |
+ if options_policy: |
+ policy_labels = options_policy.split(',') |
+ specified_policies = {} |
+ for specified_policy in policy_labels: |
+ if specified_policy in default_policies: |
+ specified_policies[specified_policy] = ( |
+ default_policies[specified_policy]) |
+ policies = load_policies_dict(specified_policies) |
+ else: |
+ policies = load_policies_dict(default_policies) |
+ return policies |
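Assumed shape of policies.json, inferred only from load_policy's use of the 'file' key; the labels and file names are illustrative:

    {
      "l0": { "file": "policy.l0.txt" },
      "l1": { "file": "policy.l1.txt" }
    }

load_policies('l0') then loads one policy, load_policies('l0,l1') a comma-separated subset, and load_policies(None) everything; unknown labels are silently dropped by the membership test above.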
+ |
+ |
+def do_stacktrace(sys_argv): |
+ parser = OptionParser(usage='Usage: %prog stacktrace <dump>') |
+ options, args = parser.parse_args(sys_argv) |
+ |
+ if len(args) < 2: |
+ parser.error('needs 1 argument.') |
+ |
+ dump_path = args[1] |
+ |
+ prefix = find_prefix(dump_path) |
+ buckets = load_buckets(prefix) |
+ dump, appeared_addresses = load_dump(dump_path, buckets) |
+ symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
+ |
+ dump.print_stacktrace(buckets, symbols) |
+ |
+ return 0 |
- sys.stderr.write('getting symbols\n') |
- update_symbols(symbol_path, maps_lines, maps_path) |
- # TODO(dmikurube): Many modes now. Split them into separete functions. |
- if action == '--stacktrace': |
- logs[0].dump_stacktrace(buckets) |
+def do_csv(sys_argv): |
+ parser = OptionParser('Usage: %prog csv [-p POLICY] <first-dump>') |
+ parser.add_option('-p', '--policy', type='string', dest='policy', |
+ help='profile with POLICY', metavar='POLICY') |
+ options, args = parser.parse_args(sys_argv) |
- elif action == '--csv': |
- sys.stdout.write(','.join(components)) |
- sys.stdout.write('\n') |
+ if len(args) < 2: |
M-A Ruel (2012/07/24 14:10:53): what with 10 args?
Dai Mikurube (NOT FULLTIME) (2012/07/24 14:53:45): It just ignores extra args. Should it warn or abort?
M-A Ruel (2012/07/24 14:57:59): Please abort. Unless it is necessary to ignore dur
Dai Mikurube (NOT FULLTIME) (2012/07/24 16:19:24): Done.
+ parser.error('needs 1 argument.') |
- for log in logs: |
- component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) |
+ dump_path = args[1] |
+ |
+ prefix = find_prefix(dump_path) |
+ buckets = load_buckets(prefix) |
+ dumps, appeared_addresses = load_dumps( |
+ determine_dump_path_list(dump_path, prefix), buckets) |
+ symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
+ policies = load_policies(options.policy) |
+ |
+ max_components = 0 |
+ for policy in policies: |
+ max_components = max(max_components, len(policies[policy].components)) |
+ |
+ for policy in sorted(policies): |
+ rule_list = policies[policy].rules |
+ components = policies[policy].components |
+ |
+ if len(policies) > 1: |
+ sys.stdout.write('%s%s\n' % (policy, ',' * (max_components - 1))) |
+ sys.stdout.write('%s%s\n' % ( |
+ ','.join(components), ',' * (max_components - len(components)))) |
+ |
+ for dump in dumps: |
+ component_sizes = dump.apply_policy( |
+ rule_list, buckets, dumps[0].dump_time, components, symbols) |
s = [] |
for c in components: |
if c in ('hour', 'minute', 'second'): |
s.append('%05.5f' % (component_sizes[c])) |
else: |
s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) |
- sys.stdout.write(','.join(s)) |
- sys.stdout.write('\n') |
+ sys.stdout.write('%s%s\n' % ( |
+ ','.join(s), ',' * (max_components - len(components)))) |
+ |
+ for bucket in buckets.itervalues(): |
+ bucket.clear_component_cache() |
+ |
+ return 0 |
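Output sketch for two policies of different widths (labels, components, and sizes fabricated): every row is right-padded with commas to the widest policy's component count, and the per-policy label row appears only when more than one policy is requested.

    l0,,,
    total,mmap-profiler,tc-used,unhooked
    95.12000,0.50000,42.00000,12.62000
    l1,,,
    total,tc-webkit,,
    95.12000,42.00000,,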
+ |
+ |
+def do_json(sys_argv): |
+ parser = OptionParser('Usage: %prog json [-p POLICY] <first-dump>') |
+ parser.add_option('-p', '--policy', type='string', dest='policy', |
+ help='profile with POLICY', metavar='POLICY') |
+ options, args = parser.parse_args(sys_argv) |
+ |
+ if len(args) < 2: |
+ parser.error('needs 1 argument.') |
+ |
+ dump_path = args[1] |
+ |
+ prefix = find_prefix(dump_path) |
+ buckets = load_buckets(prefix) |
+ dumps, appeared_addresses = load_dumps( |
+ determine_dump_path_list(dump_path, prefix), buckets) |
+ symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
+ policies = load_policies(options.policy) |
- elif action == '--json': |
- json_base = { |
- 'version': 'JSON_DEEP_1', |
+ json_base = { |
+ 'version': 'JSON_DEEP_2', |
+ 'policies': {}, |
+ } |
+ |
+ for policy in sorted(policies): |
+ rule_list = policies[policy].rules |
+ components = policies[policy].components |
+ |
+ json_base['policies'][policy] = { |
'legends': components, |
'snapshots': [], |
} |
- for log in logs: |
- component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) |
- component_sizes['log_path'] = log.log_path |
- component_sizes['log_time'] = datetime.fromtimestamp( |
- log.log_time).strftime('%Y-%m-%d %H:%M:%S') |
- json_base['snapshots'].append(component_sizes) |
- json.dump(json_base, sys.stdout, indent=2, sort_keys=True) |
- |
- elif action == '--list': |
- component_sizes = logs[0].apply_policy( |
- policy_list, buckets, logs[0].log_time) |
+ |
+ for dump in dumps: |
+ component_sizes = dump.apply_policy( |
+ rule_list, buckets, dumps[0].dump_time, components, symbols) |
+ component_sizes['dump_path'] = dump.dump_path |
+ component_sizes['dump_time'] = datetime.fromtimestamp( |
+ dump.dump_time).strftime('%Y-%m-%d %H:%M:%S') |
+ json_base['policies'][policy]['snapshots'].append(component_sizes) |
+ |
+ for bucket in buckets.itervalues(): |
+ bucket.clear_component_cache() |
+ |
+ json.dump(json_base, sys.stdout, indent=2, sort_keys=True) |
+ |
+ return 0 |
+ |
+ |
+def do_list(sys_argv): |
+  parser = OptionParser('Usage: %prog list [-p POLICY] <first-dump>') |
+ parser.add_option('-p', '--policy', type='string', dest='policy', |
+ help='profile with POLICY', metavar='POLICY') |
+ options, args = parser.parse_args(sys_argv) |
+ |
+ if len(args) < 2: |
+ parser.error('needs 1 argument.') |
+ |
+ dump_path = args[1] |
+ |
+ prefix = find_prefix(dump_path) |
+ buckets = load_buckets(prefix) |
+ dumps, appeared_addresses = load_dumps( |
+ determine_dump_path_list(dump_path, prefix), buckets) |
+ symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
+ policies = load_policies(options.policy) |
+ |
+ for policy in sorted(policies): |
+ rule_list = policies[policy].rules |
+ components = policies[policy].components |
+ |
+ component_sizes = dumps[0].apply_policy( |
+ rule_list, buckets, dumps[0].dump_time, components, symbols) |
+ sys.stdout.write('%s:\n' % policy) |
for c in components: |
if c in ['hour', 'minute', 'second']: |
sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c])) |
@@ -795,16 +953,110 @@ Examples: |
sys.stdout.write('%30s %10.3f\n' % ( |
c, component_sizes[c] / 1024.0 / 1024.0)) |
- elif action == '--expand': |
- component_name = sys.argv[5] |
- depth = sys.argv[6] |
- logs[0].expand(policy_list, buckets, component_name, int(depth)) |
+ for bucket in buckets.itervalues(): |
+ bucket.clear_component_cache() |
- elif action == '--pprof': |
- if len(sys.argv) > 5: |
- logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5]) |
- else: |
- logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None) |
+ return 0 |
+ |
+ |
+def do_expand(sys_argv): |
+ parser = OptionParser( |
+ 'Usage: %prog expand <dump> <policy> <component> <depth>') |
+ options, args = parser.parse_args(sys_argv) |
+ |
+ if len(args) < 5: |
+ parser.error('needs 4 arguments.') |
+ |
+ dump_path = args[1] |
+ target_policy = args[2] |
+ component_name = args[3] |
+ depth = args[4] |
+ |
+ prefix = find_prefix(dump_path) |
+ buckets = load_buckets(prefix) |
+ dump, appeared_addresses = load_dump(dump_path, buckets) |
+ symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
+ policies = load_policies(target_policy) |
+ |
+ rule_list = policies[target_policy].rules |
+ |
+ dump.expand(rule_list, buckets, component_name, int(depth), symbols) |
+ |
+ return 0 |
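Example invocation under the new command-line format, following the Quick Reference at the bottom of this file ('l0' is an assumed policy label):

    dmprof expand hprof.12345.0012.heap l0 tc-webkit 4

Note the label is used twice: load_policies(target_policy) restricts which policies are loaded, and policies[target_policy] then selects the rule list.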
+ |
+ |
+def do_pprof(sys_argv): |
+ parser = OptionParser( |
+ 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') |
+ parser.add_option('-c', '--component', type='string', dest='component', |
+ help='restrict to COMPONENT', metavar='COMPONENT') |
+ options, args = parser.parse_args(sys_argv) |
+ |
+ if len(args) < 3: |
+ parser.error('needs 2 arguments.') |
+ |
+ dump_path = args[1] |
+ target_policy = args[2] |
+ component = options.component |
+ |
+ prefix = find_prefix(dump_path) |
+ buckets = load_buckets(prefix) |
+ dump, appeared_addresses = load_dump(dump_path, buckets) |
+ symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
+ policies = load_policies(target_policy) |
+ |
+ rule_list = policies[target_policy].rules |
+ |
+ with open(prefix + '.maps', 'r') as maps_f: |
+ maps_lines = maps_f.readlines() |
+ dump.print_for_pprof(rule_list, buckets, maps_lines, component, symbols) |
+ |
+ return 0 |
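For reference, print_for_pprof emits the textual heap-profile format pprof consumes: a summary line from the accumulated totals, one line per surviving stacktrace, then the raw maps. A fabricated sketch:

    heap profile:   1234:  5678901 [  1234:  5678901] @ heapprofile
        12:     4096 [    12:     4096] @ 0x7f6e3c8a1b20 0x7f6e3c8a1f00
    MAPPED_LIBRARIES:
    7f6e3c000000-7f6e3d000000 r-xp 00000000 fc:00 123456 /opt/chrome/chrome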
+ |
+ |
+def main(): |
+ COMMANDS = { |
+ 'csv': do_csv, |
+ 'expand': do_expand, |
+ 'json': do_json, |
+ 'list': do_list, |
+ 'pprof': do_pprof, |
+ 'stacktrace': do_stacktrace, |
+ } |
+ |
+ # TODO(dmikurube): Remove this message after a while. |
+ if len(sys.argv) >= 2 and sys.argv[1].startswith('--'): |
+ sys.stderr.write(""" |
+**************** NOTICE!! **************** |
+ The command line format has changed. |
+ Please look at the description below. |
+****************************************** |
+ |
+""") |
+ |
+  if len(sys.argv) < 2 or sys.argv[1] not in COMMANDS: |
+ sys.stderr.write("""Usage: %s <command> [options] [<args>] |
+ |
+Commands: |
+ csv Classify memory usage in CSV |
+ expand Show all stacktraces contained in the specified component |
+ json Classify memory usage in JSON |
+ list Classify memory usage in simple listing format |
+ pprof Format the profile dump so that it can be processed by pprof |
+ stacktrace Convert runtime addresses to symbol names |
+ |
+Quick Reference: |
+ dmprof csv [-p POLICY] <first-dump> |
+ dmprof expand <dump> <policy> <component> <depth> |
+ dmprof json [-p POLICY] <first-dump> |
+ dmprof list [-p POLICY] <first-dump> |
+ dmprof pprof [-c COMPONENT] <dump> <policy> |
+ dmprof stacktrace <dump> |
+""" % (sys.argv[0])) |
+ sys.exit(1) |
+ action = sys.argv.pop(1) |
+ |
+ return COMMANDS[action](sys.argv) |
if __name__ == '__main__': |