Chromium Code Reviews
| Index: tools/deep_memory_profiler/dmprof |
| diff --git a/tools/deep_memory_profiler/dmprof b/tools/deep_memory_profiler/dmprof |
| index 759caa0deae0523d83763b62625d475a0df64f05..ea983f85411b6d6242d62105c45a63487a924be1 100755 |
| --- a/tools/deep_memory_profiler/dmprof |
| +++ b/tools/deep_memory_profiler/dmprof |
| @@ -9,6 +9,7 @@ from datetime import datetime |
| import json |
| import os |
| import re |
| +from optparse import OptionParser |
|
M-A Ruel
2012/07/24 14:10:53
just import optparse
Dai Mikurube (NOT FULLTIME)
2012/07/24 16:19:24
Done.
|
| import shutil |
| import subprocess |
| import sys |
| @@ -30,6 +31,8 @@ ALLOC_COUNT = 2 |
| FREE_COUNT = 3 |
| NULL_REGEX = re.compile('') |
| +POLICIES_JSON_PATH = os.path.join(os.path.dirname(__file__), 'policies.json') |
|
M-A Ruel
2012/07/24 14:10:53
no
Dai Mikurube (NOT FULLTIME)
2012/07/24 14:53:45
Sorry, what do you mean by this?
|
| + |
| # Heap Profile Dump versions |
| # DUMP_DEEP_1 is OBSOLETE. |
| @@ -72,11 +75,6 @@ POLICY_DEEP_1 = 'POLICY_DEEP_1' |
| # mmap regions are distincted w/ the allocation_type column. |
| POLICY_DEEP_2 = 'POLICY_DEEP_2' |
| -# TODO(dmikurube): Avoid global variables. |
| -address_symbol_dict = {} |
| -appeared_addresses = set() |
| -components = [] |
| - |
| class EmptyDumpException(Exception): |
| def __init__(self, value): |
| @@ -106,7 +104,8 @@ class ObsoleteDumpVersionException(ParsingException): |
| return "obsolete heap profile dump version: %s" % repr(self.value) |
| -class Policy(object): |
| +class Rule(object): |
| + """Represents one matching rule in a policy file.""" |
| def __init__(self, name, mmap, pattern): |
| self.name = name |
| @@ -114,60 +113,74 @@ class Policy(object): |
| self.condition = re.compile(pattern + r'\Z') |
| -def get_component(policy_list, bucket): |
| +class Policy(object): |
| + """Represents a policy, a content of a policy file.""" |
| + |
| + def __init__(self, rules, version, components): |
| + self.rules = rules |
| + self.version = version |
| + self.components = components |
| + |
| + def append_rule(self, rule): |
| + self.rules.append(rule) |
| + |
| + |
| +def get_component(rule_list, bucket, symbols): |
| """Returns a component name which a given bucket belongs to. |
| Args: |
| - policy_list: A list containing Policy objects. (Parsed policy data by |
| - parse_policy.) |
| + rule_list: A list of Rule objects. |
| bucket: A Bucket object to be searched for. |
| + symbols: A dict mapping runtime addresses to symbol names. |
| Returns: |
| A string representing a component name. |
| """ |
| if not bucket: |
| return 'no-bucket' |
| - if bucket.component: |
| - return bucket.component |
| + if bucket.component_cache: |
| + return bucket.component_cache |
| - stacktrace = ''.join( |
| - address_symbol_dict[a] + ' ' for a in bucket.stacktrace).strip() |
| + stacktrace = ''.join(symbols[a] + ' ' for a in bucket.stacktrace).strip() |
| - for policy in policy_list: |
| - if bucket.mmap == policy.mmap and policy.condition.match(stacktrace): |
| - bucket.component = policy.name |
| - return policy.name |
| + for rule in rule_list: |
| + if bucket.mmap == rule.mmap and rule.condition.match(stacktrace): |
| + bucket.component_cache = rule.name |
| + return rule.name |
| assert False |
| class Bucket(object): |
| + """Represents a bucket, which is a unit of memory classification.""" |
| def __init__(self, stacktrace, mmap): |
| self.stacktrace = stacktrace |
| self.mmap = mmap |
| - self.component = '' |
| + self.component_cache = '' |
| + |
| + def clear_component_cache(self): |
| + self.component_cache = '' |
| -class Log(object): |
| +class Dump(object): |
| + """Represents one heap profile dump.""" |
| - """A class representing one dumped log data.""" |
| - def __init__(self, log_path): |
| - self.log_path = log_path |
| - self.log_lines = [ |
| - l for l in open(self.log_path, 'r') if l and not l.startswith('#')] |
| - self.log_version = '' |
| - sys.stderr.write('Loading a dump: %s\n' % log_path) |
| + def __init__(self, dump_path): |
| + self.dump_path = dump_path |
| + self.dump_lines = [ |
| + l for l in open(self.dump_path, 'r') if l and not l.startswith('#')] |
| + self.dump_version = '' |
| self.stacktrace_lines = [] |
| self.counters = {} |
| - self.log_time = os.stat(self.log_path).st_mtime |
| + self.dump_time = os.stat(self.dump_path).st_mtime |
| - def dump_stacktrace(buckets): |
| + def print_stacktrace(self, buckets, symbols): |
| """Prints a given stacktrace. |
| Args: |
| - buckets: A dict mapping bucket ids and their corresponding Bucket |
| - objects. |
| + buckets: A dict mapping bucket ids to Bucket objects. |
| + symbols: A dict mapping runtime addresses to symbol names. |
| """ |
| for line in self.stacktrace_lines: |
| words = line.split() |
| @@ -177,21 +190,20 @@ class Log(object): |
| for i in range(0, BUCKET_ID - 1): |
| sys.stdout.write(words[i] + ' ') |
| for address in bucket.stacktrace: |
| - sys.stdout.write((address_symbol_dict.get(address) or address) + ' ') |
| + sys.stdout.write((symbols.get(address) or address) + ' ') |
| sys.stdout.write('\n') |
| @staticmethod |
| - def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets, |
| - component_name): |
| + def accumulate_size_for_pprof(stacktrace_lines, rule_list, buckets, |
| + component_name, symbols): |
| """Accumulates size of committed chunks and the number of allocated chunks. |
| Args: |
| stacktrace_lines: A list of strings which are valid as stacktraces. |
| - policy_list: A list containing Policy objects. (Parsed policy data by |
| - parse_policy.) |
| - buckets: A dict mapping bucket ids and their corresponding Bucket |
| - objects. |
| + rule_list: A list of Rule objects. |
| + buckets: A dict mapping bucket ids to Bucket objects. |
| component_name: A name of component for filtering. |
| + symbols: A dict mapping runtime addresses to symbol names. |
| Returns: |
| Two integers which are the accumulated size of committed regions and the |
| @@ -204,7 +216,7 @@ class Log(object): |
| bucket = buckets.get(int(words[BUCKET_ID])) |
| if (not bucket or |
| (component_name and |
| - component_name != get_component(policy_list, bucket))): |
| + component_name != get_component(rule_list, bucket, symbols))): |
| continue |
| com_committed += int(words[COMMITTED]) |
| @@ -213,24 +225,23 @@ class Log(object): |
| return com_committed, com_allocs |
| @staticmethod |
| - def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list, |
| - buckets, component_name): |
| + def print_stacktrace_lines_for_pprof(stacktrace_lines, rule_list, |
| + buckets, component_name, symbols): |
| """Prints information of stacktrace lines for pprof. |
| Args: |
| stacktrace_lines: A list of strings which are valid as stacktraces. |
| - policy_list: A list containing Policy objects. (Parsed policy data by |
| - parse_policy.) |
| - buckets: A dict mapping bucket ids and their corresponding Bucket |
| - objects. |
| + rule_list: A list of Rule objects. |
| + buckets: A dict mapping bucket ids to Bucket objects. |
| component_name: A name of component for filtering. |
| + symbols: A dict mapping runtime addresses to symbol names. |
| """ |
| for line in stacktrace_lines: |
| words = line.split() |
| bucket = buckets.get(int(words[BUCKET_ID])) |
| if (not bucket or |
| (component_name and |
| - component_name != get_component(policy_list, bucket))): |
| + component_name != get_component(rule_list, bucket, symbols))): |
| continue |
| sys.stdout.write('%6d: %8s [%6d: %8s] @' % ( |
| @@ -242,39 +253,39 @@ class Log(object): |
| sys.stdout.write(' ' + address) |
| sys.stdout.write('\n') |
| - def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name): |
| - """Converts the log file so it can be processed by pprof. |
| + def print_for_pprof( |
| + self, rule_list, buckets, maps_lines, component_name, symbols): |
| + """Converts the heap profile dump so it can be processed by pprof. |
| Args: |
| - policy_list: A list containing Policy objects. (Parsed policy data by |
| - parse_policy.) |
| - buckets: A dict mapping bucket ids and their corresponding Bucket |
| - objects. |
| - mapping_lines: A list of strings containing /proc/.../maps. |
| + rule_list: A list of Rule objects. |
| + buckets: A dict mapping bucket ids to Bucket objects. |
| + maps_lines: A list of strings containing /proc/.../maps. |
| component_name: A name of component for filtering. |
| + symbols: A dict mapping runtime addresses to symbol names. |
| """ |
| sys.stdout.write('heap profile: ') |
| com_committed, com_allocs = self.accumulate_size_for_pprof( |
| - self.stacktrace_lines, policy_list, buckets, component_name) |
| + self.stacktrace_lines, rule_list, buckets, component_name, symbols) |
| sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( |
| com_allocs, com_committed, com_allocs, com_committed)) |
| - self.dump_stacktrace_lines_for_pprof( |
| - self.stacktrace_lines, policy_list, buckets, component_name) |
| + self.print_stacktrace_lines_for_pprof( |
| + self.stacktrace_lines, rule_list, buckets, component_name, symbols) |
| sys.stdout.write('MAPPED_LIBRARIES:\n') |
| - for line in mapping_lines: |
| + for line in maps_lines: |
| sys.stdout.write(line) |
| @staticmethod |
| - def check_stacktrace_line(stacktrace_line, buckets): |
| + def check_stacktrace_line(stacktrace_line, buckets, appeared_addresses): |
| """Checks if a given stacktrace_line is valid as stacktrace. |
| Args: |
| stacktrace_line: A string to be checked. |
| - buckets: A dict mapping bucket ids and their corresponding Bucket |
| - objects. |
| + buckets: A dict mapping bucket ids to Bucket objects. |
| + appeared_addresses: A list where appeared addresses will be stored. |
| Returns: |
| True if the given stacktrace_line is valid. |
| @@ -305,61 +316,59 @@ class Log(object): |
| return line_number, False |
| return line_number, True |
| - def parse_stacktraces_while_valid(self, buckets, log_lines, line_number): |
| + def parse_stacktraces_while_valid( |
| + self, buckets, dump_lines, line_number, appeared_addresses): |
| """Parses stacktrace lines while the lines are valid. |
| Args: |
| - buckets: A dict mapping bucket ids and their corresponding Bucket |
| - objects. |
| - log_lines: A list of lines to be parsed. |
| - line_number: An integer representing the starting line number in |
| - log_lines. |
| + buckets: A dict mapping bucket ids to Bucket objects. |
| + dump_lines: A list of lines to be parsed. |
| + line_number: A line number to start parsing in dump_lines. |
| + appeared_addresses: A list where appeared addresses will be stored. |
| Returns: |
| A pair of a list of valid lines and an integer representing the last |
| - line number in log_lines. |
| + line number in dump_lines. |
| """ |
| (line_number, _) = self.skip_lines_while( |
| - line_number, len(log_lines), |
| - lambda n: not log_lines[n].split()[0].isdigit()) |
| + line_number, len(dump_lines), |
| + lambda n: not dump_lines[n].split()[0].isdigit()) |
| stacktrace_lines_start = line_number |
| (line_number, _) = self.skip_lines_while( |
| - line_number, len(log_lines), |
| - lambda n: self.check_stacktrace_line(log_lines[n], buckets)) |
| - return (log_lines[stacktrace_lines_start:line_number], line_number) |
| + line_number, len(dump_lines), |
| + lambda n: self.check_stacktrace_line( |
| + dump_lines[n], buckets, appeared_addresses)) |
| + return (dump_lines[stacktrace_lines_start:line_number], line_number) |
| - def parse_stacktraces(self, buckets, line_number): |
| - """Parses lines in self.log_lines as stacktrace. |
| + def parse_stacktraces(self, buckets, line_number, appeared_addresses): |
| + """Parses lines in self.dump_lines as stacktrace. |
| Valid stacktrace lines are stored into self.stacktrace_lines. |
| Args: |
| - buckets: A dict mapping bucket ids and their corresponding Bucket |
| - objects. |
| - line_number: An integer representing the starting line number in |
| - log_lines. |
| + buckets: A dict mapping bucket ids to Bucket objects. |
| + line_number: A line number to start parsing in dump_lines. |
| + appeared_addresses: A list where appeared addresses will be stored. |
| Raises: |
| ParsingException for invalid dump versions. |
| """ |
| - sys.stderr.write(' Version: %s\n' % self.log_version) |
| - |
| - if self.log_version == DUMP_DEEP_5: |
| + if self.dump_version == DUMP_DEEP_5: |
| (self.stacktrace_lines, line_number) = ( |
| self.parse_stacktraces_while_valid( |
| - buckets, self.log_lines, line_number)) |
| + buckets, self.dump_lines, line_number, appeared_addresses)) |
| - elif self.log_version in DUMP_DEEP_OBSOLETE: |
| - raise ObsoleteDumpVersionException(self.log_version) |
| + elif self.dump_version in DUMP_DEEP_OBSOLETE: |
| + raise ObsoleteDumpVersionException(self.dump_version) |
| else: |
| - raise InvalidDumpException('Invalid version: %s' % self.log_version) |
| + raise InvalidDumpException('Invalid version: %s' % self.dump_version) |
| def parse_global_stats(self): |
| - """Parses lines in self.log_lines as global stats.""" |
| + """Parses lines in self.dump_lines as global stats.""" |
| (ln, _) = self.skip_lines_while( |
| - 0, len(self.log_lines), |
| - lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n') |
| + 0, len(self.dump_lines), |
| + lambda n: self.dump_lines[n] != 'GLOBAL_STATS:\n') |
| global_stat_names = [ |
| 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', |
| @@ -370,14 +379,14 @@ class Log(object): |
| for prefix in global_stat_names: |
| (ln, _) = self.skip_lines_while( |
| - ln, len(self.log_lines), |
| - lambda n: self.log_lines[n].split()[0] != prefix) |
| - words = self.log_lines[ln].split() |
| + ln, len(self.dump_lines), |
| + lambda n: self.dump_lines[n].split()[0] != prefix) |
| + words = self.dump_lines[ln].split() |
| self.counters[prefix + '_virtual'] = int(words[-2]) |
| self.counters[prefix + '_committed'] = int(words[-1]) |
| def parse_version(self): |
| - """Parses a version string in self.log_lines. |
| + """Parses a version string in self.dump_lines. |
| Returns: |
| A pair of (a string representing a version of the stacktrace dump, |
| @@ -390,44 +399,45 @@ class Log(object): |
| # Skip until an identifiable line. |
| headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') |
| - if not self.log_lines: |
| + if not self.dump_lines: |
| raise EmptyDumpException('Empty heap dump file.') |
| (ln, found) = self.skip_lines_while( |
| - 0, len(self.log_lines), |
| - lambda n: not self.log_lines[n].startswith(headers)) |
| + 0, len(self.dump_lines), |
| + lambda n: not self.dump_lines[n].startswith(headers)) |
| if not found: |
| raise InvalidDumpException('No version header.') |
| # Identify a version. |
| - if self.log_lines[ln].startswith('heap profile: '): |
| - version = self.log_lines[ln][13:].strip() |
| + if self.dump_lines[ln].startswith('heap profile: '): |
| + version = self.dump_lines[ln][13:].strip() |
| if version == DUMP_DEEP_5: |
| (ln, _) = self.skip_lines_while( |
| - ln, len(self.log_lines), |
| - lambda n: self.log_lines[n] != 'STACKTRACES:\n') |
| + ln, len(self.dump_lines), |
| + lambda n: self.dump_lines[n] != 'STACKTRACES:\n') |
| elif version in DUMP_DEEP_OBSOLETE: |
| raise ObsoleteDumpVersionException(version) |
| else: |
| raise InvalidDumpException('Invalid version: %s' % version) |
| - elif self.log_lines[ln] == 'STACKTRACES:\n': |
| + elif self.dump_lines[ln] == 'STACKTRACES:\n': |
| raise ObsoleteDumpVersionException(DUMP_DEEP_1) |
| - elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n': |
| + elif self.dump_lines[ln] == 'MMAP_STACKTRACES:\n': |
| raise ObsoleteDumpVersionException(DUMP_DEEP_2) |
| return (version, ln) |
| - def parse_log(self, buckets): |
| - self.log_version, ln = self.parse_version() |
| + def parse_dump(self, buckets, appeared_addresses): |
| + self.dump_version, ln = self.parse_version() |
| self.parse_global_stats() |
| - self.parse_stacktraces(buckets, ln) |
| + self.parse_stacktraces(buckets, ln, appeared_addresses) |
| @staticmethod |
| def accumulate_size_for_policy(stacktrace_lines, |
| - policy_list, buckets, sizes): |
| + rule_list, buckets, sizes, symbols): |
| for line in stacktrace_lines: |
| words = line.split() |
| bucket = buckets.get(int(words[BUCKET_ID])) |
| - component_match = get_component(policy_list, bucket) |
| + component_match = get_component(rule_list, bucket, symbols) |
| + |
|
M-A Ruel
2012/07/24 14:10:53
This new line is gratuitous. Intended?
Dai Mikurube (NOT FULLTIME)
2012/07/24 16:19:24
It was unintended. Thanks.
|
| sizes[component_match] += int(words[COMMITTED]) |
| if component_match.startswith('tc-'): |
| @@ -437,29 +447,30 @@ class Log(object): |
| else: |
| sizes['other-total-log'] += int(words[COMMITTED]) |
| - def apply_policy(self, policy_list, buckets, first_log_time): |
| + def apply_policy( |
| + self, rule_list, buckets, first_dump_time, components, symbols): |
| """Aggregates the total memory size of each component. |
| Iterate through all stacktraces and attribute them to one of the components |
| based on the policy. It is important to apply policy in right order. |
| Args: |
| - policy_list: A list containing Policy objects. (Parsed policy data by |
| - parse_policy.) |
| - buckets: A dict mapping bucket ids and their corresponding Bucket |
| - objects. |
| - first_log_time: An integer representing time when the first log is |
| + rule_list: A list of Rule objects. |
| + buckets: A dict mapping bucket ids to Bucket objects. |
| + first_dump_time: An integer representing time when the first dump is |
| dumped. |
| + components: A list of strings of component names. |
| + symbols: A dict mapping runtime addresses to symbol names. |
| Returns: |
| A dict mapping components and their corresponding sizes. |
| """ |
| - sys.stderr.write('apply policy:%s\n' % (self.log_path)) |
| + sys.stderr.write('Applying policy: "%s".\n' % self.dump_path) |
| sizes = dict((c, 0) for c in components) |
| self.accumulate_size_for_policy(self.stacktrace_lines, |
| - policy_list, buckets, sizes) |
| + rule_list, buckets, sizes, symbols) |
| mmap_prefix = 'profiled-mmap' |
| malloc_prefix = 'profiled-malloc' |
| @@ -513,46 +524,45 @@ class Log(object): |
| sizes['total-exclude-profiler'] = ( |
| self.counters['total_committed'] - sizes['mmap-profiler']) |
| if 'hour' in sizes: |
| - sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0 |
| + sizes['hour'] = (self.dump_time - first_dump_time) / 60.0 / 60.0 |
| if 'minute' in sizes: |
| - sizes['minute'] = (self.log_time - first_log_time) / 60.0 |
| + sizes['minute'] = (self.dump_time - first_dump_time) / 60.0 |
| if 'second' in sizes: |
| - sizes['second'] = self.log_time - first_log_time |
| + sizes['second'] = self.dump_time - first_dump_time |
| return sizes |
| @staticmethod |
| - def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets, |
| - component_name, depth, sizes): |
| + def accumulate_size_for_expand(stacktrace_lines, rule_list, buckets, |
| + component_name, depth, sizes, symbols): |
| for line in stacktrace_lines: |
| words = line.split() |
| bucket = buckets.get(int(words[BUCKET_ID])) |
| - component_match = get_component(policy_list, bucket) |
| + component_match = get_component(rule_list, bucket, symbols) |
| if component_match == component_name: |
| stacktrace_sequence = '' |
| for address in bucket.stacktrace[0 : min(len(bucket.stacktrace), |
| 1 + depth)]: |
| - stacktrace_sequence += address_symbol_dict[address] + ' ' |
| + stacktrace_sequence += symbols[address] + ' ' |
| if not stacktrace_sequence in sizes: |
| sizes[stacktrace_sequence] = 0 |
| sizes[stacktrace_sequence] += int(words[COMMITTED]) |
| - def expand(self, policy_list, buckets, component_name, depth): |
| + def expand(self, rule_list, buckets, component_name, depth, symbols): |
| """Prints all stacktraces in a given component of given depth. |
| Args: |
| - policy_list: A list containing Policy objects. (Parsed policy data by |
| - parse_policy.) |
| - buckets: A dict mapping bucket ids and their corresponding Bucket |
| - objects. |
| + rule_list: A list of Rule objects. |
| + buckets: A dict mapping bucket ids to Bucket objects. |
| component_name: A name of component for filtering. |
| depth: An integer representing depth to be printed. |
| + symbols: A dict mapping runtime addresses to symbol names. |
| """ |
| sizes = {} |
| self.accumulate_size_for_expand( |
| - self.stacktrace_lines, policy_list, buckets, component_name, |
| - depth, sizes) |
| + self.stacktrace_lines, rule_list, buckets, component_name, |
| + depth, sizes, symbols) |
| sorted_sizes_list = sorted( |
| sizes.iteritems(), key=(lambda x: x[1]), reverse=True) |
| @@ -563,7 +573,8 @@ class Log(object): |
| sys.stderr.write('total: %d\n' % (total)) |
| -def update_symbols(symbol_path, mapping_lines, maps_path): |
| +def update_symbols( |
| + symbol_path, maps_path, appeared_addresses, symbols): |
| """Updates address/symbol mapping on memory and in a .symbol cache file. |
| It reads cached address/symbol mapping from a .symbol file if it exists. |
| @@ -578,29 +589,42 @@ def update_symbols(symbol_path, mapping_lines, maps_path): |
| Args: |
| symbol_path: A string representing a path for a .symbol file. |
| - mapping_lines: A list of strings containing /proc/.../maps. |
| maps_path: A string of the path of /proc/.../maps. |
| + appeared_addresses: A list of known addresses. |
| + symbols: A dict mapping runtime addresses to symbol names. |
| """ |
| with open(symbol_path, mode='a+') as symbol_f: |
| symbol_lines = symbol_f.readlines() |
| if symbol_lines: |
| for line in symbol_lines: |
| items = line.split(None, 1) |
| - address_symbol_dict[items[0]] = items[1].rstrip() |
| + if len(items) == 1: |
| + items.append('??') |
| + symbols[items[0]] = items[1].rstrip() |
| + if symbols: |
| + sys.stderr.write(' Found %d symbols in cache.\n' % len(symbols)) |
| + else: |
| + sys.stderr.write(' No symbols found in cache.\n') |
| unresolved_addresses = sorted( |
| - a for a in appeared_addresses if a not in address_symbol_dict) |
| + a for a in appeared_addresses if a not in symbols) |
| - if unresolved_addresses: |
| + if not unresolved_addresses: |
| + sys.stderr.write(' No need to resolve any more addresses.\n') |
| + else: |
| + sys.stderr.write(' %d addresses are unresolved.\n' % |
| + len(unresolved_addresses)) |
| prepared_data_dir = tempfile.mkdtemp() |
| prepare_symbol_info(maps_path, prepared_data_dir) |
| - symbols = find_runtime_symbols_list( |
| + symbol_list = find_runtime_symbols_list( |
| prepared_data_dir, unresolved_addresses) |
| - for address, symbol in zip(unresolved_addresses, symbols): |
| + for address, symbol in zip(unresolved_addresses, symbol_list): |
| + if not symbol: |
| + symbol = '??' |
| stripped_symbol = symbol.strip() |
| - address_symbol_dict[address] = stripped_symbol |
| + symbols[address] = stripped_symbol |
| symbol_f.write('%s %s\n' % (address, stripped_symbol)) |
| shutil.rmtree(prepared_data_dir) |
| @@ -626,10 +650,10 @@ def parse_policy(policy_path): |
| if policy_lines[0].startswith('heap profile policy: '): |
| policy_version = policy_lines[0][21:].strip() |
| policy_lines.pop(0) |
| - policy_list = [] |
| + rule_list = [] |
| + components = [] |
| if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1: |
| - sys.stderr.write(' heap profile policy version: %s\n' % policy_version) |
| for line in policy_lines: |
| if line[0] == '#': |
| continue |
| @@ -645,7 +669,7 @@ def parse_policy(policy_path): |
| mmap = False |
| if pattern != 'default': |
| - policy_list.append(Policy(name, mmap, pattern)) |
| + rule_list.append(Rule(name, mmap, pattern)) |
| if components.count(name) == 0: |
| components.append(name) |
| @@ -653,57 +677,16 @@ def parse_policy(policy_path): |
| sys.stderr.write(' invalid heap profile policy version: %s\n' % ( |
| policy_version)) |
| - return policy_list |
| + return rule_list, policy_version, components |
| -def main(): |
| - if (len(sys.argv) < 4) or (not (sys.argv[1] in ['--csv', |
| - '--json', |
| - '--expand', |
| - '--list', |
| - '--stacktrace', |
| - '--pprof'])): |
| - sys.stderr.write("""Usage: |
| -%s [options] <chrome-binary> <policy> <profile> [component-name] [depth] |
| - |
| -Options: |
| - --csv Output result in csv format |
| - --json Output result in json format |
| - --stacktrace Convert raw address to symbol names |
| - --list Lists components and their sizes |
| - --expand Show all stacktraces in the specified component |
| - of given depth with their sizes |
| - --pprof Format the profile file so it can be processed |
| - by pprof |
| - |
| -Examples: |
| - dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv |
| - dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json |
| - dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap |
| - dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4 |
| - dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt |
| -""" % (sys.argv[0])) |
| - sys.exit(1) |
| - |
| - action = sys.argv[1] |
| - chrome_path = sys.argv[2] |
| - policy_path = sys.argv[3] |
| - log_path = sys.argv[4] |
| - |
| - sys.stderr.write('parsing a policy file\n') |
| - policy_list = parse_policy(policy_path) |
| - |
| - p = re.compile('\.[0-9][0-9][0-9][0-9]\.heap') |
| - prefix = p.sub('', log_path) |
| - symbol_path = prefix + '.symbols' |
| +def find_prefix(path): |
| + return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path) |
| - sys.stderr.write('parsing the maps file\n') |
| - maps_path = prefix + '.maps' |
| - with open(maps_path, 'r') as maps_f: |
| - maps_lines = maps_f.readlines() |
| +def load_buckets(prefix): |
| # Reading buckets |
| - sys.stderr.write('parsing the bucket file\n') |
| + sys.stderr.write('Loading bucket files.\n') |
| buckets = {} |
| bucket_count = 0 |
| n = 0 |
| @@ -714,80 +697,255 @@ Examples: |
| break |
| n += 1 |
| continue |
| - sys.stderr.write('reading buckets from %s\n' % (buckets_path)) |
| + sys.stderr.write(' %s\n' % buckets_path) |
| with open(buckets_path, 'r') as buckets_f: |
| for line in buckets_f: |
| words = line.split() |
| buckets[int(words[0])] = Bucket(words[2:], words[1] == 'mmap') |
| n += 1 |
| - log_path_list = [log_path] |
| + return buckets |
| - if action in ('--csv', '--json'): |
| - # search for the sequence of files |
| - n = int(log_path[len(log_path) - 9 : len(log_path) - 5]) |
| - n += 1 # skip current file |
| - while True: |
| - p = '%s.%04d.heap' % (prefix, n) |
| - if os.path.exists(p): |
| - log_path_list.append(p) |
| - else: |
| - break |
| - n += 1 |
| - logs = [] |
| - for path in log_path_list: |
| - new_log = Log(path) |
| - sys.stderr.write('Parsing a dump: %s\n' % path) |
| - try: |
| - new_log.parse_log(buckets) |
| - except EmptyDumpException: |
| - sys.stderr.write(' WARNING: ignored an empty dump: %s\n' % path) |
| - except ParsingException, e: |
| - sys.stderr.write(' Error in parsing heap profile dump: %s\n' % e) |
| - sys.exit(1) |
| +def determine_dump_path_list(dump_path, prefix): |
| + dump_path_list = [dump_path] |
| + |
| + # search for the sequence of files |
| + n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5]) |
| + n += 1 # skip current file |
| + while True: |
| + p = '%s.%04d.heap' % (prefix, n) |
| + if os.path.exists(p): |
| + dump_path_list.append(p) |
| else: |
| - logs.append(new_log) |
| + break |
| + n += 1 |
| + |
| + return dump_path_list |
| + |
| + |
| +def load_single_dump(dump_path, buckets, appeared_addresses): |
| + new_dump = Dump(dump_path) |
| + try: |
| + new_dump.parse_dump(buckets, appeared_addresses) |
| + except EmptyDumpException: |
| + sys.stderr.write('... ignored an empty dump') |
| + except ParsingException, e: |
| + sys.stderr.write('... error in parsing: %s' % e) |
| + sys.exit(1) |
| + else: |
| + sys.stderr.write(' (version: %s)' % new_dump.dump_version) |
| + |
| + return new_dump |
| + |
| + |
| +def load_dump(dump_path, buckets): |
| + sys.stderr.write('Loading a heap dump file: "%s"' % dump_path) |
| + appeared_addresses = set() |
| + dump = load_single_dump(dump_path, buckets, appeared_addresses) |
| + sys.stderr.write('.\n') |
| + return dump, appeared_addresses |
| + |
| + |
| +def load_dumps(dump_path_list, buckets): |
| + sys.stderr.write('Loading heap dump files.\n') |
| + appeared_addresses = set() |
| + dumps = [] |
| + for path in dump_path_list: |
| + sys.stderr.write(' %s' % path) |
| + dumps.append(load_single_dump(path, buckets, appeared_addresses)) |
| + sys.stderr.write('\n') |
| + return dumps, appeared_addresses |
| + |
| + |
| +def load_and_update_symbol_cache(prefix, appeared_addresses): |
| + maps_path = prefix + '.maps' |
| + symbol_path = prefix + '.symbols' |
| + sys.stderr.write('Loading and updating symbol cache: "%s".\n' % symbol_path) |
| + symbols = {} |
| + update_symbols(symbol_path, maps_path, appeared_addresses, symbols) |
| + return symbols |
| + |
| + |
| +def load_default_policies(): |
| + with open(POLICIES_JSON_PATH, mode='r') as policies_f: |
| + default_policies = json.load(policies_f) |
| + return default_policies |
| + |
| + |
| +def load_policy(policies_dict, policy_label): |
| + policy_file = policies_dict[policy_label]['file'] |
| + policy_path = os.path.join(os.path.dirname(__file__), policy_file) |
| + rule_list, policy_version, components = parse_policy(policy_path) |
| + sys.stderr.write(' %s: %s (version: %s)\n' % |
| + (policy_label, policy_path, policy_version)) |
| + return Policy(rule_list, policy_version, components) |
| + |
| + |
| +def load_policies_dict(policies_dict): |
| + sys.stderr.write('Loading policy files.\n') |
| + policies = {} |
| + for policy_label in policies_dict: |
| + policies[policy_label] = load_policy(policies_dict, policy_label) |
| + return policies |
| + |
| + |
| +def load_policies(options_policy): |
| + default_policies = load_default_policies() |
| + if options_policy: |
| + policy_labels = options_policy.split(',') |
| + specified_policies = {} |
| + for specified_policy in policy_labels: |
| + if specified_policy in default_policies: |
| + specified_policies[specified_policy] = ( |
| + default_policies[specified_policy]) |
| + policies = load_policies_dict(specified_policies) |
| + else: |
| + policies = load_policies_dict(default_policies) |
| + return policies |
| + |
| + |
| +def do_stacktrace(sys_argv): |
| + parser = OptionParser(usage='Usage: %prog stacktrace <dump>') |
| + options, args = parser.parse_args(sys_argv) |
| + |
| + if len(args) < 2: |
| + parser.error('needs 1 argument.') |
| + |
| + dump_path = args[1] |
| + |
| + prefix = find_prefix(dump_path) |
| + buckets = load_buckets(prefix) |
| + dump, appeared_addresses = load_dump(dump_path, buckets) |
| + symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
| + |
| + dump.print_stacktrace(buckets, symbols) |
| + |
| + return 0 |
| - sys.stderr.write('getting symbols\n') |
| - update_symbols(symbol_path, maps_lines, maps_path) |
| - # TODO(dmikurube): Many modes now. Split them into separete functions. |
| - if action == '--stacktrace': |
| - logs[0].dump_stacktrace(buckets) |
| +def do_csv(sys_argv): |
| + parser = OptionParser('Usage: %prog csv [-p POLICY] <first-dump>') |
| + parser.add_option('-p', '--policy', type='string', dest='policy', |
| + help='profile with POLICY', metavar='POLICY') |
| + options, args = parser.parse_args(sys_argv) |
| - elif action == '--csv': |
| - sys.stdout.write(','.join(components)) |
| - sys.stdout.write('\n') |
| + if len(args) < 2: |
|
M-A Ruel
2012/07/24 14:10:53
what happens with 10 args?
Dai Mikurube (NOT FULLTIME)
2012/07/24 14:53:45
It just ignores extra args. Should it warn or abort?
M-A Ruel
2012/07/24 14:57:59
Please abort, unless it is necessary to ignore them.
Dai Mikurube (NOT FULLTIME)
2012/07/24 16:19:24
Done.
|
| + parser.error('needs 1 argument.') |
| - for log in logs: |
| - component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time) |
| + dump_path = args[1] |
| + |
| + prefix = find_prefix(dump_path) |
| + buckets = load_buckets(prefix) |
| + dumps, appeared_addresses = load_dumps( |
| + determine_dump_path_list(dump_path, prefix), buckets) |
| + symbols = load_and_update_symbol_cache(prefix, appeared_addresses) |
| + policies = load_policies(options.policy) |
| + |
| + max_components = 0 |
| + for policy in policies: |
| + max_components = max(max_components, len(policies[policy].components)) |
| + |
| + for policy in sorted(policies): |
| + rule_list = policies[policy].rules |
| + components = policies[policy].components |
| + |
| + if len(policies) > 1: |
| + sys.stdout.write('%s%s\n' % (policy, ',' * (max_components - 1))) |
| + sys.stdout.write('%s%s\n' % ( |
| + ','.join(components), ',' * (max_components - len(components)))) |
| + |
| + for dump in dumps: |
| + component_sizes = dump.apply_policy( |
| + rule_list, buckets, dumps[0].dump_time, components, symbols) |
| s = [] |
| for c in components: |
| if c in ('hour', 'minute', 'second'): |
| s.append('%05.5f' % (component_sizes[c])) |
| else: |
| s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) |
| - sys.stdout.write(','.join(s)) |
| - sys.stdout.write('\n') |
| + sys.stdout.write('%s%s\n' % ( |
| + ','.join(s), ',' * (max_components - len(components)))) |
| + |
| + for bucket in buckets.itervalues(): |
| + bucket.clear_component_cache() |
| + |
| + return 0 |
| + |
| + |
def do_json(sys_argv):
  """Subcommand 'json': classifies memory usage of a dump series in JSON.

  Args:
      sys_argv: An argv list ['dmprof', <first-dump>] for this subcommand,
          optionally with '-p POLICY'.

  Returns:
      0 on success.
  """
  parser = OptionParser('Usage: %prog json [-p POLICY] <first-dump>')
  parser.add_option('-p', '--policy', type='string', dest='policy',
                    help='profile with POLICY', metavar='POLICY')
  options, args = parser.parse_args(sys_argv)

  # Require exactly one positional argument (args[0] is the program name);
  # extra arguments abort instead of being silently ignored.
  if len(args) != 2:
    parser.error('needs 1 argument.')

  dump_path = args[1]

  prefix = find_prefix(dump_path)
  buckets = load_buckets(prefix)
  dumps, appeared_addresses = load_dumps(
      determine_dump_path_list(dump_path, prefix), buckets)
  symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
  policies = load_policies(options.policy)

  # JSON_DEEP_2 groups snapshots under per-policy entries.
  json_base = {
    'version': 'JSON_DEEP_2',
    'policies': {},
  }

  for policy in sorted(policies):
    rule_list = policies[policy].rules
    components = policies[policy].components

    json_base['policies'][policy] = {
      'legends': components,
      'snapshots': [],
    }

    for dump in dumps:
      component_sizes = dump.apply_policy(
          rule_list, buckets, dumps[0].dump_time, components, symbols)
      component_sizes['dump_path'] = dump.dump_path
      component_sizes['dump_time'] = datetime.fromtimestamp(
          dump.dump_time).strftime('%Y-%m-%d %H:%M:%S')
      json_base['policies'][policy]['snapshots'].append(component_sizes)

    # Component caches on buckets are per-policy; clear them before the
    # next policy re-classifies the same buckets.
    for bucket in buckets.itervalues():
      bucket.clear_component_cache()

  json.dump(json_base, sys.stdout, indent=2, sort_keys=True)

  return 0
| + |
| + |
def do_list(sys_argv):
  """Subcommand 'list': classifies memory usage of the first dump as a list.

  Args:
      sys_argv: An argv list ['dmprof', <first-dump>] for this subcommand,
          optionally with '-p POLICY'.

  Returns:
      0 on success.
  """
  parser = OptionParser('Usage: %prog [-p POLICY] list <first-dump>')
  parser.add_option('-p', '--policy', type='string', dest='policy',
                    help='profile with POLICY', metavar='POLICY')
  options, args = parser.parse_args(sys_argv)

  # Require exactly one positional argument (args[0] is the program name);
  # extra arguments abort instead of being silently ignored.
  if len(args) != 2:
    parser.error('needs 1 argument.')

  dump_path = args[1]

  prefix = find_prefix(dump_path)
  buckets = load_buckets(prefix)
  dumps, appeared_addresses = load_dumps(
      determine_dump_path_list(dump_path, prefix), buckets)
  symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
  policies = load_policies(options.policy)

  for policy in sorted(policies):
    rule_list = policies[policy].rules
    components = policies[policy].components

    # Only the first dump is listed for each policy.
    component_sizes = dumps[0].apply_policy(
        rule_list, buckets, dumps[0].dump_time, components, symbols)
    sys.stdout.write('%s:\n' % policy)
    for c in components:
      if c in ['hour', 'minute', 'second']:
        # Time components are printed as-is (elapsed time, not bytes).
        sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))
      else:
        # Byte counts are reported in megabytes.
        sys.stdout.write('%30s %10.3f\n' % (
            c, component_sizes[c] / 1024.0 / 1024.0))

    # Component caches on buckets are per-policy; clear them before the
    # next policy re-classifies the same buckets.
    for bucket in buckets.itervalues():
      bucket.clear_component_cache()

  return 0
| + |
| + |
def do_expand(sys_argv):
  """Subcommand 'expand': shows all stacktraces in a component of a policy.

  Args:
      sys_argv: An argv list
          ['dmprof', <dump>, <policy>, <component>, <depth>].

  Returns:
      0 on success.
  """
  parser = OptionParser(
      'Usage: %prog expand <dump> <policy> <component> <depth>')
  options, args = parser.parse_args(sys_argv)

  # Require exactly four positional arguments (args[0] is the program
  # name); extra arguments abort instead of being silently ignored.
  if len(args) != 5:
    parser.error('needs 4 arguments.')

  dump_path = args[1]
  target_policy = args[2]
  component_name = args[3]
  # Validate depth up front so a non-numeric value produces a usage error
  # rather than an uncaught ValueError traceback.
  try:
    depth = int(args[4])
  except ValueError:
    parser.error('depth must be an integer.')

  prefix = find_prefix(dump_path)
  buckets = load_buckets(prefix)
  dump, appeared_addresses = load_dump(dump_path, buckets)
  symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
  policies = load_policies(target_policy)

  rule_list = policies[target_policy].rules

  dump.expand(rule_list, buckets, component_name, depth, symbols)

  return 0
| + |
| + |
def do_pprof(sys_argv):
  """Subcommand 'pprof': formats a dump for processing by pprof.

  Args:
      sys_argv: An argv list ['dmprof', <dump>, <policy>] for this
          subcommand, optionally with '-c COMPONENT'.

  Returns:
      0 on success.
  """
  parser = OptionParser(
      'Usage: %prog pprof [-c COMPONENT] <dump> <policy>')
  parser.add_option('-c', '--component', type='string', dest='component',
                    help='restrict to COMPONENT', metavar='COMPONENT')
  options, args = parser.parse_args(sys_argv)

  # Require exactly two positional arguments (args[0] is the program
  # name); extra arguments abort instead of being silently ignored.
  if len(args) != 3:
    parser.error('needs 2 arguments.')

  dump_path = args[1]
  target_policy = args[2]
  component = options.component  # None when -c is not given.

  prefix = find_prefix(dump_path)
  buckets = load_buckets(prefix)
  dump, appeared_addresses = load_dump(dump_path, buckets)
  symbols = load_and_update_symbol_cache(prefix, appeared_addresses)
  policies = load_policies(target_policy)

  rule_list = policies[target_policy].rules

  # The maps file is read alongside the dump; 'with' guarantees it is
  # closed even if printing fails.
  with open(prefix + '.maps', 'r') as maps_f:
    maps_lines = maps_f.readlines()
  dump.print_for_pprof(rule_list, buckets, maps_lines, component, symbols)

  return 0
| + |
| + |
def main():
  """Entry point: dispatches to a subcommand handler by sys.argv[1].

  Returns:
      The subcommand's exit status (0 on success); exits with status 1
      after printing usage when the command is missing or unknown.
  """
  COMMANDS = {
    'csv': do_csv,
    'expand': do_expand,
    'json': do_json,
    'list': do_list,
    'pprof': do_pprof,
    'stacktrace': do_stacktrace,
  }

  # TODO(dmikurube): Remove this message after a while.
  # Old invocations used '--stacktrace'-style flags; warn their users.
  if len(sys.argv) >= 2 and sys.argv[1].startswith('--'):
    sys.stderr.write("""
**************** NOTICE!! ****************
  The command line format has changed.
  Please look at the description below.
******************************************

""")

  if len(sys.argv) < 2 or sys.argv[1] not in COMMANDS:
    sys.stderr.write("""Usage: %s <command> [options] [<args>]

Commands:
   csv          Classify memory usage in CSV
   expand       Show all stacktraces contained in the specified component
   json         Classify memory usage in JSON
   list         Classify memory usage in simple listing format
   pprof        Format the profile dump so that it can be processed by pprof
   stacktrace   Convert runtime addresses to symbol names

Quick Reference:
   dmprof csv [-p POLICY] <first-dump>
   dmprof expand <dump> <policy> <component> <depth>
   dmprof json [-p POLICY] <first-dump>
   dmprof list [-p POLICY] <first-dump>
   dmprof pprof [-c COMPONENT] <dump> <policy>
   dmprof stacktrace <dump>
""" % (sys.argv[0]))
    sys.exit(1)

  # Strip the subcommand from the argv handed to the handler without
  # mutating the global sys.argv (the old code used sys.argv.pop(1)).
  action = sys.argv[1]
  handler_argv = [sys.argv[0]] + sys.argv[2:]

  return COMMANDS[action](handler_argv)
| if __name__ == '__main__': |