Index: tools/clang/blink_gc_plugin/process-graph.py |
diff --git a/tools/clang/blink_gc_plugin/process-graph.py b/tools/clang/blink_gc_plugin/process-graph.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..b4fb1e64116c96e2b817cadf04bc33b1a9158c68 |
--- /dev/null |
+++ b/tools/clang/blink_gc_plugin/process-graph.py |
@@ -0,0 +1,464 @@ |
+#!/usr/bin/env python |
+# Copyright 2014 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+import argparse, os, sys, json, subprocess, pickle, StringIO |
+ |
# Command-line interface. NOTE: the bare '-' option below is nonstandard but
# accepted by argparse when an explicit dest= is supplied; it is how callers
# pipe a list of graph-file names in via stdin.
parser = argparse.ArgumentParser(
    description =
        "Process the Blink points-to graph generated by the Blink GC plugin.")

parser.add_argument(
    '-', dest='use_stdin', action='store_true',
    help='Read JSON graph files from stdin')

parser.add_argument(
    '-c', '--detect-cycles', action='store_true',
    help='Detect cycles containing GC roots')

parser.add_argument(
    '-s', '--print-stats', action='store_true',
    help='Statistics about ref-counted and traced objects')

parser.add_argument(
    '-v', '--verbose', action='store_true',
    help='Verbose output')

# File produced by an earlier run of this script (see report_cycle's output
# format, which read_ignored_cycles parses back in).
parser.add_argument(
    '--ignore-cycles', default=None, metavar='FILE',
    help='File with cycles to ignore')

parser.add_argument(
    '--ignore-classes', nargs='*', default=[], metavar='CLASS',
    help='Classes to ignore when detecting cycles')

# Caches the completed graph so repeated runs skip JSON parsing entirely.
parser.add_argument(
    '--pickle-graph', default=None, metavar='FILE',
    help='File to read/save the graph from/to')

parser.add_argument(
    'files', metavar='FILE_OR_DIR', nargs='*', default=[],
    help='JSON graph files or directories containing them')
+ |
# Command line args after parsing (argparse.Namespace, assigned in main()).
args = None

# Map from node labels (class names) to Node objects.
graph = {}

# List of root edges (kind == 2), collected by complete_graph().
roots = []

# List of cycles to ignore (see read_ignored_cycles for the element format).
ignored_cycles = []

# Global flag to determine exit code.
global_reported_error = False
+ |
def set_reported_error(value):
  """Record whether an error has been reported; drives the process exit code."""
  global global_reported_error
  global_reported_error = value

def reported_error():
  """Return the current value of the module-wide error flag."""
  return global_reported_error
+ |
def log(msg):
  # Progress messages are printed only when --verbose was given; `args` is
  # the module-level namespace assigned in main().
  if args.verbose:
    print msg
+ |
# Running count of super-class edge copies performed by copy_super_edges().
global_inc_copy = 0

def inc_copy():
  """Bump the global super-edge copy counter by one."""
  global global_inc_copy
  global_inc_copy = global_inc_copy + 1
+ |
def get_node(name):
  """Look up `name` in the global graph, creating a fresh Node on a miss."""
  if name not in graph:
    graph[name] = Node(name)
  return graph[name]
+ |
# The three pointer kinds tracked per node: raw pointer, RefPtr, Member.
ptr_types = ('raw', 'ref', 'mem')

def inc_ptr(dst, ptr):
  """Increment the `ptr`-kind counter on graph node `dst`, if both exist."""
  if ptr not in ptr_types:
    return
  node = graph.get(dst)
  if node is not None:
    node.counts[ptr] += 1
+ |
def add_counts(s1, s2):
  """Add each counter in `s2` into `s1`, in place.

  Every key of `s2` must already be present in `s1` (KeyError otherwise).
  Fixes: use the iterated value directly instead of a redundant second dict
  lookup, and plain items() instead of the Python-2-only iteritems().
  """
  for (k, v) in s2.items():
    s1[k] += v
+ |
# Representation of graph nodes. Basically a map of directed edges.
class Node:
  """One class in the points-to graph; holds outgoing edges keyed by Edge.key."""
  def __init__(self, name):
    self.name = name
    self.edges = {}
    self.reset()
  def __repr__(self):
    return "%s(%s) %s" % (self.name, self.visited, self.edges)
  def update_node(self, decl):
    # Currently we don't track any node info besides its edges.
    pass
  def update_edge(self, e):
    """Add the edge described by dict `e`, keeping the strongest duplicate."""
    new_edge = Edge(**e)
    edge = self.edges.get(new_edge.key)
    if edge:
      # If an edge exists, its kind is the strongest of the two.
      edge.kind = max(edge.kind, new_edge.kind)
    else:
      self.edges[new_edge.key] = new_edge
  def super_edges(self):
    # Edges labelled '<super>...' point at this class's super classes.
    return [ e for e in self.edges.values() if e.is_super() ]
  def subclass_edges(self):
    # Edges labelled '<subclass>...' point at this class's subclasses.
    return [ e for e in self.edges.values() if e.is_subclass() ]
  def reset(self):
    """Clear per-traversal state used by shortest_path/detect_cycles.

    Fixes: sys.maxsize replaces the Python-2-only sys.maxint (same value on
    CPython 2.6+ Linux builds) and .values() replaces itervalues(), so the
    class also runs under Python 3.
    """
    self.cost = sys.maxsize  # "not reached yet" sentinel for shortest_path.
    self.visited = False
    self.path = None         # Predecessor Edge set by shortest_path.
    self.counts = {}
    for ptr in ptr_types:
      self.counts[ptr] = 0
  def update_counts(self):
    # Attribute this node's outgoing pointers to their destination nodes.
    for e in self.edges.values():
      inc_ptr(e.dst, e.ptr)
+ |
# Representation of directed graph edges.
class Edge:
  """A directed edge; `kind` encodes strength: 0 = weak, 1 = strong, 2 = root."""

  # Field names copied verbatim from the JSON edge declaration.
  _FIELDS = ('src', 'dst', 'lbl', 'ptr', 'kind', 'loc')

  def __init__(self, **decl):
    for field in Edge._FIELDS:
      setattr(self, field, decl[field])
    # The label alone does not uniquely determine an edge from a node.  The
    # semi-unique key is the label concatenated with the dst name, which is
    # sufficient to track the strongest edge to a particular type.  E.g., a
    # field A::m_f of type HashMap<WeakMember<B>, Member<B>> yields one
    # strong edge with key m_f#B from A to B.
    self.key = '%s#%s' % (self.lbl, self.dst)

  def __repr__(self):
    return '%s (%s) => %s' % (self.src, self.lbl, self.dst)

  def is_root(self):
    return self.kind == 2

  def is_weak(self):
    return self.kind == 0

  def keeps_alive(self):
    return self.kind > 0

  def is_subclass(self):
    return self.lbl.startswith('<subclass>')

  def is_super(self):
    return self.lbl.startswith('<super>')
+ |
def parse_file(filename):
  """Parse one JSON graph file and return the decoded object.

  Fix: close the file deterministically with a with-statement (the original
  leaked the handle until garbage collection).
  """
  with open(filename) as f:
    return json.load(f)
+ |
def build_graphs_in_dir(dirname):
  """Build the graph from every *.graph.json file under `dirname`.

  Fixes: walks the tree with the platform-independent os.walk instead of
  spawning `find` (resolving the old TODO), which also removes the no-op
  `f.strip()` whose return value was discarded.
  """
  files = []
  for root, _dirs, names in os.walk(dirname):
    for name in names:
      if name.endswith('.graph.json'):
        files.append(os.path.join(root, name))
  log("Found %d files" % len(files))
  for f in files:
    build_graph(f)
+ |
def build_graph(filename):
  """Merge all declarations from one JSON graph file into the global graph.

  A declaration carrying a 'name' field describes a node; any other
  declaration is an edge record keyed by its 'src' node.  Fix: use the `in`
  operator instead of dict.has_key(), which is deprecated and removed in
  Python 3.
  """
  for decl in parse_file(filename):
    if 'name' in decl:
      # Add/update a node entry
      name = decl['name']
      node = get_node(name)
      node.update_node(decl)
    else:
      # Add/update an edge entry
      name = decl['src']
      node = get_node(name)
      node.update_edge(decl)
+ |
# Copy all non-weak edges from super classes to their subclasses.
# This causes all fields of a super to be considered fields of a
# derived class without transitively relating derived classes with
# each other. For example, if B <: A, C <: A, and for some D, D => B,
# we don't want that to entail that D => C.
def copy_super_edges(edge):
  """Flatten inheritance along one '<super>' edge.

  Recursively copies every strong field edge of the super class down onto
  the subclass (relabelled 'Super <: lbl'), and records a strong
  '<subclass>' back-edge from the super to the sub.  Mutates the global
  graph in place.
  """
  if edge.is_weak() or not edge.is_super():
    return
  inc_copy()
  # Make the super-class edge weak (prohibits processing twice).
  edge.kind = 0
  # If the super class is not in our graph exit early.
  super_node = graph.get(edge.dst)
  if super_node is None: return
  # Recursively copy all super-class edges.
  for e in super_node.super_edges():
    copy_super_edges(e)
  # Copy strong super-class edges (ignoring sub-class edges) to the sub class.
  sub_node = graph[edge.src]
  for e in super_node.edges.itervalues():
    if e.keeps_alive() and not e.is_subclass():
      new_edge = Edge(
        src = sub_node.name,
        dst = e.dst,
        lbl = '%s <: %s' % (super_node.name, e.lbl),
        ptr = e.ptr,
        kind = e.kind,
        loc = e.loc,
      )
      sub_node.edges[new_edge.key] = new_edge
  # Add a strong sub-class edge.
  sub_edge = Edge(
      src = super_node.name,
      dst = sub_node.name,
      lbl = '<subclass>',
      ptr = edge.ptr,
      kind = 1,
      loc = edge.loc,
  )
  super_node.edges[sub_edge.key] = sub_edge
+ |
def complete_graph():
  """Post-process the freshly built graph.

  Flattens inheritance (copying strong super-class edges down to each
  subclass via copy_super_edges) and collects every root edge (kind == 2)
  into the global `roots` list.
  """
  for node in graph.itervalues():
    for edge in node.super_edges():
      copy_super_edges(edge)
    for edge in node.edges.itervalues():
      if edge.is_root():
        roots.append(edge)
  # NOTE(review): global_inc_copy counts copy *operations*, not nodes --
  # the message wording is slightly off.
  log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy)
+ |
def reset_graph():
  """Clear per-traversal state (cost/visited/path/counts) on every node.

  Fix: .values() instead of the Python-2-only itervalues() (identical
  iteration behavior on Python 2, also runs on Python 3).
  """
  for n in graph.values():
    n.reset()
+ |
def shortest_path(start, end):
  """Unit-weight Dijkstra from `start` toward `end` over keeps-alive edges.

  Leaves the distance in each reached node's .cost and the predecessor edge
  in .path (consumed by report_cycle).  Assumes reset_graph() ran first so
  every .cost is the maxint sentinel and .visited is False.  Returns early
  once `end` is settled or no shorter path to it can exist.
  """
  start.cost = 0
  minlist = [start]
  while len(minlist) > 0:
    # Sort descending and pop from the tail == extract the minimum-cost node.
    minlist.sort(key=lambda n: -n.cost)
    current = minlist.pop()
    current.visited = True
    # Stop when end is settled, or when every remaining node is already at
    # least as far as the best-known path to end (no improvement possible).
    if current == end or current.cost >= end.cost + 1:
      return
    for e in current.edges.itervalues():
      # Weak edges do not keep their destination alive; skip them.
      if not e.keeps_alive():
        continue
      dst = graph.get(e.dst)
      if dst is None or dst.visited:
        continue
      if current.cost < dst.cost:
        dst.cost = current.cost + 1
        dst.path = e
        minlist.append(dst)
+ |
def detect_cycles():
  """Report a potential leak for every GC root whose target can reach back
  to the root's host through keeps-alive edges."""
  for root_edge in roots:
    reset_graph()
    # Mark ignored classes as already visited
    for ignore in args.ignore_classes:
      # Old-style conditional expression: qualify bare class names with the
      # 'blink::' namespace unless they already contain '::'.
      name = ignore.find("::") > 0 and ignore or ("blink::" + ignore)
      node = graph.get(name)
      if node:
        node.visited = True
    src = graph[root_edge.src]
    dst = graph.get(root_edge.dst)
    # Host is in the ignore set: skip this root entirely.
    if src.visited:
      continue
    # NOTE(review): WTF::String is special-cased out -- presumably a known
    # benign root target; confirm before relying on it.
    if root_edge.dst == "WTF::String":
      continue
    if dst is None:
      print "\nPersistent root to incomplete destination object:"
      print root_edge
      set_reported_error(True)
      continue
    # Find the shortest path from the root target (dst) to its host (src)
    shortest_path(dst, src)
    # src.cost below the sentinel means dst can reach src: a cycle.
    if src.cost < sys.maxint:
      report_cycle(root_edge)
+ |
def is_ignored_cycle(cycle):
  """Return True if the formatted cycle text matches any ignored entry.

  Fix: always return a bool (the original fell off the end and returned
  None for the no-match case).
  """
  for block in ignored_cycles:
    if block_match(cycle, block):
      return True
  return False

def block_match(b1, b2):
  """Element-wise equality of two same-length sequences.

  Deliberately compares across sequence types (report_cycle passes a string
  while read_ignored_cycles builds a list of characters), so this must not
  be replaced by a plain `b1 == b2`.
  """
  if len(b1) != len(b2):
    return False
  return all(l1 == l2 for (l1, l2) in zip(b1, b2))
+ |
def report_cycle(root_edge):
  """Pretty-print the cycle found for `root_edge`, unless it is ignored.

  Walks the .path predecessor edges recorded by shortest_path from the
  root's host back to the root's target to rebuild the path, then prints
  the root edge followed by that path with location-aligned columns.
  """
  dst = graph[root_edge.dst]
  path = []
  edge = root_edge
  # Terminate the predecessor walk at the search start node.
  dst.path = None
  while edge:
    path.append(edge)
    edge = graph[edge.src].path
  # The root edge is appended again so that after reverse() the list reads
  # root, dst -> ... -> src, root; the duplicate tail is dropped below.
  path.append(root_edge)
  path.reverse()
  # Find the max loc length for pretty printing.
  max_loc = 0
  for p in path:
    if len(p.loc) > max_loc:
      max_loc = len(p.loc)
  out = StringIO.StringIO()
  for p in path[:-1]:
    print >>out, (p.loc + ':').ljust(max_loc + 1), p
  sout = out.getvalue()
  if not is_ignored_cycle(sout):
    print "\nFound a potentially leaking cycle starting from a GC root:\n", sout
    set_reported_error(True)
+ |
def load_graph():
  """Replace the global graph/roots with the contents of the pickle file.

  Fix: close the pickle file deterministically with a with-statement.
  NOTE(review): pickle.load on an untrusted file can execute arbitrary
  code -- only feed this pickles produced by save_graph().
  """
  global graph
  global roots
  log("Reading graph from pickled file: " + args.pickle_graph)
  with open(args.pickle_graph, 'rb') as f:
    graph, roots = pickle.load(f)
+ |
def save_graph():
  """Pickle the global (graph, roots) pair to args.pickle_graph.

  Fix: close the output file deterministically so the dump is fully
  flushed even if the interpreter exits abruptly afterwards.
  """
  log("Saving graph to pickle file: " + args.pickle_graph)
  with open(args.pickle_graph, 'wb') as f:
    pickle.dump((graph, roots), f)
+ |
def read_ignored_cycles():
  """Load cycle blocks to suppress from args.ignore_cycles, if given.

  Blocks are delimited by blank lines or by the 'Found ...' header lines
  that report_cycle prints, so a previous run's output can be used
  directly as an ignore file.
  """
  global ignored_cycles
  if not args.ignore_cycles:
    return
  log("Reading ignored cycles from file: " + args.ignore_cycles)
  block = []
  for l in open(args.ignore_cycles):
    line = l.strip()
    if not line or line.startswith('Found'):
      if len(block) > 0:
        ignored_cycles.append(block)
      block = []
    else:
      # NOTE(review): `block += l` extends the list with the *characters*
      # of the line, not the line itself.  block_match() then compares the
      # formatted cycle string character-by-character against this, so the
      # two quirks cancel out -- do not "fix" one without the other.
      block += l
  if len(block) > 0:
    ignored_cycles.append(block)
+ |
# Base classes marking a type as garbage collected.
gc_bases = (
  'blink::GarbageCollected',
  'blink::GarbageCollectedFinalized',
  'blink::GarbageCollectedMixin',
)
# Base classes marking a type as reference counted.
ref_bases = (
  'WTF::RefCounted',
  'WTF::ThreadSafeRefCounted',
)
# Transitional bases: ref counted AND garbage collected at the same time.
gcref_bases = (
  'blink::RefCountedGarbageCollected',
  'blink::ThreadSafeRefCountedGarbageCollected',
)
# Mixin bases counted separately in the transition statistics.
ref_mixins = (
  'blink::EventTarget',
  'blink::EventTargetWithInlineData',
  'blink::ActiveDOMObject',
)
+ |
def print_stats():
  """Print the GC-transition statistics requested by --print-stats.

  Classifies every node by its direct bases (GC managed, ref counted, or
  the transitional RefCountedGarbageCollected), then prints per-hierarchy
  pointer-kind counts for the transitional hierarchies.
  """
  gcref_managed = []
  ref_managed = []
  gc_managed = []
  hierarchies = []

  # Classify each node by the first matching base-class category.
  for node in graph.itervalues():
    node.update_counts()
    for sup in node.super_edges():
      if sup.dst in gcref_bases:
        gcref_managed.append(node)
      elif sup.dst in ref_bases:
        ref_managed.append(node)
      elif sup.dst in gc_bases:
        gc_managed.append(node)

  # NOTE(review): "GC manged" is a typo in a runtime string ("managed");
  # left untouched here since this edit changes comments only.
  groups = [("GC manged ", gc_managed),
            ("ref counted ", ref_managed),
            ("in transition", gcref_managed)]
  total = sum([len(g) for (s,g) in groups])
  for (s, g) in groups:
    # Python 2 integer division: percentages are truncated.
    percent = len(g) * 100 / total
    print "%2d%% is %s (%d hierarchies)" % (percent, s, len(g))

  # Accumulate pointer-kind counts over each transitional hierarchy.
  for base in gcref_managed:
    stats = dict({ 'classes': 0, 'ref-mixins': 0 })
    for ptr in ptr_types: stats[ptr] = 0
    hierarchy_stats(base, stats)
    hierarchies.append((base, stats))

  print "\nHierarchies in transition (RefCountedGarbageCollected):"
  # Largest hierarchies first. (Tuple-parameter lambda is Python-2-only.)
  hierarchies.sort(key=lambda (n,s): -s['classes'])
  for (node, stats) in hierarchies:
    total = stats['mem'] + stats['ref'] + stats['raw']
    # '*' marks hierarchies with no ref pointers or ref mixins left; the
    # percentage is the share of Member pointers among all pointers.
    print (
      "%s %3d%% of %-30s: %3d cls, %3d mem, %3d ref, %3d raw, %3d ref-mixins" %
      (stats['ref'] == 0 and stats['ref-mixins'] == 0 and "*" or " ",
       total == 0 and 100 or stats['mem'] * 100 / total,
       node.name.replace('blink::', ''),
       stats['classes'],
       stats['mem'],
       stats['ref'],
       stats['raw'],
       stats['ref-mixins'],
      ))
+ |
def hierarchy_stats(node, stats):
  """Recursively accumulate pointer/class counts over `node` and all of its
  transitive subclasses into the `stats` dict."""
  if not node:
    return
  stats['classes'] += 1
  add_counts(stats, node.counts)
  stats['ref-mixins'] += len(
      [e for e in node.super_edges() if e.dst in ref_mixins])
  for edge in node.subclass_edges():
    hierarchy_stats(graph.get(edge.dst), stats)
+ |
def main():
  """Entry point: parse args, build or load the graph, run the analyses.

  Returns the process exit code: 1 on usage errors or when any cycle/root
  error was reported, 0 otherwise.
  """
  global args
  args = parser.parse_args()
  if not (args.detect_cycles or args.print_stats):
    print "Please select an operation to perform (eg, -c to detect cycles)"
    parser.print_help()
    return 1
  # Reuse a previously pickled graph when one exists; otherwise build it
  # from JSON files and (optionally) pickle the completed result.
  if args.pickle_graph and os.path.isfile(args.pickle_graph):
    load_graph()
  else:
    if args.use_stdin:
      log("Reading files from stdin")
      for f in sys.stdin:
        build_graph(f.strip())
    else:
      log("Reading files and directories from command line")
      if len(args.files) == 0:
        # NOTE(review): "directores" is a typo in a runtime string
        # ("directories"); left untouched in this comments-only edit.
        print "Please provide files or directores for building the graph"
        parser.print_help()
        return 1
      for f in args.files:
        if os.path.isdir(f):
          log("Building graph from files in directory: " + f)
          build_graphs_in_dir(f)
        else:
          log("Building graph from file: " + f)
          build_graph(f)
    log("Completing graph construction (%d graph nodes)" % len(graph))
    complete_graph()
    if args.pickle_graph:
      save_graph()
  if args.detect_cycles:
    read_ignored_cycles()
    # NOTE(review): "containg" is a typo ("containing") in the log string.
    log("Detecting cycles containg GC roots")
    detect_cycles()
  if args.print_stats:
    log("Printing statistics")
    print_stats()
  if reported_error():
    return 1
  return 0
+ |
# Script entry point; the exit status signals whether errors were reported.
if __name__ == '__main__':
  sys.exit(main())