Index: tools/clang/blink_gc_plugin/process-graph.py |
diff --git a/tools/clang/blink_gc_plugin/process-graph.py b/tools/clang/blink_gc_plugin/process-graph.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..b4fb1e64116c96e2b817cadf04bc33b1a9158c68 |
--- /dev/null |
+++ b/tools/clang/blink_gc_plugin/process-graph.py |
@@ -0,0 +1,464 @@ |
+#!/usr/bin/env python |
+# Copyright 2014 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+import argparse, os, sys, json, subprocess, pickle, StringIO |
+ |
# Command-line interface. NOTE: the bare '-' option below is nonstandard but
# accepted by argparse when an explicit dest= is supplied; it is how callers
# pipe a list of graph-file names in via stdin.
parser = argparse.ArgumentParser(
    description =
        "Process the Blink points-to graph generated by the Blink GC plugin.")

parser.add_argument(
    '-', dest='use_stdin', action='store_true',
    help='Read JSON graph files from stdin')

parser.add_argument(
    '-c', '--detect-cycles', action='store_true',
    help='Detect cycles containing GC roots')

parser.add_argument(
    '-s', '--print-stats', action='store_true',
    help='Statistics about ref-counted and traced objects')

parser.add_argument(
    '-v', '--verbose', action='store_true',
    help='Verbose output')

# File produced by an earlier run of this script (see report_cycle's output
# format, which read_ignored_cycles parses back in).
parser.add_argument(
    '--ignore-cycles', default=None, metavar='FILE',
    help='File with cycles to ignore')

parser.add_argument(
    '--ignore-classes', nargs='*', default=[], metavar='CLASS',
    help='Classes to ignore when detecting cycles')

# Caches the completed graph so repeated runs skip JSON parsing entirely.
parser.add_argument(
    '--pickle-graph', default=None, metavar='FILE',
    help='File to read/save the graph from/to')

parser.add_argument(
    'files', metavar='FILE_OR_DIR', nargs='*', default=[],
    help='JSON graph files or directories containing them')
+ |
# Command line args after parsing (argparse.Namespace, assigned in main()).
args = None

# Map from node labels (class names) to Node objects.
graph = {}

# List of root edges (kind == 2), collected by complete_graph().
roots = []

# List of cycles to ignore (see read_ignored_cycles for the element format).
ignored_cycles = []

# Global flag to determine exit code.
global_reported_error = False
+ |
def set_reported_error(value):
  """Record whether an error has been reported; drives the process exit code."""
  global global_reported_error
  global_reported_error = value

def reported_error():
  """Return the current value of the module-wide error flag."""
  return global_reported_error
+ |
def log(msg):
  # Progress messages are printed only when --verbose was given; `args` is
  # the module-level namespace assigned in main().
  if args.verbose:
    print msg
+ |
# Running count of super-class edge copies performed by copy_super_edges().
global_inc_copy = 0

def inc_copy():
  """Bump the global super-edge copy counter by one."""
  global global_inc_copy
  global_inc_copy = global_inc_copy + 1
+ |
def get_node(name):
  """Look up `name` in the global graph, creating a fresh Node on a miss."""
  if name not in graph:
    graph[name] = Node(name)
  return graph[name]
+ |
# The three pointer kinds tracked per node: raw pointer, RefPtr, Member.
ptr_types = ('raw', 'ref', 'mem')

def inc_ptr(dst, ptr):
  """Increment the `ptr`-kind counter on graph node `dst`, if both exist."""
  if ptr not in ptr_types:
    return
  node = graph.get(dst)
  if node is not None:
    node.counts[ptr] += 1
+ |
def add_counts(s1, s2):
  """Add each counter in `s2` into `s1`, in place.

  Every key of `s2` must already be present in `s1` (KeyError otherwise).
  Fixes: use the iterated value directly instead of a redundant second dict
  lookup, and plain items() instead of the Python-2-only iteritems().
  """
  for (k, v) in s2.items():
    s1[k] += v
+ |
# Representation of graph nodes. Basically a map of directed edges.
class Node:
  """One class in the points-to graph; holds outgoing edges keyed by Edge.key."""
  def __init__(self, name):
    self.name = name
    self.edges = {}
    self.reset()
  def __repr__(self):
    return "%s(%s) %s" % (self.name, self.visited, self.edges)
  def update_node(self, decl):
    # Currently we don't track any node info besides its edges.
    pass
  def update_edge(self, e):
    """Add the edge described by dict `e`, keeping the strongest duplicate."""
    new_edge = Edge(**e)
    edge = self.edges.get(new_edge.key)
    if edge:
      # If an edge exists, its kind is the strongest of the two.
      edge.kind = max(edge.kind, new_edge.kind)
    else:
      self.edges[new_edge.key] = new_edge
  def super_edges(self):
    # Edges labelled '<super>...' point at this class's super classes.
    return [ e for e in self.edges.values() if e.is_super() ]
  def subclass_edges(self):
    # Edges labelled '<subclass>...' point at this class's subclasses.
    return [ e for e in self.edges.values() if e.is_subclass() ]
  def reset(self):
    """Clear per-traversal state used by shortest_path/detect_cycles.

    Fixes: sys.maxsize replaces the Python-2-only sys.maxint (same value on
    CPython 2.6+ Linux builds) and .values() replaces itervalues(), so the
    class also runs under Python 3.
    """
    self.cost = sys.maxsize  # "not reached yet" sentinel for shortest_path.
    self.visited = False
    self.path = None         # Predecessor Edge set by shortest_path.
    self.counts = {}
    for ptr in ptr_types:
      self.counts[ptr] = 0
  def update_counts(self):
    # Attribute this node's outgoing pointers to their destination nodes.
    for e in self.edges.values():
      inc_ptr(e.dst, e.ptr)
+ |
# Representation of directed graph edges.
class Edge:
  """A directed edge; `kind` encodes strength: 0 = weak, 1 = strong, 2 = root."""

  # Field names copied verbatim from the JSON edge declaration.
  _FIELDS = ('src', 'dst', 'lbl', 'ptr', 'kind', 'loc')

  def __init__(self, **decl):
    for field in Edge._FIELDS:
      setattr(self, field, decl[field])
    # The label alone does not uniquely determine an edge from a node.  The
    # semi-unique key is the label concatenated with the dst name, which is
    # sufficient to track the strongest edge to a particular type.  E.g., a
    # field A::m_f of type HashMap<WeakMember<B>, Member<B>> yields one
    # strong edge with key m_f#B from A to B.
    self.key = '%s#%s' % (self.lbl, self.dst)

  def __repr__(self):
    return '%s (%s) => %s' % (self.src, self.lbl, self.dst)

  def is_root(self):
    return self.kind == 2

  def is_weak(self):
    return self.kind == 0

  def keeps_alive(self):
    return self.kind > 0

  def is_subclass(self):
    return self.lbl.startswith('<subclass>')

  def is_super(self):
    return self.lbl.startswith('<super>')
+ |
def parse_file(filename):
  """Parse one JSON graph file and return the decoded object.

  Fix: close the file deterministically with a with-statement (the original
  leaked the handle until garbage collection).
  """
  with open(filename) as f:
    return json.load(f)
+ |
def build_graphs_in_dir(dirname):
  """Build the graph from every *.graph.json file under `dirname`.

  Fixes: walks the tree with the platform-independent os.walk instead of
  spawning `find` (resolving the old TODO), which also removes the no-op
  `f.strip()` whose return value was discarded.
  """
  files = []
  for root, _dirs, names in os.walk(dirname):
    for name in names:
      if name.endswith('.graph.json'):
        files.append(os.path.join(root, name))
  log("Found %d files" % len(files))
  for f in files:
    build_graph(f)
+ |
def build_graph(filename):
  """Merge all declarations from one JSON graph file into the global graph.

  A declaration carrying a 'name' field describes a node; any other
  declaration is an edge record keyed by its 'src' node.  Fix: use the `in`
  operator instead of dict.has_key(), which is deprecated and removed in
  Python 3.
  """
  for decl in parse_file(filename):
    if 'name' in decl:
      # Add/update a node entry
      name = decl['name']
      node = get_node(name)
      node.update_node(decl)
    else:
      # Add/update an edge entry
      name = decl['src']
      node = get_node(name)
      node.update_edge(decl)
+ |
# Copy all non-weak edges from super classes to their subclasses.
# This causes all fields of a super to be considered fields of a
# derived class without transitively relating derived classes with
# each other. For example, if B <: A, C <: A, and for some D, D => B,
# we don't want that to entail that D => C.
def copy_super_edges(edge):
  """Flatten inheritance along one '<super>' edge.

  Recursively copies every strong field edge of the super class down onto
  the subclass (relabelled 'Super <: lbl'), and records a strong
  '<subclass>' back-edge from the super to the sub.  Mutates the global
  graph in place.
  """
  if edge.is_weak() or not edge.is_super():
    return
  inc_copy()
  # Make the super-class edge weak (prohibits processing twice).
  edge.kind = 0
  # If the super class is not in our graph exit early.
  super_node = graph.get(edge.dst)
  if super_node is None: return
  # Recursively copy all super-class edges.
  for e in super_node.super_edges():
    copy_super_edges(e)
  # Copy strong super-class edges (ignoring sub-class edges) to the sub class.
  sub_node = graph[edge.src]
  for e in super_node.edges.itervalues():
    if e.keeps_alive() and not e.is_subclass():
      new_edge = Edge(
        src = sub_node.name,
        dst = e.dst,
        lbl = '%s <: %s' % (super_node.name, e.lbl),
        ptr = e.ptr,
        kind = e.kind,
        loc = e.loc,
      )
      sub_node.edges[new_edge.key] = new_edge
  # Add a strong sub-class edge.
  sub_edge = Edge(
      src = super_node.name,
      dst = sub_node.name,
      lbl = '<subclass>',
      ptr = edge.ptr,
      kind = 1,
      loc = edge.loc,
  )
  super_node.edges[sub_edge.key] = sub_edge
+ |
def complete_graph():
  """Post-process the freshly built graph.

  Flattens inheritance (copying strong super-class edges down to each
  subclass via copy_super_edges) and collects every root edge (kind == 2)
  into the global `roots` list.
  """
  for node in graph.itervalues():
    for edge in node.super_edges():
      copy_super_edges(edge)
    for edge in node.edges.itervalues():
      if edge.is_root():
        roots.append(edge)
  # NOTE(review): global_inc_copy counts copy *operations*, not nodes --
  # the message wording is slightly off.
  log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy)
+ |
def reset_graph():
  """Clear per-traversal state (cost/visited/path/counts) on every node.

  Fix: .values() instead of the Python-2-only itervalues() (identical
  iteration behavior on Python 2, also runs on Python 3).
  """
  for n in graph.values():
    n.reset()
+ |
def shortest_path(start, end):
  """Unit-weight Dijkstra from `start` toward `end` over keeps-alive edges.

  Leaves the distance in each reached node's .cost and the predecessor edge
  in .path (consumed by report_cycle).  Assumes reset_graph() ran first so
  every .cost is the maxint sentinel and .visited is False.  Returns early
  once `end` is settled or no shorter path to it can exist.
  """
  start.cost = 0
  minlist = [start]
  while len(minlist) > 0:
    # Sort descending and pop from the tail == extract the minimum-cost node.
    minlist.sort(key=lambda n: -n.cost)
    current = minlist.pop()
    current.visited = True
    # Stop when end is settled, or when every remaining node is already at
    # least as far as the best-known path to end (no improvement possible).
    if current == end or current.cost >= end.cost + 1:
      return
    for e in current.edges.itervalues():
      # Weak edges do not keep their destination alive; skip them.
      if not e.keeps_alive():
        continue
      dst = graph.get(e.dst)
      if dst is None or dst.visited:
        continue
      if current.cost < dst.cost:
        dst.cost = current.cost + 1
        dst.path = e
        minlist.append(dst)
+ |
def detect_cycles():
  """Report a potential leak for every GC root whose target can reach back
  to the root's host through keeps-alive edges."""
  for root_edge in roots:
    reset_graph()
    # Mark ignored classes as already visited
    for ignore in args.ignore_classes:
      # Old-style conditional expression: qualify bare class names with the
      # 'blink::' namespace unless they already contain '::'.
      name = ignore.find("::") > 0 and ignore or ("blink::" + ignore)
      node = graph.get(name)
      if node:
        node.visited = True
    src = graph[root_edge.src]
    dst = graph.get(root_edge.dst)
    # Host is in the ignore set: skip this root entirely.
    if src.visited:
      continue
    # NOTE(review): WTF::String is special-cased out -- presumably a known
    # benign root target; confirm before relying on it.
    if root_edge.dst == "WTF::String":
      continue
    if dst is None:
      print "\nPersistent root to incomplete destination object:"
      print root_edge
      set_reported_error(True)
      continue
    # Find the shortest path from the root target (dst) to its host (src)
    shortest_path(dst, src)
    # src.cost below the sentinel means dst can reach src: a cycle.
    if src.cost < sys.maxint:
      report_cycle(root_edge)
+ |
def is_ignored_cycle(cycle):
  """Return True if the formatted cycle text matches any ignored entry.

  Fix: always return a bool (the original fell off the end and returned
  None for the no-match case).
  """
  for block in ignored_cycles:
    if block_match(cycle, block):
      return True
  return False

def block_match(b1, b2):
  """Element-wise equality of two same-length sequences.

  Deliberately compares across sequence types (report_cycle passes a string
  while read_ignored_cycles builds a list of characters), so this must not
  be replaced by a plain `b1 == b2`.
  """
  if len(b1) != len(b2):
    return False
  return all(l1 == l2 for (l1, l2) in zip(b1, b2))
+ |
def report_cycle(root_edge):
  """Pretty-print the cycle found for `root_edge`, unless it is ignored.

  Walks the .path predecessor edges recorded by shortest_path from the
  root's host back to the root's target to rebuild the path, then prints
  the root edge followed by that path with location-aligned columns.
  """
  dst = graph[root_edge.dst]
  path = []
  edge = root_edge
  # Terminate the predecessor walk at the search start node.
  dst.path = None
  while edge:
    path.append(edge)
    edge = graph[edge.src].path
  # The root edge is appended again so that after reverse() the list reads
  # root, dst -> ... -> src, root; the duplicate tail is dropped below.
  path.append(root_edge)
  path.reverse()
  # Find the max loc length for pretty printing.
  max_loc = 0
  for p in path:
    if len(p.loc) > max_loc:
      max_loc = len(p.loc)
  out = StringIO.StringIO()
  for p in path[:-1]:
    print >>out, (p.loc + ':').ljust(max_loc + 1), p
  sout = out.getvalue()
  if not is_ignored_cycle(sout):
    print "\nFound a potentially leaking cycle starting from a GC root:\n", sout
    set_reported_error(True)
+ |
def load_graph():
  """Replace the global graph/roots with the contents of the pickle file.

  Fix: close the pickle file deterministically with a with-statement.
  NOTE(review): pickle.load on an untrusted file can execute arbitrary
  code -- only feed this pickles produced by save_graph().
  """
  global graph
  global roots
  log("Reading graph from pickled file: " + args.pickle_graph)
  with open(args.pickle_graph, 'rb') as f:
    graph, roots = pickle.load(f)
+ |
def save_graph():
  """Pickle the global (graph, roots) pair to args.pickle_graph.

  Fix: close the output file deterministically so the dump is fully
  flushed even if the interpreter exits abruptly afterwards.
  """
  log("Saving graph to pickle file: " + args.pickle_graph)
  with open(args.pickle_graph, 'wb') as f:
    pickle.dump((graph, roots), f)
+ |
def read_ignored_cycles():
  """Load cycle blocks to suppress from args.ignore_cycles, if given.

  Blocks are delimited by blank lines or by the 'Found ...' header lines
  that report_cycle prints, so a previous run's output can be used
  directly as an ignore file.
  """
  global ignored_cycles
  if not args.ignore_cycles:
    return
  log("Reading ignored cycles from file: " + args.ignore_cycles)
  block = []
  for l in open(args.ignore_cycles):
    line = l.strip()
    if not line or line.startswith('Found'):
      if len(block) > 0:
        ignored_cycles.append(block)
      block = []
    else:
      # NOTE(review): `block += l` extends the list with the *characters*
      # of the line, not the line itself.  block_match() then compares the
      # formatted cycle string character-by-character against this, so the
      # two quirks cancel out -- do not "fix" one without the other.
      block += l
  if len(block) > 0:
    ignored_cycles.append(block)
+ |
# Base classes marking a type as garbage collected.
gc_bases = (
  'blink::GarbageCollected',
  'blink::GarbageCollectedFinalized',
  'blink::GarbageCollectedMixin',
)
# Base classes marking a type as reference counted.
ref_bases = (
  'WTF::RefCounted',
  'WTF::ThreadSafeRefCounted',
)
# Transitional bases: ref counted AND garbage collected at the same time.
gcref_bases = (
  'blink::RefCountedGarbageCollected',
  'blink::ThreadSafeRefCountedGarbageCollected',
)
# Mixin bases counted separately in the transition statistics.
ref_mixins = (
  'blink::EventTarget',
  'blink::EventTargetWithInlineData',
  'blink::ActiveDOMObject',
)
+ |
def print_stats():
  """Print the GC-transition statistics requested by --print-stats.

  Classifies every node by its direct bases (GC managed, ref counted, or
  the transitional RefCountedGarbageCollected), then prints per-hierarchy
  pointer-kind counts for the transitional hierarchies.
  """
  gcref_managed = []
  ref_managed = []
  gc_managed = []
  hierarchies = []

  # Classify each node by the first matching base-class category.
  for node in graph.itervalues():
    node.update_counts()
    for sup in node.super_edges():
      if sup.dst in gcref_bases:
        gcref_managed.append(node)
      elif sup.dst in ref_bases:
        ref_managed.append(node)
      elif sup.dst in gc_bases:
        gc_managed.append(node)

  # NOTE(review): "GC manged" is a typo in a runtime string ("managed");
  # left untouched here since this edit changes comments only.
  groups = [("GC manged ", gc_managed),
            ("ref counted ", ref_managed),
            ("in transition", gcref_managed)]
  total = sum([len(g) for (s,g) in groups])
  for (s, g) in groups:
    # Python 2 integer division: percentages are truncated.
    percent = len(g) * 100 / total
    print "%2d%% is %s (%d hierarchies)" % (percent, s, len(g))

  # Accumulate pointer-kind counts over each transitional hierarchy.
  for base in gcref_managed:
    stats = dict({ 'classes': 0, 'ref-mixins': 0 })
    for ptr in ptr_types: stats[ptr] = 0
    hierarchy_stats(base, stats)
    hierarchies.append((base, stats))

  print "\nHierarchies in transition (RefCountedGarbageCollected):"
  # Largest hierarchies first. (Tuple-parameter lambda is Python-2-only.)
  hierarchies.sort(key=lambda (n,s): -s['classes'])
  for (node, stats) in hierarchies:
    total = stats['mem'] + stats['ref'] + stats['raw']
    # '*' marks hierarchies with no ref pointers or ref mixins left; the
    # percentage is the share of Member pointers among all pointers.
    print (
      "%s %3d%% of %-30s: %3d cls, %3d mem, %3d ref, %3d raw, %3d ref-mixins" %
      (stats['ref'] == 0 and stats['ref-mixins'] == 0 and "*" or " ",
       total == 0 and 100 or stats['mem'] * 100 / total,
       node.name.replace('blink::', ''),
       stats['classes'],
       stats['mem'],
       stats['ref'],
       stats['raw'],
       stats['ref-mixins'],
      ))
+ |
def hierarchy_stats(node, stats):
  """Recursively accumulate pointer/class counts over `node` and all of its
  transitive subclasses into the `stats` dict."""
  if not node:
    return
  stats['classes'] += 1
  add_counts(stats, node.counts)
  stats['ref-mixins'] += len(
      [e for e in node.super_edges() if e.dst in ref_mixins])
  for edge in node.subclass_edges():
    hierarchy_stats(graph.get(edge.dst), stats)
+ |
def main():
  """Entry point: parse args, build or load the graph, run the analyses.

  Returns the process exit code: 1 on usage errors or when any cycle/root
  error was reported, 0 otherwise.
  """
  global args
  args = parser.parse_args()
  if not (args.detect_cycles or args.print_stats):
    print "Please select an operation to perform (eg, -c to detect cycles)"
    parser.print_help()
    return 1
  # Reuse a previously pickled graph when one exists; otherwise build it
  # from JSON files and (optionally) pickle the completed result.
  if args.pickle_graph and os.path.isfile(args.pickle_graph):
    load_graph()
  else:
    if args.use_stdin:
      log("Reading files from stdin")
      for f in sys.stdin:
        build_graph(f.strip())
    else:
      log("Reading files and directories from command line")
      if len(args.files) == 0:
        # NOTE(review): "directores" is a typo in a runtime string
        # ("directories"); left untouched in this comments-only edit.
        print "Please provide files or directores for building the graph"
        parser.print_help()
        return 1
      for f in args.files:
        if os.path.isdir(f):
          log("Building graph from files in directory: " + f)
          build_graphs_in_dir(f)
        else:
          log("Building graph from file: " + f)
          build_graph(f)
    log("Completing graph construction (%d graph nodes)" % len(graph))
    complete_graph()
    if args.pickle_graph:
      save_graph()
  if args.detect_cycles:
    read_ignored_cycles()
    # NOTE(review): "containg" is a typo ("containing") in the log string.
    log("Detecting cycles containg GC roots")
    detect_cycles()
  if args.print_stats:
    log("Printing statistics")
    print_stats()
  if reported_error():
    return 1
  return 0
+ |
# Script entry point; the exit status signals whether errors were reported.
if __name__ == '__main__':
  sys.exit(main())