| Index: tools/clang/blink_gc_plugin/process-graph.py
 | 
| diff --git a/tools/clang/blink_gc_plugin/process-graph.py b/tools/clang/blink_gc_plugin/process-graph.py
 | 
| new file mode 100755
 | 
| index 0000000000000000000000000000000000000000..b4fb1e64116c96e2b817cadf04bc33b1a9158c68
 | 
| --- /dev/null
 | 
| +++ b/tools/clang/blink_gc_plugin/process-graph.py
 | 
| @@ -0,0 +1,464 @@
 | 
| +#!/usr/bin/env python
 | 
| +# Copyright 2014 The Chromium Authors. All rights reserved.
 | 
| +# Use of this source code is governed by a BSD-style license that can be
 | 
| +# found in the LICENSE file.
 | 
| +
 | 
| +import argparse, os, sys, json, subprocess, pickle, StringIO
 | 
| +
 | 
| +parser = argparse.ArgumentParser(
 | 
| +  description =
 | 
| +    "Process the Blink points-to graph generated by the Blink GC plugin.")
 | 
| +
 | 
| +parser.add_argument(
 | 
| +  '-', dest='use_stdin', action='store_true',
 | 
| +  help='Read JSON graph files from stdin')
 | 
| +
 | 
| +parser.add_argument(
 | 
| +  '-c', '--detect-cycles', action='store_true',
 | 
| +  help='Detect cycles containing GC roots')
 | 
| +
 | 
| +parser.add_argument(
 | 
| +  '-s', '--print-stats', action='store_true',
 | 
| +  help='Statistics about ref-counted and traced objects')
 | 
| +
 | 
| +parser.add_argument(
 | 
| +  '-v', '--verbose', action='store_true',
 | 
| +  help='Verbose output')
 | 
| +
 | 
| +parser.add_argument(
 | 
| +  '--ignore-cycles', default=None, metavar='FILE',
 | 
| +  help='File with cycles to ignore')
 | 
| +
 | 
| +parser.add_argument(
 | 
| +  '--ignore-classes', nargs='*', default=[], metavar='CLASS',
 | 
| +  help='Classes to ignore when detecting cycles')
 | 
| +
 | 
| +parser.add_argument(
 | 
| +  '--pickle-graph', default=None, metavar='FILE',
 | 
| +  help='File to read/save the graph from/to')
 | 
| +
 | 
| +parser.add_argument(
 | 
| +  'files', metavar='FILE_OR_DIR', nargs='*', default=[],
 | 
| +  help='JSON graph files or directories containing them')
 | 
| +
 | 
| +# Command line args after parsing.
 | 
| +args = None
 | 
| +
 | 
| +# Map from node labels to nodes.
 | 
| +graph = {}
 | 
| +
 | 
| +# Set of root nodes.
 | 
| +roots = []
 | 
| +
 | 
| +# List of cycles to ignore.
 | 
| +ignored_cycles = []
 | 
| +
 | 
| +# Global flag to determine exit code.
 | 
| +global_reported_error = False
 | 
| +
 | 
| +def set_reported_error(value):
 | 
| +  global global_reported_error
 | 
| +  global_reported_error = value
 | 
| +
 | 
| +def reported_error():
 | 
| +  return global_reported_error
 | 
| +
 | 
| +def log(msg):
 | 
| +  if args.verbose:
 | 
| +    print msg
 | 
| +
 | 
| +global_inc_copy = 0
 | 
| +def inc_copy():
 | 
| +  global global_inc_copy
 | 
| +  global_inc_copy += 1
 | 
| +
 | 
| +def get_node(name):
 | 
| +  return graph.setdefault(name, Node(name))
 | 
| +
 | 
| +ptr_types = ('raw', 'ref', 'mem')
 | 
| +
 | 
| +def inc_ptr(dst, ptr):
 | 
| +  if ptr in ptr_types:
 | 
| +    node = graph.get(dst)
 | 
| +    if not node: return
 | 
| +    node.counts[ptr] += 1
 | 
| +
 | 
| +def add_counts(s1, s2):
 | 
| +  for (k, v) in s2.iteritems():
 | 
| +    s1[k] += s2[k]
 | 
| +
 | 
| +# Representation of graph nodes. Basically a map of directed edges.
 | 
| +class Node:
 | 
| +  def __init__(self, name):
 | 
| +    self.name = name
 | 
| +    self.edges = {}
 | 
| +    self.reset()
 | 
| +  def __repr__(self):
 | 
| +    return "%s(%s) %s" % (self.name, self.visited, self.edges)
 | 
| +  def update_node(self, decl):
 | 
| +    # Currently we don't track any node info besides its edges.
 | 
| +    pass
 | 
| +  def update_edge(self, e):
 | 
| +    new_edge = Edge(**e)
 | 
| +    edge = self.edges.get(new_edge.key)
 | 
| +    if edge:
 | 
| +      # If an edge exist, its kind is the strongest of the two.
 | 
| +      edge.kind = max(edge.kind, new_edge.kind)
 | 
| +    else:
 | 
| +      self.edges[new_edge.key] = new_edge
 | 
| +  def super_edges(self):
 | 
| +    return [ e for e in self.edges.itervalues() if e.is_super() ]
 | 
| +  def subclass_edges(self):
 | 
| +    return [ e for e in self.edges.itervalues() if e.is_subclass() ]
 | 
| +  def reset(self):
 | 
| +    self.cost = sys.maxint
 | 
| +    self.visited = False
 | 
| +    self.path = None
 | 
| +    self.counts = {}
 | 
| +    for ptr in ptr_types:
 | 
| +      self.counts[ptr] = 0
 | 
| +  def update_counts(self):
 | 
| +    for e in self.edges.itervalues():
 | 
| +      inc_ptr(e.dst, e.ptr)
 | 
| +
 | 
| +# Representation of directed graph edges.
 | 
| +class Edge:
 | 
| +  def __init__(self, **decl):
 | 
| +    self.src = decl['src']
 | 
| +    self.dst = decl['dst']
 | 
| +    self.lbl = decl['lbl']
 | 
| +    self.ptr = decl['ptr']
 | 
| +    self.kind = decl['kind'] # 0 = weak, 1 = strong, 2 = root
 | 
| +    self.loc = decl['loc']
 | 
| +    # The label does not uniquely determine an edge from a node. We
 | 
| +    # define the semi-unique key to be the concatenation of the
 | 
| +    # label and dst name. This is sufficient to track the strongest
 | 
| +    # edge to a particular type. For example, if the field A::m_f
 | 
| +    # has type HashMap<WeakMember<B>, Member<B>> we will have a
 | 
| +    # strong edge with key m_f#B from A to B.
 | 
| +    self.key = '%s#%s' % (self.lbl, self.dst)
 | 
| +  def __repr__(self):
 | 
| +    return '%s (%s) => %s' % (self.src, self.lbl, self.dst)
 | 
| +  def is_root(self):
 | 
| +    return self.kind == 2
 | 
| +  def is_weak(self):
 | 
| +    return self.kind == 0
 | 
| +  def keeps_alive(self):
 | 
| +    return self.kind > 0
 | 
| +  def is_subclass(self):
 | 
| +    return self.lbl.startswith('<subclass>')
 | 
| +  def is_super(self):
 | 
| +    return self.lbl.startswith('<super>')
 | 
| +
 | 
| +def parse_file(filename):
 | 
| +  obj = json.load(open(filename))
 | 
| +  return obj
 | 
| +
 | 
| +def build_graphs_in_dir(dirname):
 | 
| +  # TODO: Use plateform independent code, eg, os.walk
 | 
| +  files = subprocess.check_output(
 | 
| +    ['find', dirname, '-name', '*.graph.json']).split('\n')
 | 
| +  log("Found %d files" % len(files))
 | 
| +  for f in files:
 | 
| +    f.strip()
 | 
| +    if len(f) < 1:
 | 
| +      continue
 | 
| +    build_graph(f)
 | 
| +
 | 
| +def build_graph(filename):
 | 
| +  for decl in parse_file(filename):
 | 
| +    if decl.has_key('name'):
 | 
| +      # Add/update a node entry
 | 
| +      name = decl['name']
 | 
| +      node = get_node(name)
 | 
| +      node.update_node(decl)
 | 
| +    else:
 | 
| +      # Add/update an edge entry
 | 
| +      name = decl['src']
 | 
| +      node = get_node(name)
 | 
| +      node.update_edge(decl)
 | 
| +
 | 
| +# Copy all non-weak edges from super classes to their subclasses.
 | 
| +# This causes all fields of a super to be considered fields of a
 | 
| +# derived class without tranitively relating derived classes with
 | 
| +# each other. For example, if B <: A, C <: A, and for some D, D => B,
 | 
| +# we don't want that to entail that D => C.
 | 
| +def copy_super_edges(edge):
 | 
| +  if edge.is_weak() or not edge.is_super():
 | 
| +    return
 | 
| +  inc_copy()
 | 
| +  # Make the super-class edge weak (prohibits processing twice).
 | 
| +  edge.kind = 0
 | 
| +  # If the super class is not in our graph exit early.
 | 
| +  super_node = graph.get(edge.dst)
 | 
| +  if super_node is None: return
 | 
| +  # Recursively copy all super-class edges.
 | 
| +  for e in super_node.super_edges():
 | 
| +    copy_super_edges(e)
 | 
| +  # Copy strong super-class edges (ignoring sub-class edges) to the sub class.
 | 
| +  sub_node = graph[edge.src]
 | 
| +  for e in super_node.edges.itervalues():
 | 
| +    if e.keeps_alive() and not e.is_subclass():
 | 
| +      new_edge = Edge(
 | 
| +        src = sub_node.name,
 | 
| +        dst = e.dst,
 | 
| +        lbl = '%s <: %s' % (super_node.name, e.lbl),
 | 
| +        ptr = e.ptr,
 | 
| +        kind = e.kind,
 | 
| +        loc = e.loc,
 | 
| +      )
 | 
| +      sub_node.edges[new_edge.key] = new_edge
 | 
| +  # Add a strong sub-class edge.
 | 
| +  sub_edge = Edge(
 | 
| +    src = super_node.name,
 | 
| +    dst = sub_node.name,
 | 
| +    lbl = '<subclass>',
 | 
| +    ptr = edge.ptr,
 | 
| +    kind = 1,
 | 
| +    loc = edge.loc,
 | 
| +  )
 | 
| +  super_node.edges[sub_edge.key] = sub_edge
 | 
| +
 | 
| +def complete_graph():
 | 
| +  for node in graph.itervalues():
 | 
| +    for edge in node.super_edges():
 | 
| +      copy_super_edges(edge)
 | 
| +    for edge in node.edges.itervalues():
 | 
| +      if edge.is_root():
 | 
| +        roots.append(edge)
 | 
| +  log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy)
 | 
| +
 | 
| +def reset_graph():
 | 
| +  for n in graph.itervalues():
 | 
| +    n.reset()
 | 
| +
 | 
| +def shortest_path(start, end):
 | 
| +  start.cost = 0
 | 
| +  minlist = [start]
 | 
| +  while len(minlist) > 0:
 | 
| +    minlist.sort(key=lambda n: -n.cost)
 | 
| +    current = minlist.pop()
 | 
| +    current.visited = True
 | 
| +    if current == end or current.cost >= end.cost + 1:
 | 
| +      return
 | 
| +    for e in current.edges.itervalues():
 | 
| +      if not e.keeps_alive():
 | 
| +        continue
 | 
| +      dst = graph.get(e.dst)
 | 
| +      if dst is None or dst.visited:
 | 
| +        continue
 | 
| +      if current.cost < dst.cost:
 | 
| +        dst.cost = current.cost + 1
 | 
| +        dst.path = e
 | 
| +      minlist.append(dst)
 | 
| +
 | 
| +def detect_cycles():
 | 
| +  for root_edge in roots:
 | 
| +    reset_graph()
 | 
| +    # Mark ignored classes as already visited
 | 
| +    for ignore in args.ignore_classes:
 | 
| +      name = ignore.find("::") > 0 and ignore or ("blink::" + ignore)
 | 
| +      node = graph.get(name)
 | 
| +      if node:
 | 
| +        node.visited = True
 | 
| +    src = graph[root_edge.src]
 | 
| +    dst = graph.get(root_edge.dst)
 | 
| +    if src.visited:
 | 
| +      continue
 | 
| +    if root_edge.dst == "WTF::String":
 | 
| +      continue
 | 
| +    if dst is None:
 | 
| +      print "\nPersistent root to incomplete destination object:"
 | 
| +      print root_edge
 | 
| +      set_reported_error(True)
 | 
| +      continue
 | 
| +    # Find the shortest path from the root target (dst) to its host (src)
 | 
| +    shortest_path(dst, src)
 | 
| +    if src.cost < sys.maxint:
 | 
| +      report_cycle(root_edge)
 | 
| +
 | 
| +def is_ignored_cycle(cycle):
 | 
| +  for block in ignored_cycles:
 | 
| +    if block_match(cycle, block):
 | 
| +      return True
 | 
| +
 | 
| +def block_match(b1, b2):
 | 
| +  if len(b1) != len(b2):
 | 
| +    return False
 | 
| +  for (l1, l2) in zip(b1, b2):
 | 
| +    if l1 != l2:
 | 
| +      return False
 | 
| +  return True
 | 
| +
 | 
| +def report_cycle(root_edge):
 | 
| +  dst = graph[root_edge.dst]
 | 
| +  path = []
 | 
| +  edge = root_edge
 | 
| +  dst.path = None
 | 
| +  while edge:
 | 
| +    path.append(edge)
 | 
| +    edge = graph[edge.src].path
 | 
| +  path.append(root_edge)
 | 
| +  path.reverse()
 | 
| +  # Find the max loc length for pretty printing.
 | 
| +  max_loc = 0
 | 
| +  for p in path:
 | 
| +    if len(p.loc) > max_loc:
 | 
| +      max_loc = len(p.loc)
 | 
| +  out = StringIO.StringIO()
 | 
| +  for p in path[:-1]:
 | 
| +    print >>out, (p.loc + ':').ljust(max_loc + 1), p
 | 
| +  sout = out.getvalue()
 | 
| +  if not is_ignored_cycle(sout):
 | 
| +    print "\nFound a potentially leaking cycle starting from a GC root:\n", sout
 | 
| +    set_reported_error(True)
 | 
| +
 | 
| +def load_graph():
 | 
| +  global graph
 | 
| +  global roots
 | 
| +  log("Reading graph from pickled file: " + args.pickle_graph)
 | 
| +  dump = pickle.load(open(args.pickle_graph, 'rb'))
 | 
| +  graph = dump[0]
 | 
| +  roots = dump[1]
 | 
| +
 | 
| +def save_graph():
 | 
| +  log("Saving graph to pickle file: " + args.pickle_graph)
 | 
| +  dump = (graph, roots)
 | 
| +  pickle.dump(dump, open(args.pickle_graph, 'wb'))
 | 
| +
 | 
| +def read_ignored_cycles():
 | 
| +  global ignored_cycles
 | 
| +  if not args.ignore_cycles:
 | 
| +    return
 | 
| +  log("Reading ignored cycles from file: " + args.ignore_cycles)
 | 
| +  block = []
 | 
| +  for l in open(args.ignore_cycles):
 | 
| +    line = l.strip()
 | 
| +    if not line or line.startswith('Found'):
 | 
| +      if len(block) > 0:
 | 
| +        ignored_cycles.append(block)
 | 
| +      block = []
 | 
| +    else:
 | 
| +      block += l
 | 
| +  if len(block) > 0:
 | 
| +    ignored_cycles.append(block)
 | 
| +
 | 
| +gc_bases = (
 | 
| +  'blink::GarbageCollected',
 | 
| +  'blink::GarbageCollectedFinalized',
 | 
| +  'blink::GarbageCollectedMixin',
 | 
| +)
 | 
| +ref_bases = (
 | 
| +  'WTF::RefCounted',
 | 
| +  'WTF::ThreadSafeRefCounted',
 | 
| +)
 | 
| +gcref_bases = (
 | 
| +  'blink::RefCountedGarbageCollected',
 | 
| +  'blink::ThreadSafeRefCountedGarbageCollected',
 | 
| +)
 | 
| +ref_mixins = (
 | 
| +  'blink::EventTarget',
 | 
| +  'blink::EventTargetWithInlineData',
 | 
| +  'blink::ActiveDOMObject',
 | 
| +)
 | 
| +
 | 
| +def print_stats():
 | 
| +  gcref_managed = []
 | 
| +  ref_managed = []
 | 
| +  gc_managed = []
 | 
| +  hierarchies = []
 | 
| +
 | 
| +  for node in graph.itervalues():
 | 
| +    node.update_counts()
 | 
| +    for sup in node.super_edges():
 | 
| +      if sup.dst in gcref_bases:
 | 
| +        gcref_managed.append(node)
 | 
| +      elif sup.dst in ref_bases:
 | 
| +        ref_managed.append(node)
 | 
| +      elif sup.dst in gc_bases:
 | 
| +        gc_managed.append(node)
 | 
| +
 | 
| +  groups = [("GC manged   ", gc_managed),
 | 
| +            ("ref counted ", ref_managed),
 | 
| +            ("in transition", gcref_managed)]
 | 
| +  total = sum([len(g) for (s,g) in groups])
 | 
| +  for (s, g) in groups:
 | 
| +    percent = len(g) * 100 / total
 | 
| +    print "%2d%% is %s (%d hierarchies)" % (percent, s, len(g))
 | 
| +
 | 
| +  for base in gcref_managed:
 | 
| +    stats = dict({ 'classes': 0, 'ref-mixins': 0 })
 | 
| +    for ptr in ptr_types: stats[ptr] = 0
 | 
| +    hierarchy_stats(base, stats)
 | 
| +    hierarchies.append((base, stats))
 | 
| +
 | 
| +  print "\nHierarchies in transition (RefCountedGarbageCollected):"
 | 
| +  hierarchies.sort(key=lambda (n,s): -s['classes'])
 | 
| +  for (node, stats) in hierarchies:
 | 
| +    total = stats['mem'] + stats['ref'] + stats['raw']
 | 
| +    print (
 | 
| +      "%s %3d%% of %-30s: %3d cls, %3d mem, %3d ref, %3d raw, %3d ref-mixins" %
 | 
| +      (stats['ref'] == 0 and stats['ref-mixins'] == 0 and "*" or " ",
 | 
| +       total == 0 and 100 or stats['mem'] * 100 / total,
 | 
| +       node.name.replace('blink::', ''),
 | 
| +       stats['classes'],
 | 
| +       stats['mem'],
 | 
| +       stats['ref'],
 | 
| +       stats['raw'],
 | 
| +       stats['ref-mixins'],
 | 
| +     ))
 | 
| +
 | 
| +def hierarchy_stats(node, stats):
 | 
| +  if not node: return
 | 
| +  stats['classes'] += 1
 | 
| +  add_counts(stats, node.counts)
 | 
| +  for edge in node.super_edges():
 | 
| +    if edge.dst in ref_mixins:
 | 
| +      stats['ref-mixins'] += 1
 | 
| +  for edge in node.subclass_edges():
 | 
| +    hierarchy_stats(graph.get(edge.dst), stats)
 | 
| +
 | 
| +def main():
 | 
| +  global args
 | 
| +  args = parser.parse_args()
 | 
| +  if not (args.detect_cycles or args.print_stats):
 | 
| +    print "Please select an operation to perform (eg, -c to detect cycles)"
 | 
| +    parser.print_help()
 | 
| +    return 1
 | 
| +  if args.pickle_graph and os.path.isfile(args.pickle_graph):
 | 
| +    load_graph()
 | 
| +  else:
 | 
| +    if args.use_stdin:
 | 
| +      log("Reading files from stdin")
 | 
| +      for f in sys.stdin:
 | 
| +        build_graph(f.strip())
 | 
| +    else:
 | 
| +      log("Reading files and directories from command line")
 | 
| +      if len(args.files) == 0:
 | 
| +        print "Please provide files or directores for building the graph"
 | 
| +        parser.print_help()
 | 
| +        return 1
 | 
| +      for f in args.files:
 | 
| +        if os.path.isdir(f):
 | 
| +          log("Building graph from files in directory: " + f)
 | 
| +          build_graphs_in_dir(f)
 | 
| +        else:
 | 
| +          log("Building graph from file: " + f)
 | 
| +          build_graph(f)
 | 
| +    log("Completing graph construction (%d graph nodes)" % len(graph))
 | 
| +    complete_graph()
 | 
| +    if args.pickle_graph:
 | 
| +      save_graph()
 | 
| +  if args.detect_cycles:
 | 
| +    read_ignored_cycles()
 | 
| +    log("Detecting cycles containg GC roots")
 | 
| +    detect_cycles()
 | 
| +  if args.print_stats:
 | 
| +    log("Printing statistics")
 | 
| +    print_stats()
 | 
| +  if reported_error():
 | 
| +    return 1
 | 
| +  return 0
 | 
| +
 | 
| +if __name__ == '__main__':
 | 
| +  sys.exit(main())
 | 
| 
 |