| 1 #!/usr/bin/env python |
| 2 # Copyright 2014 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 import argparse, os, sys, json, subprocess, pickle, StringIO |
| 7 |
# Command-line interface.  A lone '-' switches the tool to reading the list
# of graph files from stdin; -c and/or -s select which analyses to run
# (enforced in main()).
parser = argparse.ArgumentParser(
  description =
    "Process the Blink points-to graph generated by the Blink GC plugin.")

parser.add_argument(
  '-', dest='use_stdin', action='store_true',
  help='Read JSON graph files from stdin')

parser.add_argument(
  '-c', '--detect-cycles', action='store_true',
  help='Detect cycles containing GC roots')

parser.add_argument(
  '-s', '--print-stats', action='store_true',
  help='Statistics about ref-counted and traced objects')

parser.add_argument(
  '-v', '--verbose', action='store_true',
  help='Verbose output')

parser.add_argument(
  '--ignore-cycles', default=None, metavar='FILE',
  help='File with cycles to ignore')

parser.add_argument(
  '--ignore-classes', nargs='*', default=[], metavar='CLASS',
  help='Classes to ignore when detecting cycles')

parser.add_argument(
  '--pickle-graph', default=None, metavar='FILE',
  help='File to read/save the graph from/to')

parser.add_argument(
  'files', metavar='FILE_OR_DIR', nargs='*', default=[],
  help='JSON graph files or directories containing them')
| 43 |
# Command line args after parsing (populated by main()).
args = None

# Map from node labels to Node objects.
graph = {}

# List of root edges (kind == 2), collected by complete_graph().
roots = []

# List of ignored cycle blocks, read from the --ignore-cycles file.
ignored_cycles = []

# Global flag to determine exit code; see set_reported_error()/reported_error().
global_reported_error = False
| 58 |
def set_reported_error(value):
  """Record whether an error was reported; drives the process exit code."""
  global global_reported_error
  global_reported_error = value

def reported_error():
  """Return the current value of the reported-error flag."""
  return global_reported_error
| 65 |
def log(msg):
  """Write msg to stdout, but only when --verbose was requested."""
  if not args.verbose:
    return
  sys.stdout.write('%s\n' % msg)
| 69 |
# Counter of super-class edge copies performed; reported by complete_graph().
global_inc_copy = 0
def inc_copy():
  """Bump the global count of copied super-class edges."""
  global global_inc_copy
  global_inc_copy = global_inc_copy + 1
| 74 |
def get_node(name):
  """Look up the node labeled name, creating and registering it if absent."""
  node = graph.get(name)
  if node is None:
    node = graph[name] = Node(name)
  return node
| 77 |
# The pointer kinds whose per-node counts we track (see Node.counts and
# the mem/ref/raw columns printed by print_stats).
ptr_types = ('raw', 'ref', 'mem')
| 79 |
def inc_ptr(dst, ptr):
  """Increment the counter for pointer kind ptr on the node labeled dst.

  Unknown pointer kinds and unknown destination nodes are ignored.
  """
  if ptr not in ptr_types:
    return
  node = graph.get(dst)
  if node is not None:
    node.counts[ptr] += 1
| 85 |
def add_counts(s1, s2):
  """Add each counter in s2 into s1 in place.

  Every key of s2 must already exist in s1.  The original iterated
  iteritems() but then re-indexed s2[k], discarding the value it had
  already unpacked; use the value directly (also works on Python 3).
  """
  for k, v in s2.items():
    s1[k] += v
| 89 |
| 90 # Representation of graph nodes. Basically a map of directed edges. |
# Representation of graph nodes. Basically a map of directed edges.
class Node:
  """A class in the points-to graph together with its outgoing edges."""
  def __init__(self, name):
    self.name = name
    self.edges = {}  # Edge.key -> Edge
    self.reset()
  def __repr__(self):
    return '{0}({1}) {2}'.format(self.name, self.visited, self.edges)
  def update_node(self, decl):
    """Merge a node declaration; only edges are tracked at present."""
    pass
  def update_edge(self, e):
    """Merge an edge declaration, keeping the strongest kind per edge key."""
    incoming = Edge(**e)
    existing = self.edges.get(incoming.key)
    if existing is None:
      self.edges[incoming.key] = incoming
    else:
      # A known edge keeps the strongest of the two kinds.
      existing.kind = max(existing.kind, incoming.kind)
  def super_edges(self):
    """Outgoing <super> edges (to base classes)."""
    return [e for e in self.edges.values() if e.is_super()]
  def subclass_edges(self):
    """Outgoing <subclass> edges (to derived classes)."""
    return [e for e in self.edges.values() if e.is_subclass()]
  def reset(self):
    """Clear per-search state (cost/visited/path) and pointer counters."""
    self.cost = sys.maxint
    self.visited = False
    self.path = None
    self.counts = dict((ptr, 0) for ptr in ptr_types)
  def update_counts(self):
    """Credit each outgoing edge's pointer kind to its destination node."""
    for edge in self.edges.values():
      inc_ptr(edge.dst, edge.ptr)
| 123 |
| 124 # Representation of directed graph edges. |
# Representation of directed graph edges.
class Edge:
  """A directed, labeled edge of the points-to graph."""
  def __init__(self, **decl):
    # All attributes come verbatim from the JSON edge declaration.
    # kind: 0 = weak, 1 = strong, 2 = root.
    for attr in ('src', 'dst', 'lbl', 'ptr', 'kind', 'loc'):
      setattr(self, attr, decl[attr])
    # A label alone does not uniquely identify an edge leaving a node, so
    # the semi-unique key is label + dst name.  That suffices to track the
    # strongest edge to a particular type: e.g. a field A::m_f of type
    # HashMap<WeakMember<B>, Member<B>> yields one strong edge with key
    # m_f#B from A to B.
    self.key = '%s#%s' % (self.lbl, self.dst)
  def __repr__(self):
    return '{0} ({1}) => {2}'.format(self.src, self.lbl, self.dst)
  def is_root(self):
    return self.kind == 2
  def is_weak(self):
    return self.kind == 0
  def keeps_alive(self):
    return self.kind > 0
  def is_subclass(self):
    return self.lbl.startswith('<subclass>')
  def is_super(self):
    return self.lbl.startswith('<super>')
| 152 |
def parse_file(filename):
  """Parse one .graph.json file and return its list of node/edge decls.

  Uses a context manager so the file handle is closed promptly; the
  original left the handle open until garbage collection.
  """
  with open(filename) as f:
    return json.load(f)
| 156 |
def build_graphs_in_dir(dirname):
  """Build the graph from every *.graph.json file under dirname.

  Replaces the original shell-out to `find` (which was Unix-only and
  left a dead no-op `f.strip()` in the loop) with the platform
  independent os.walk, resolving the old TODO.
  """
  files = []
  for root, _, names in os.walk(dirname):
    for name in names:
      if name.endswith('.graph.json'):
        files.append(os.path.join(root, name))
  log("Found %d files" % len(files))
  for f in files:
    build_graph(f)
| 167 |
def build_graph(filename):
  """Merge the node and edge declarations from one JSON file into graph.

  Declarations carrying a 'name' key describe nodes; all others are
  edges keyed by their 'src' node.  (Replaces the deprecated
  dict.has_key() with the `in` operator.)
  """
  for decl in parse_file(filename):
    if 'name' in decl:
      # Add/update a node entry.
      node = get_node(decl['name'])
      node.update_node(decl)
    else:
      # Add/update an edge entry, attached to its source node.
      node = get_node(decl['src'])
      node.update_edge(decl)
| 180 |
# Copy all non-weak edges from super classes to their subclasses.
# This causes all fields of a super to be considered fields of a
# derived class without transitively relating derived classes with
# each other. For example, if B <: A, C <: A, and for some D, D => B,
# we don't want that to entail that D => C.
def copy_super_edges(edge):
  # Only process strong <super> edges; weakness doubles as the
  # "already processed" marker (set below).
  if edge.is_weak() or not edge.is_super():
    return
  # Counted even if the super class turns out to be absent from the graph.
  inc_copy()
  # Make the super-class edge weak (prohibits processing twice).
  edge.kind = 0
  # If the super class is not in our graph exit early.
  super_node = graph.get(edge.dst)
  if super_node is None: return
  # Recursively copy all super-class edges first, so the super's edge
  # map already includes everything inherited from further up.
  for e in super_node.super_edges():
    copy_super_edges(e)
  # Copy strong super-class edges (ignoring sub-class edges) to the sub class.
  sub_node = graph[edge.src]
  for e in super_node.edges.itervalues():
    if e.keeps_alive() and not e.is_subclass():
      # The label records the inheritance step for readable reports.
      new_edge = Edge(
        src = sub_node.name,
        dst = e.dst,
        lbl = '%s <: %s' % (super_node.name, e.lbl),
        ptr = e.ptr,
        kind = e.kind,
        loc = e.loc,
      )
      sub_node.edges[new_edge.key] = new_edge
  # Add a strong sub-class edge (used by hierarchy_stats to walk down).
  sub_edge = Edge(
    src = super_node.name,
    dst = sub_node.name,
    lbl = '<subclass>',
    ptr = edge.ptr,
    kind = 1,
    loc = edge.loc,
  )
  super_node.edges[sub_edge.key] = sub_edge
| 221 |
def complete_graph():
  """Finish graph construction: inherit super-class edges, collect roots."""
  for node in graph.values():
    # super_edges() returns a snapshot list, so mutation during the
    # copy is safe.
    for edge in node.super_edges():
      copy_super_edges(edge)
    for edge in node.edges.values():
      if edge.is_root():
        roots.append(edge)
  log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy)
| 230 |
def reset_graph():
  """Clear the per-search state (cost/visited/path/counts) of every node."""
  for node in graph.values():
    node.reset()
| 234 |
def shortest_path(start, end):
  """Dijkstra-style search for a shortest keeps-alive path start -> end.

  All edges have unit weight (cost grows by 1 per hop).  Requires
  reset_graph() to have cleared node state first.  On success,
  end.cost < sys.maxint and the path can be recovered by walking the
  .path edge breadcrumbs backwards from end (see report_cycle).
  """
  start.cost = 0
  minlist = [start]
  while len(minlist) > 0:
    # Sort descending by cost, then pop the cheapest node off the tail.
    minlist.sort(key=lambda n: -n.cost)
    current = minlist.pop()
    current.visited = True
    # Stop when the target is reached, or when the cheapest remaining
    # node can no longer improve on a path already found to 'end'.
    if current == end or current.cost >= end.cost + 1:
      return
    for e in current.edges.itervalues():
      # Only strong (keeps-alive) edges can form a leaking path.
      if not e.keeps_alive():
        continue
      dst = graph.get(e.dst)
      if dst is None or dst.visited:
        continue
      if current.cost < dst.cost:
        # Relax: record the cheaper route and the edge that produced it.
        dst.cost = current.cost + 1
        dst.path = e
        minlist.append(dst)
| 254 |
def detect_cycles():
  """Report every GC root edge that closes a strong cycle back to itself."""
  for root_edge in roots:
    reset_graph()
    # Mark ignored classes as already visited so no search path may pass
    # through them.  A bare class name defaults to the blink namespace.
    for ignore in args.ignore_classes:
      name = ignore.find("::") > 0 and ignore or ("blink::" + ignore)
      node = graph.get(name)
      if node:
        node.visited = True
    src = graph[root_edge.src]
    dst = graph.get(root_edge.dst)
    # Skip roots whose source object is itself ignored.
    if src.visited:
      continue
    # NOTE(review): WTF::String roots are special-cased and skipped;
    # presumably strings cannot keep a cycle alive -- confirm.
    if root_edge.dst == "WTF::String":
      continue
    if dst is None:
      # The plugin emitted a root edge to a type it never declared.
      print "\nPersistent root to incomplete destination object:"
      print root_edge
      set_reported_error(True)
      continue
    # Find the shortest path from the root target (dst) to its host (src)
    shortest_path(dst, src)
    # A finite cost means src is reachable from dst, i.e. the root edge
    # closes a cycle src -> dst -> ... -> src.
    if src.cost < sys.maxint:
      report_cycle(root_edge)
| 279 |
def is_ignored_cycle(cycle):
  """Return True if the reported cycle text matches an ignored block.

  The original fell through to an implicit None on no match; callers
  only test truthiness, so returning an explicit bool is compatible.
  """
  return any(block_match(cycle, block) for block in ignored_cycles)
| 284 |
def block_match(b1, b2):
  """Return True iff b1 and b2 have equal length and equal elements."""
  if len(b1) != len(b2):
    return False
  return all(x == y for (x, y) in zip(b1, b2))
| 292 |
def report_cycle(root_edge):
  """Pretty-print the cycle closed by root_edge unless it is ignored.

  Relies on shortest_path() having left .path breadcrumbs leading from
  the root's target object back to the root's source object.
  """
  dst = graph[root_edge.dst]
  path = []
  edge = root_edge
  # Sentinel: terminate the backwards walk once it reaches the target.
  dst.path = None
  while edge:
    path.append(edge)
    edge = graph[edge.src].path
  # Duplicate the root edge so the printed path visibly closes the cycle;
  # after reverse() the copy sits at the tail and is dropped below.
  path.append(root_edge)
  path.reverse()
  # Find the max loc length for pretty printing.
  max_loc = 0
  for p in path:
    if len(p.loc) > max_loc:
      max_loc = len(p.loc)
  out = StringIO.StringIO()
  # Format: "<source location>:  <edge repr>", locations left-aligned.
  for p in path[:-1]:
    print >>out, (p.loc + ':').ljust(max_loc + 1), p
  sout = out.getvalue()
  if not is_ignored_cycle(sout):
    print "\nFound a potentially leaking cycle starting from a GC root:\n", sout
    set_reported_error(True)
| 315 |
def load_graph():
  """Replace graph/roots with a previously pickled pair (see save_graph).

  Uses a context manager so the file handle is not leaked.
  NOTE(review): pickle.load can execute arbitrary code; this is only
  acceptable because the pickle file is produced locally by save_graph().
  """
  global graph
  global roots
  log("Reading graph from pickled file: " + args.pickle_graph)
  with open(args.pickle_graph, 'rb') as f:
    dump = pickle.load(f)
  graph = dump[0]
  roots = dump[1]
| 323 |
def save_graph():
  """Pickle (graph, roots) to args.pickle_graph for faster future runs.

  Uses a context manager so the file is flushed and closed deterministically
  (the original never closed the handle).
  """
  log("Saving graph to pickle file: " + args.pickle_graph)
  with open(args.pickle_graph, 'wb') as f:
    pickle.dump((graph, roots), f)
| 328 |
def read_ignored_cycles():
  """Populate ignored_cycles from the --ignore-cycles file, if given.

  The file holds the verbatim text of previously reported cycles,
  separated by blank lines or 'Found ...' header lines.  Each block is
  stored as a single string exactly matching what report_cycle emits.
  The original accumulated `block += l`, silently extending a list with
  the *characters* of each line; joining the raw lines into one string
  is element-wise identical under block_match but far clearer.  Also
  closes the file via a context manager instead of leaking the handle.
  """
  global ignored_cycles
  if not args.ignore_cycles:
    return
  log("Reading ignored cycles from file: " + args.ignore_cycles)
  block = []
  with open(args.ignore_cycles) as f:
    for l in f:
      line = l.strip()
      if not line or line.startswith('Found'):
        # A separator line ends the current block.
        if block:
          ignored_cycles.append(''.join(block))
        block = []
      else:
        block.append(l)
  if block:
    ignored_cycles.append(''.join(block))
| 345 |
# Base classes whose presence as a direct super class marks a hierarchy
# as garbage collected (see print_stats).
gc_bases = (
  'blink::GarbageCollected',
  'blink::GarbageCollectedFinalized',
  'blink::GarbageCollectedMixin',
)
# Base classes marking a hierarchy as reference counted.
ref_bases = (
  'WTF::RefCounted',
  'WTF::ThreadSafeRefCounted',
)
# Base classes of hierarchies in transition from ref counting to GC.
gcref_bases = (
  'blink::RefCountedGarbageCollected',
  'blink::ThreadSafeRefCountedGarbageCollected',
)
# Mixin bases tallied separately as 'ref-mixins' in hierarchy_stats;
# presumably they imply ref counting -- TODO(review): confirm.
ref_mixins = (
  'blink::EventTarget',
  'blink::EventTargetWithInlineData',
  'blink::ActiveDOMObject',
)
| 364 |
| 365 def print_stats(): |
| 366 gcref_managed = [] |
| 367 ref_managed = [] |
| 368 gc_managed = [] |
| 369 hierarchies = [] |
| 370 |
| 371 for node in graph.itervalues(): |
| 372 node.update_counts() |
| 373 for sup in node.super_edges(): |
| 374 if sup.dst in gcref_bases: |
| 375 gcref_managed.append(node) |
| 376 elif sup.dst in ref_bases: |
| 377 ref_managed.append(node) |
| 378 elif sup.dst in gc_bases: |
| 379 gc_managed.append(node) |
| 380 |
| 381 groups = [("GC manged ", gc_managed), |
| 382 ("ref counted ", ref_managed), |
| 383 ("in transition", gcref_managed)] |
| 384 total = sum([len(g) for (s,g) in groups]) |
| 385 for (s, g) in groups: |
| 386 percent = len(g) * 100 / total |
| 387 print "%2d%% is %s (%d hierarchies)" % (percent, s, len(g)) |
| 388 |
| 389 for base in gcref_managed: |
| 390 stats = dict({ 'classes': 0, 'ref-mixins': 0 }) |
| 391 for ptr in ptr_types: stats[ptr] = 0 |
| 392 hierarchy_stats(base, stats) |
| 393 hierarchies.append((base, stats)) |
| 394 |
| 395 print "\nHierarchies in transition (RefCountedGarbageCollected):" |
| 396 hierarchies.sort(key=lambda (n,s): -s['classes']) |
| 397 for (node, stats) in hierarchies: |
| 398 total = stats['mem'] + stats['ref'] + stats['raw'] |
| 399 print ( |
| 400 "%s %3d%% of %-30s: %3d cls, %3d mem, %3d ref, %3d raw, %3d ref-mixins" % |
| 401 (stats['ref'] == 0 and stats['ref-mixins'] == 0 and "*" or " ", |
| 402 total == 0 and 100 or stats['mem'] * 100 / total, |
| 403 node.name.replace('blink::', ''), |
| 404 stats['classes'], |
| 405 stats['mem'], |
| 406 stats['ref'], |
| 407 stats['raw'], |
| 408 stats['ref-mixins'], |
| 409 )) |
| 410 |
def hierarchy_stats(node, stats):
  """Accumulate, into stats, counts for node and its whole subclass tree."""
  if node is None:
    return
  stats['classes'] += 1
  add_counts(stats, node.counts)
  # Count super edges into the known ref-counting mixin bases.
  stats['ref-mixins'] += sum(
      1 for edge in node.super_edges() if edge.dst in ref_mixins)
  for edge in node.subclass_edges():
    hierarchy_stats(graph.get(edge.dst), stats)
| 420 |
| 421 def main(): |
| 422 global args |
| 423 args = parser.parse_args() |
| 424 if not (args.detect_cycles or args.print_stats): |
| 425 print "Please select an operation to perform (eg, -c to detect cycles)" |
| 426 parser.print_help() |
| 427 return 1 |
| 428 if args.pickle_graph and os.path.isfile(args.pickle_graph): |
| 429 load_graph() |
| 430 else: |
| 431 if args.use_stdin: |
| 432 log("Reading files from stdin") |
| 433 for f in sys.stdin: |
| 434 build_graph(f.strip()) |
| 435 else: |
| 436 log("Reading files and directories from command line") |
| 437 if len(args.files) == 0: |
| 438 print "Please provide files or directores for building the graph" |
| 439 parser.print_help() |
| 440 return 1 |
| 441 for f in args.files: |
| 442 if os.path.isdir(f): |
| 443 log("Building graph from files in directory: " + f) |
| 444 build_graphs_in_dir(f) |
| 445 else: |
| 446 log("Building graph from file: " + f) |
| 447 build_graph(f) |
| 448 log("Completing graph construction (%d graph nodes)" % len(graph)) |
| 449 complete_graph() |
| 450 if args.pickle_graph: |
| 451 save_graph() |
| 452 if args.detect_cycles: |
| 453 read_ignored_cycles() |
| 454 log("Detecting cycles containg GC roots") |
| 455 detect_cycles() |
| 456 if args.print_stats: |
| 457 log("Printing statistics") |
| 458 print_stats() |
| 459 if reported_error(): |
| 460 return 1 |
| 461 return 0 |
| 462 |
# Exit status 1 if any error/cycle was reported, 0 otherwise.
if __name__ == '__main__':
  sys.exit(main())