OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 # Copyright 2014 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 import argparse, os, sys, json, subprocess |
| 7 |
| 8 parser = argparse.ArgumentParser( |
| 9 description = |
| 10 "Process the Blink points-to graph generated by the Blink GC plugin.") |
| 11 |
| 12 parser.add_argument( |
| 13 '-', dest='use_stdin', action='store_true', |
| 14 help='Read JSON graph files from stdin') |
| 15 |
| 16 parser.add_argument( |
| 17 '-v', '--verbose', action='store_true', |
| 18 help='Verbose output') |
| 19 |
| 20 parser.add_argument( |
| 21 '-c', '--detect-cycles', action='store_true', default=True, |
| 22 help='Detect cycles containing GC roots') |
| 23 |
| 24 parser.add_argument( |
| 25 'files', metavar='FILE', nargs='*', default=[], |
| 26 help='JSON graph files or directories containing them') |
| 27 |
| 28 # Command line args after parsing. |
| 29 args = None |
| 30 |
| 31 # Map from node labels to nodes. |
| 32 graph = {} |
| 33 |
| 34 # Set of root nodes. |
| 35 roots = [] |
| 36 |
| 37 # Global flag to determine exit code. |
| 38 global_reported_error = False |
| 39 |
| 40 def set_reported_error(value): |
| 41 global global_reported_error |
| 42 global_reported_error = value |
| 43 |
| 44 def reported_error(): |
| 45 return global_reported_error |
| 46 |
| 47 def log(msg): |
| 48 if args.verbose: |
| 49 print msg |
| 50 |
| 51 global_inc_copy = 0 |
| 52 def inc_copy(): |
| 53 global global_inc_copy |
| 54 global_inc_copy += 1 |
| 55 |
| 56 def get_node(name): |
| 57 return graph.setdefault(name, Node(name)) |
| 58 |
| 59 # Representation of graph nodes. Basically a map of directed edges. |
| 60 class Node: |
| 61 def __init__(self, name): |
| 62 self.name = name |
| 63 self.edges = {} |
| 64 self.reset() |
| 65 def __repr__(self): |
| 66 return "%s(%s) %s" % (self.name, self.visited, self.edges) |
| 67 def update_node(self, decl): |
| 68 # Currently we don't track any node info besides its edges. |
| 69 pass |
| 70 def update_edge(self, e): |
| 71 new_edge = Edge(**e) |
| 72 edge = self.edges.get(new_edge.key) |
| 73 if edge: |
| 74 # If an edge exist, its kind is the strongest of the two. |
| 75 edge.kind = max(edge.kind, new_edge.kind) |
| 76 else: |
| 77 self.edges[new_edge.key] = new_edge |
| 78 def super_edges(self): |
| 79 return [ e for e in self.edges.itervalues() if e.is_super() ] |
| 80 def reset(self): |
| 81 self.cost = sys.maxint |
| 82 self.visited = False |
| 83 self.path = None |
| 84 |
| 85 # Representation of directed graph edges. |
| 86 class Edge: |
| 87 def __init__(self, **decl): |
| 88 self.src = decl['src'] |
| 89 self.dst = decl['dst'] |
| 90 self.lbl = decl['lbl'] |
| 91 self.kind = decl['kind'] # 0 = weak, 1 = strong, 2 = root |
| 92 self.loc = decl['loc'] |
| 93 # The label does not uniquely determine an edge from a node. We |
| 94 # define the semi-unique key to be the concatenation of the |
| 95 # label and dst name. This is sufficient to track the strongest |
| 96 # edge to a particular type. For example, if the field A::m_f |
| 97 # has type HashMap<WeakMember<B>, Member<B>> we will have a |
| 98 # strong edge with key m_f#B from A to B. |
| 99 self.key = '%s#%s' % (self.lbl, self.dst) |
| 100 def copy(self): |
| 101 return Edge( |
| 102 lbl = self.lbl, |
| 103 kind = self.kind, |
| 104 src = self.src, |
| 105 dst = self.dst, |
| 106 loc = self.loc) |
| 107 def __repr__(self): |
| 108 return '%s (%s) => %s' % (self.src, self.lbl, self.dst) |
| 109 def is_root(self): |
| 110 return self.kind == 2 |
| 111 def is_weak(self): |
| 112 return self.kind == 0 |
| 113 def keeps_alive(self): |
| 114 return self.kind > 0 |
| 115 def is_subclass(self): |
| 116 return self.lbl.startswith('<subclass>') |
| 117 def is_super(self): |
| 118 return self.lbl.startswith('<super>') |
| 119 |
| 120 def parse_file(filename): |
| 121 obj = json.load(open(filename)) |
| 122 return obj |
| 123 |
| 124 def build_graphs_in_dir(dirname): |
| 125 # TODO: Use plateform independent code, eg, os.walk |
| 126 files = subprocess.check_output( |
| 127 ['find', dirname, '-name', '*.graph.json']).split('\n') |
| 128 log("Found %d files" % len(files)) |
| 129 for f in files: |
| 130 f.strip() |
| 131 if len(f) < 1: |
| 132 continue |
| 133 build_graph(f) |
| 134 |
| 135 def build_graph(filename): |
| 136 for decl in parse_file(filename): |
| 137 if decl.has_key('name'): |
| 138 # Add/update a node entry |
| 139 name = decl['name'] |
| 140 node = get_node(name) |
| 141 node.update_node(decl) |
| 142 else: |
| 143 # Add/update an edge entry |
| 144 name = decl['src'] |
| 145 node = get_node(name) |
| 146 node.update_edge(decl) |
| 147 |
| 148 # Copy all non-weak edges from super classes to their subclasses. |
| 149 # This causes all fields of a super to be considered fields of a |
| 150 # derived class without tranitively relating derived classes with |
| 151 # each other. For example, if B <: A, C <: A, and for some D, D => B, |
| 152 # we don't want that to entail that D => C. |
| 153 def copy_super_edges(edge): |
| 154 if edge.is_weak() or not edge.is_super(): |
| 155 return |
| 156 inc_copy() |
| 157 # Make the super-class edge weak (prohibits processing twice). |
| 158 edge.kind = 0 |
| 159 # If the super class is not in our graph exit early. |
| 160 super_node = graph.get(edge.dst) |
| 161 if super_node is None: return |
| 162 # Recursively copy all super-class edges. |
| 163 for e in super_node.super_edges(): |
| 164 copy_super_edges(e) |
| 165 # Copy strong super-class edges (ignoring sub-class edges) to the sub class. |
| 166 sub_node = graph[edge.src] |
| 167 for e in super_node.edges.itervalues(): |
| 168 if e.keeps_alive() and not e.is_subclass(): |
| 169 new_edge = e.copy() |
| 170 new_edge.src = sub_node.name |
| 171 new_edge.lbl = '%s <: %s' % (super_node.name, e.lbl) |
| 172 sub_node.edges[new_edge.key] = new_edge |
| 173 # Add a strong sub-class edge. |
| 174 sub_edge = edge.copy() |
| 175 sub_edge.kind = 1 |
| 176 sub_edge.lbl = '<subclass>' |
| 177 sub_edge.src = super_node.name |
| 178 sub_edge.dst = sub_node.name |
| 179 super_node.edges[sub_edge.key] = sub_edge |
| 180 |
| 181 def complete_graph(): |
| 182 for node in graph.itervalues(): |
| 183 for edge in node.super_edges(): |
| 184 copy_super_edges(edge) |
| 185 for edge in node.edges.itervalues(): |
| 186 if edge.is_root(): |
| 187 roots.append(edge) |
| 188 log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy) |
| 189 |
| 190 def reset_graph(): |
| 191 for n in graph.itervalues(): |
| 192 n.reset() |
| 193 |
| 194 def shortest_path(start, end): |
| 195 start.cost = 0 |
| 196 minlist = [start] |
| 197 while len(minlist) > 0: |
| 198 minlist.sort(key=lambda n: -n.cost) |
| 199 current = minlist.pop() |
| 200 current.visited = True |
| 201 if current == end or current.cost >= end.cost + 1: |
| 202 return |
| 203 for e in current.edges.itervalues(): |
| 204 if not e.keeps_alive(): |
| 205 continue |
| 206 dst = graph.get(e.dst) |
| 207 if dst is None or dst.visited: |
| 208 continue |
| 209 if current.cost < dst.cost: |
| 210 dst.cost = current.cost + 1 |
| 211 dst.path = e |
| 212 minlist.append(dst) |
| 213 |
| 214 def detect_cycles(): |
| 215 for root_edge in roots: |
| 216 reset_graph() |
| 217 src = graph[root_edge.src] |
| 218 dst = graph.get(root_edge.dst) |
| 219 if dst is None: |
| 220 print "\nPersistent root to incomplete destination object:" |
| 221 print root_edge |
| 222 set_reported_error(True) |
| 223 continue |
| 224 # Find the shortest path from the root target (dst) to its host (src) |
| 225 shortest_path(dst, src) |
| 226 if src.cost < sys.maxint: |
| 227 report_cycle(root_edge) |
| 228 |
| 229 def report_cycle(root_edge): |
| 230 dst = graph[root_edge.dst] |
| 231 print "\nFound a potentially leaking cycle starting from a GC root:" |
| 232 path = [] |
| 233 edge = root_edge |
| 234 dst.path = None |
| 235 while edge: |
| 236 path.append(edge) |
| 237 edge = graph[edge.src].path |
| 238 path.append(root_edge) |
| 239 path.reverse() |
| 240 # Find the max loc length for pretty printing. |
| 241 max_loc = 0 |
| 242 for p in path: |
| 243 if len(p.loc) > max_loc: |
| 244 max_loc = len(p.loc) |
| 245 for p in path[:-1]: |
| 246 print (p.loc + ':').ljust(max_loc + 1), p |
| 247 set_reported_error(True) |
| 248 |
| 249 def main(): |
| 250 global args |
| 251 args = parser.parse_args() |
| 252 if not args.detect_cycles: |
| 253 print "Please select an operation to perform (eg, -c to detect cycles)" |
| 254 parser.print_help() |
| 255 return 1 |
| 256 if args.use_stdin: |
| 257 log("Reading files from stdin") |
| 258 for f in sys.stdin: |
| 259 build_graph(f.strip()) |
| 260 else: |
| 261 log("Reading files and directories from command line") |
| 262 if len(args.files) == 0: |
| 263 print "Please provide files or directores for building the graph" |
| 264 parser.print_help() |
| 265 return 1 |
| 266 for f in args.files: |
| 267 if os.path.isdir(f): |
| 268 log("Building graph from files in directory: " + f) |
| 269 build_graphs_in_dir(f) |
| 270 else: |
| 271 log("Building graph from file: " + f) |
| 272 build_graph(f) |
| 273 log("Completing graph construction (%d graph nodes)" % len(graph)) |
| 274 complete_graph() |
| 275 if args.detect_cycles: |
| 276 log("Detecting cycles containg GC roots") |
| 277 detect_cycles() |
| 278 if reported_error(): |
| 279 return 1 |
| 280 return 0 |
| 281 |
| 282 if __name__ == '__main__': |
| 283 sys.exit(main()) |
OLD | NEW |