OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/env python | |
2 # Copyright 2014 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 import argparse, os, sys, json, subprocess | |
7 | |
8 parser = argparse.ArgumentParser( | |
9 description = | |
10 "Process the Blink points-to graph generated by the Blink GC plugin.") | |
11 | |
12 parser.add_argument( | |
13 '-', dest='use_stdin', action='store_true', | |
14 help='Read JSON graph files from stdin') | |
15 | |
16 parser.add_argument( | |
17 '-v', '--verbose', action='store_true', | |
18 help='Verbose output') | |
19 | |
20 parser.add_argument( | |
21 '-c', '--detect-cycles', action='store_true', default=True, | |
22 help='Detect cycles containing GC roots') | |
23 | |
24 parser.add_argument( | |
25 'files', metavar='FILE', nargs='*', default=[], | |
26 help='JSON graph files or directories containing them') | |
27 | |
28 # command line args after parsing. | |
Mads Ager (chromium)
2014/03/21 11:45:57
Start comments with a capital letter?
| |
29 args = None | |
30 | |
31 # map from node labels to nodes. | |
32 graph = {} | |
33 | |
34 # set of root nodes. | |
35 roots = [] | |
36 | |
37 # global flag to determine exit code. | |
38 global_reported_error = False | |
39 | |
40 def set_reported_error(value): | |
41 global global_reported_error | |
42 global_reported_error = value | |
43 | |
44 def reported_error(): | |
45 return global_reported_error | |
46 | |
47 def log(msg): | |
48 if args.verbose: | |
49 print msg | |
50 | |
51 global_inc_copy = 0 | |
52 def inc_copy(): | |
53 global global_inc_copy | |
54 global_inc_copy += 1 | |
55 | |
56 def get_node(name): | |
57 return graph.setdefault(name, Node(name)) | |
58 | |
59 # Representation of graph nodes. Basically a map of directed edges. | |
60 class Node: | |
61 def __init__(self, name): | |
62 self.name = name | |
63 self.edges = {} | |
64 self.reset() | |
65 def __repr__(self): | |
66 return "%s(%s) %s" % (self.name, self.visited, self.edges) | |
67 def update_node(self, decl): | |
68 # Currently we don't track any node info besides its edges. | |
69 pass | |
70 def update_edge(self, e): | |
71 new_edge = Edge(**e) | |
72 edge = self.edges.get(new_edge.key) | |
73 if edge: | |
74 # If an edge exist, its kind is the strongest of the two. | |
75 edge.kind = max(edge.kind, new_edge.kind) | |
76 else: | |
77 self.edges[new_edge.key] = new_edge | |
78 def super_edges(self): | |
79 return [ e for e in self.edges.itervalues() if e.is_super() ] | |
80 def reset(self): | |
81 self.cost = sys.maxint | |
82 self.visited = False | |
83 self.path = None | |
84 | |
85 # Representation of directed graph edges. | |
86 class Edge: | |
87 def __init__(self, **decl): | |
88 self.src = decl['src'] | |
89 self.dst = decl['dst'] | |
90 self.lbl = decl['lbl'] | |
91 self.kind = decl['kind'] # 0 = weak, 1 = strong, 2 = root | |
92 self.loc = decl['loc'] | |
93 # The label does not uniquely determine an edge from a node. We | |
94 # define the semi-unique key to be the concatenation of the | |
95 # label and dst name. This is sufficient to track the strongest | |
96 # edge to a particular type. For example, if the field A::m_f | |
97 # has type HashMap<WeakMember<B>, Member<B>> we will have a one | |
Mads Ager (chromium)
2014/03/21 11:45:57
a one -> a
| |
98 # strong edge with key m_f#B from A to B. | |
99 self.key = '%s#%s' % (self.lbl, self.dst) | |
100 def copy(self): | |
101 return Edge( | |
102 lbl = self.lbl, | |
103 kind = self.kind, | |
104 src = self.src, | |
105 dst = self.dst, | |
106 loc = self.loc) | |
107 def __repr__(self): | |
108 return '%s (%s) => %s' % (self.src, self.lbl, self.dst) | |
109 def is_root(self): | |
110 return self.kind == 2 | |
111 def is_weak(self): | |
112 return self.kind == 0 | |
113 def keeps_alive(self): | |
114 return self.kind > 0 | |
115 def is_subclass(self): | |
116 return self.lbl.startswith('<subclass>') | |
117 def is_super(self): | |
118 return self.lbl.startswith('<super>') | |
119 | |
120 def parse_file(filename): | |
121 obj = json.load(open(filename)) | |
122 return obj | |
123 | |
124 def build_graphs_in_dir(dirname): | |
125 files = subprocess.check_output( | |
126 ['find', dirname, '-name', '*.graph.json']).split('\n') | |
Mads Ager (chromium)
2014/03/21 11:45:57
We might want to use python APIs for this at some
| |
127 log("Found %d files" % len(files)) | |
128 for f in files: | |
129 f.strip() | |
130 if len(f) < 1: | |
131 continue | |
132 build_graph(f) | |
133 | |
134 def build_graph(filename): | |
135 for decl in parse_file(filename): | |
136 if decl.has_key('name'): | |
137 # Add/update a node entry | |
138 name = decl['name'] | |
139 node = get_node(name) | |
140 node.update_node(decl) | |
141 else: | |
142 # Add/update an edge entry | |
143 name = decl['src'] | |
144 node = get_node(name) | |
145 node.update_edge(decl) | |
146 | |
147 # Copy all edges from a super classes to their subclasses. | |
148 | |
149 # This causes all fields of a super to be considered fields of a | |
150 # derived class without tranitively relating derived classes with | |
151 # each other. For example, if B <: A, C <: A, and for some D, D => B, | |
152 # we don't want that to entail that D => C. | |
153 def copy_super_edges(edge): | |
154 if edge.is_weak() or not edge.is_super(): | |
155 return | |
156 inc_copy() | |
157 # make the super-class edge weak (prohibits processing twice). | |
158 edge.kind = 0 | |
159 # if the super class is not in our graph exit early. | |
160 super_node = graph.get(edge.dst) | |
161 if super_node is None: return | |
162 # recursively copy all super-class edges. | |
163 for e in super_node.super_edges(): | |
164 copy_super_edges(e) | |
165 # copy strong super-class edges (ignoring sub-class edges) to the sub class. | |
166 sub_node = graph[edge.src] | |
167 for e in super_node.edges.itervalues(): | |
168 if e.keeps_alive() and not e.is_subclass(): | |
169 new_edge = e.copy() | |
170 new_edge.src = sub_node.name | |
171 new_edge.lbl = '%s <: %s' % (super_node.name, e.lbl) | |
172 sub_node.edges[new_edge.key] = new_edge | |
173 # add a strong sub-class edge. | |
174 sub_edge = edge.copy() | |
175 sub_edge.kind = 1 | |
176 sub_edge.lbl = '<subclass>' | |
177 sub_edge.src = super_node.name | |
178 sub_edge.dst = sub_node.name | |
179 super_node.edges[sub_edge.key] = sub_edge | |
180 | |
181 def complete_graph(): | |
182 for node in graph.itervalues(): | |
183 for edge in node.super_edges(): | |
184 copy_super_edges(edge) | |
185 for edge in node.edges.itervalues(): | |
186 if edge.is_root(): | |
187 roots.append(edge) | |
188 log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy) | |
189 | |
190 def reset_graph(): | |
191 for n in graph.itervalues(): | |
192 n.reset() | |
193 | |
194 def shortest_path(start, end): | |
195 start.cost = 0 | |
196 minlist = [start] | |
197 while len(minlist) > 0: | |
198 minlist.sort(key=lambda n: -n.cost) | |
199 current = minlist.pop() | |
200 current.visited = True | |
201 if current == end or current.cost >= end.cost + 1: | |
202 return | |
203 for e in current.edges.itervalues(): | |
204 if not e.keeps_alive(): | |
205 continue | |
206 dst = graph.get(e.dst) | |
207 if dst is None or dst.visited: | |
208 continue | |
209 if current.cost < dst.cost: | |
210 dst.cost = current.cost + 1 | |
211 dst.path = e | |
212 minlist.append(dst) | |
213 | |
214 def detect_cycles(): | |
215 for root_edge in roots: | |
216 reset_graph() | |
217 src = graph[root_edge.src] | |
218 dst = graph.get(root_edge.dst) | |
219 if dst is None: | |
220 print "\nPersistent root to incomplete destination object:" | |
221 print root_edge | |
222 set_reported_error(True) | |
223 continue | |
224 # Find the shortest path from the root target (dst) to its host (src) | |
225 shortest_path(dst, src) | |
226 if src.cost < sys.maxint: | |
227 report_cycle(root_edge) | |
228 | |
229 def report_cycle(root_edge): | |
230 dst = graph[root_edge.dst] | |
231 print "\nFound a potentially leaking cycle starting from a GC root:" | |
232 path = [] | |
233 edge = root_edge | |
234 dst.path = None | |
235 while edge: | |
236 path.append(edge) | |
237 edge = graph[edge.src].path | |
238 path.append(root_edge) | |
239 path.reverse() | |
240 # Find the max loc length for pretty printing. | |
241 max_loc = 0 | |
242 for p in path: | |
243 if len(p.loc) > max_loc: | |
244 max_loc = len(p.loc) | |
245 for p in path[:-1]: | |
246 print (p.loc + ':').ljust(max_loc + 1), p | |
247 set_reported_error(True) | |
248 | |
249 def main(): | |
250 global args | |
251 args = parser.parse_args() | |
252 if not args.detect_cycles: | |
253 print "Please select an operation to perform (eg, -c to detect cycles)" | |
254 parser.print_help() | |
255 return 1 | |
256 if args.use_stdin: | |
257 log("Reading files from stdin") | |
258 for f in sys.stdin: | |
259 build_graph(f.strip()) | |
260 else: | |
261 log("Reading files and directories from command line") | |
262 if len(args.files) == 0: | |
263 print "Please provide files or directores for building the graph" | |
264 parser.print_help() | |
265 return 1 | |
266 for f in args.files: | |
267 if os.path.isdir(f): | |
268 log("Building graph from files in directory: " + f) | |
269 build_graphs_in_dir(f) | |
270 else: | |
271 log("Building graph from file: " + f) | |
272 build_graph(f) | |
273 log("Completing graph construction (%d graph nodes)" % len(graph)) | |
274 complete_graph() | |
275 if args.detect_cycles: | |
276 log("Detecting cycles containg GC roots") | |
277 detect_cycles() | |
278 if reported_error(): | |
279 return 1 | |
280 return 0 | |
281 | |
282 if __name__ == '__main__': | |
283 sys.exit(main()) | |
OLD | NEW |