Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(203)

Side by Side Diff: chrome/tools/history-viz.py

Issue 8680018: Fix python scripts in src/chrome/ (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: copyright Created 9 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/tools/extract_histograms.py ('k') | chrome/tools/inconsistent-eol.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/python 1 #!/usr/bin/env python
2 # Copyright (c) 2009 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Process a History database and dump a .dot file suitable for GraphViz. 6 """Process a History database and dump a .dot file suitable for GraphViz.
7 7
8 This is useful for debugging history redirect flows. 8 This is useful for debugging history redirect flows.
9 9
10 An example run of this program: 10 An example run of this program:
11 python /src/history-viz.py History > foo.dot 11 python /src/history-viz.py History > foo.dot
12 /c/Program\ Files/Graphviz2.18/bin/dot -Tpng foo.dot -o foo.png 12 /c/Program\ Files/Graphviz2.18/bin/dot -Tpng foo.dot -o foo.png
13 """ 13 """
14 14
15 import struct 15 import struct
16 import subprocess 16 import subprocess
17 import sys 17 import sys
18 import urlparse 18 import urlparse
19 19
20 class URL: 20
21 # Some transition types, copied from page_transition_types.h.
22 TRANS_TYPES = {
23 0: 'link',
24 1: 'typed',
25 2: 'most-visited',
26 3: 'auto subframe',
27 7: 'form',
28 }
29
30
31 class URL(object):
21 """Represents a broken-down URL from our most visited database.""" 32 """Represents a broken-down URL from our most visited database."""
22 33
23 def __init__(self, id, url): 34 def __init__(self, id, url):
24 """Initialize a new URL object. |id| is the database id of the URL.""" 35 """Initialize a new URL object. |id| is the database id of the URL."""
25 self.id = id 36 self.id = id
26 self.url = url 37 self.url = url
27 scheme, loc, path, query, fragment = urlparse.urlsplit(url) 38 scheme, loc, path, query, fragment = urlparse.urlsplit(url)
28 if scheme == 'http': 39 if scheme == 'http':
29 scheme = '' # Shorten for display purposes. 40 scheme = '' # Shorten for display purposes.
30 if len(scheme) > 0: 41 if len(scheme) > 0:
(...skipping 29 matching lines...) Expand all
60 if len(part) > MAX_LEN: 71 if len(part) > MAX_LEN:
61 part = part[0:MAX_LEN-3] + '...' 72 part = part[0:MAX_LEN-3] + '...'
62 if len(line)+len(part) > MAX_LEN: 73 if len(line)+len(part) > MAX_LEN:
63 lines.append(line) 74 lines.append(line)
64 line = '' 75 line = ''
65 line += part 76 line += part
66 if len(line) > 0: 77 if len(line) > 0:
67 lines.append(line) 78 lines.append(line)
68 return '\n'.join(lines) 79 return '\n'.join(lines)
69 80
70 class Edge: 81
82 class Edge(object):
71 """Represents an edge in the history graph, connecting two pages. 83 """Represents an edge in the history graph, connecting two pages.
72 84
73 If a link is traversed twice, it is one Edge with two entries in 85 If a link is traversed twice, it is one Edge with two entries in
74 the .transitions array.""" 86 the .transitions array."""
75 def __init__(self, src, dst): 87 def __init__(self, src, dst):
76 self.src = src 88 self.src = src
77 self.dst = dst 89 self.dst = dst
78 self.transitions = [] 90 self.transitions = []
79 91
80 def Transitions(self): 92 def Transitions(self):
81 """Return a dictionary mapping transition type -> occurences.""" 93 """Return a dictionary mapping transition type -> occurences."""
82 all = {} 94 all = {}
83 for trans in self.transitions: 95 for trans in self.transitions:
84 all[trans] = all.get(trans, 0) + 1 96 all[trans] = all.get(trans, 0) + 1
85 # We currently don't use the chain type. 97 # We currently don't use the chain type.
86 # TODO(evanm): make this a command-line option. 98 # TODO(evanm): make this a command-line option.
87 # if trans & 0x30000000 != 0: 99 # if trans & 0x30000000 != 0:
88 # chain = '' 100 # chain = ''
89 # if trans & 0x10000000: 101 # if trans & 0x10000000:
90 # chain = 'start' 102 # chain = 'start'
91 # if trans & 0x20000000: 103 # if trans & 0x20000000:
92 # if len(chain) == 0: 104 # if len(chain) == 0:
93 # chain = 'end' 105 # chain = 'end'
94 # else: 106 # else:
95 # chain = '' 107 # chain = ''
96 # if len(chain) > 0: 108 # if len(chain) > 0:
97 # edge['chain'] = chain 109 # edge['chain'] = chain
98 return all 110 return all
99 111
112
100 def ClusterBy(objs, pred): 113 def ClusterBy(objs, pred):
101 """Group a list of objects by a predicate. 114 """Group a list of objects by a predicate.
102 115
103 Given a list of objects and a predicate over the objects, return a 116 Given a list of objects and a predicate over the objects, return a
104 dictionary mapping pred(obj) -> all objs with the same pred(obj).""" 117 dictionary mapping pred(obj) -> all objs with the same pred(obj)."""
105 clusters = {} 118 clusters = {}
106 for obj in objs: 119 for obj in objs:
107 cluster = pred(obj) 120 cluster = pred(obj)
108 clusters[cluster] = clusters.get(cluster, []) 121 clusters[cluster] = clusters.get(cluster, [])
109 clusters[cluster].append(obj) 122 clusters[cluster].append(obj)
110 return clusters 123 return clusters
111 124
112 def EscapeDot(str): 125
126 def EscapeDot(string):
113 """Escape a string suitable for embedding in a graphviz graph.""" 127 """Escape a string suitable for embedding in a graphviz graph."""
114 # TODO(evanm): this is likely not sufficient. 128 # TODO(evanm): this is likely not sufficient.
115 return str.replace('\n', '\\n') 129 return string.replace('\n', '\\n')
116 130
117 class SQLite: 131
132 class SQLite(object):
118 """Trivial interface to executing SQLite queries. 133 """Trivial interface to executing SQLite queries.
119 Spawns a new process with each call.""" 134 Spawns a new process with each call."""
120 def __init__(self, file=None): 135 def __init__(self, file=None):
121 self.file = file 136 self.file = file
122 137
123 def Run(self, sql): 138 def Run(self, sql):
124 """Execute |sql|, yielding each row of results as an array.""" 139 """Execute |sql|, yielding each row of results as an array."""
125 subproc = subprocess.Popen(['sqlite', self.file], 140 subproc = subprocess.Popen(['sqlite', self.file],
126 stdin=subprocess.PIPE, 141 stdin=subprocess.PIPE,
127 stdout=subprocess.PIPE) 142 stdout=subprocess.PIPE)
128 subproc.stdin.write('.mode tabs\n') 143 subproc.stdin.write('.mode tabs\n')
129 subproc.stdin.write(sql + ';') 144 subproc.stdin.write(sql + ';')
130 subproc.stdin.close() 145 subproc.stdin.close()
131 for line in subproc.stdout: 146 for line in subproc.stdout:
132 row = line.strip().split('\t') 147 row = line.strip().split('\t')
133 yield row 148 yield row
134 149
150
135 def LoadHistory(filename): 151 def LoadHistory(filename):
136 db = SQLite(filename) 152 db = SQLite(filename)
137 153
138 urls = {} # Map of urlid => url. 154 urls = {} # Map of urlid => url.
139 urls['0'] = URL('0', 'start') # Node name '0' is our special 'start' node. 155 urls['0'] = URL('0', 'start') # Node name '0' is our special 'start' node.
140 for id, url in db.Run('SELECT id, url FROM urls'): 156 for id, url in db.Run('SELECT id, url FROM urls'):
141 urls[id] = URL(id, url) 157 urls[id] = URL(id, url)
142 158
143 visiturlids = {} # Map of visitid => urlid. 159 visiturlids = {} # Map of visitid => urlid.
144 visiturlids['0'] = '0' # '0' is our special 'start' node. 160 visiturlids['0'] = '0' # '0' is our special 'start' node.
145 edges = {} # Map of urlid->urlid->Edge. 161 edges = {} # Map of urlid->urlid->Edge.
146 for src, dst, url, trans in db.Run('SELECT from_visit, id, url, transition ' 162 for src, dst, url, trans in db.Run('SELECT from_visit, id, url, transition '
147 'FROM visits ORDER BY id'): 163 'FROM visits ORDER BY id'):
148 visiturlids[dst] = url 164 visiturlids[dst] = url
149 src = visiturlids[src] 165 src = visiturlids[src]
150 dst = visiturlids[dst] 166 dst = visiturlids[dst]
151 edges[src] = edges.get(src, {}) 167 edges[src] = edges.get(src, {})
152 edge = edges[src][dst] = edges[src].get(dst, Edge(src, dst)) 168 edge = edges[src][dst] = edges[src].get(dst, Edge(src, dst))
153 # SQLite outputs transition values as signed integers, but they're really 169 # SQLite outputs transition values as signed integers, but they're really
154 # a bitfield. Below does "unsigned trans = static_cast<unsigned>(trans)". 170 # a bitfield. Below does "unsigned trans = static_cast<unsigned>(trans)".
155 trans = struct.unpack('I', struct.pack('i', int(trans)))[0] 171 trans = struct.unpack('I', struct.pack('i', int(trans)))[0]
156 edge.transitions.append(trans) 172 edge.transitions.append(trans)
157 173
158 return urls, edges 174 return urls, edges
159 175
160 # Some transition types, copied from page_transition_types.h.
161 TRANS_TYPES = {
162 0: 'link',
163 1: 'typed',
164 2: 'most-visited',
165 3: 'auto subframe',
166 7: 'form',
167 }
168 176
169 urls, edges = LoadHistory(sys.argv[1]) 177 def main():
178 urls, edges = LoadHistory(sys.argv[1])
179 print 'digraph G {'
180 print ' graph [rankdir=LR]' # Display left to right.
181 print ' node [shape=box]' # Display nodes as boxes.
182 print ' subgraph { rank=source; 0 [label="start"] }'
170 183
171 print 'digraph G {' 184 # Output all the nodes within graph clusters.
172 print ' graph [rankdir=LR]' # Display left to right. 185 hosts = ClusterBy(urls.values(), lambda url: url.host)
173 print ' node [shape=box]' # Display nodes as boxes. 186 for i, (host, urls) in enumerate(hosts.items()):
174 print ' subgraph { rank=source; 0 [label="start"] }' 187 # Cluster all URLs under this host if it has more than one entry.
188 host_clustered = len(urls) > 1
189 if host_clustered:
190 print 'subgraph clusterhost%d {' % i
191 print ' label="%s"' % host
192 paths = ClusterBy(urls, lambda url: url.path)
193 for j, (path, urls) in enumerate(paths.items()):
194 # Cluster all URLs under this host if it has more than one entry.
195 path_clustered = host_clustered and len(urls) > 1
196 if path_clustered:
197 print ' subgraph cluster%d%d {' % (i, j)
198 print ' label="%s"' % path
199 for url in urls:
200 if url.id == '0': continue # We already output the special start node.
201 pretty = url.PrettyPrint(include_host=not host_clustered,
202 include_path=not path_clustered)
203 print ' %s [label="%s"]' % (url.id, EscapeDot(pretty))
204 if path_clustered:
205 print ' }'
206 if host_clustered:
207 print '}'
175 208
176 # Output all the nodes within graph clusters. 209 # Output all the edges between nodes.
177 hosts = ClusterBy(urls.values(), lambda url: url.host) 210 for src, dsts in edges.items():
178 for i, (host, urls) in enumerate(hosts.items()): 211 for dst, edge in dsts.items():
179 # Cluster all URLs under this host if it has more than one entry. 212 # Gather up all the transitions into the label.
180 host_clustered = len(urls) > 1 213 label = [] # Label for the edge.
181 if host_clustered: 214 transitions = edge.Transitions()
182 print 'subgraph clusterhost%d {' % i 215 for trans, count in transitions.items():
183 print ' label="%s"' % host 216 text = ''
184 paths = ClusterBy(urls, lambda url: url.path) 217 if count > 1:
185 for j, (path, urls) in enumerate(paths.items()): 218 text = '%dx ' % count
186 # Cluster all URLs under this host if it has more than one entry. 219 base_type = trans & 0xFF
187 path_clustered = host_clustered and len(urls) > 1 220 redir = (trans & 0xC0000000) != 0
188 if path_clustered: 221 start = (trans & 0x10000000) != 0
189 print ' subgraph cluster%d%d {' % (i, j) 222 end = (trans & 0x20000000) != 0
190 print ' label="%s"' % path 223 if start or end:
191 for url in urls: 224 if start:
192 if url.id == '0': continue # We already output the special start node. 225 text += '<'
193 pretty = url.PrettyPrint(include_host=not host_clustered, 226 if end:
194 include_path=not path_clustered) 227 text += '>'
195 print ' %s [label="%s"]' % (url.id, EscapeDot(pretty)) 228 text += ' '
196 if path_clustered: 229 if redir:
197 print ' }' 230 text += 'R '
198 if host_clustered: 231 text += TRANS_TYPES.get(base_type, 'trans%d' % base_type)
199 print '}' 232 label.append(text)
233 if len(label) == 0:
234 continue
200 235
201 # Output all the edges between nodes. 236 edgeattrs = [] # Graphviz attributes for the edge.
202 for src, dsts in edges.items(): 237 # If the edge is from the start and the transitions are fishy, make it
203 for dst, edge in dsts.items(): 238 # display as a dotted line.
204 # Gather up all the transitions into the label. 239 if src == '0' and len(transitions.keys()) == 1 and transitions.has_key(0):
205 label = [] # Label for the edge. 240 edgeattrs.append('style=dashed')
206 transitions = edge.Transitions() 241 if len(label) > 0:
207 for trans, count in transitions.items(): 242 edgeattrs.append('label="%s"' % EscapeDot('\n'.join(label)))
208 text = ''
209 if count > 1:
210 text = '%dx ' % count
211 base_type = trans & 0xFF
212 redir = (trans & 0xC0000000) != 0
213 start = (trans & 0x10000000) != 0
214 end = (trans & 0x20000000) != 0
215 if start or end:
216 if start:
217 text += '<'
218 if end:
219 text += '>'
220 text += ' '
221 if redir:
222 text += 'R '
223 text += TRANS_TYPES.get(base_type, 'trans%d' % base_type)
224 label.append(text)
225 if len(label) == 0:
226 continue
227 243
228 edgeattrs = [] # Graphviz attributes for the edge. 244 out = '%s -> %s' % (src, dst)
229 # If the edge is from the start and the transitions are fishy, make it 245 if len(edgeattrs) > 0:
230 # display as a dotted line. 246 out += ' [%s]' % ','.join(edgeattrs)
231 if src == '0' and len(transitions.keys()) == 1 and transitions.has_key(0): 247 print out
232 edgeattrs.append('style=dashed') 248 print '}'
233 if len(label) > 0: 249 return 0
234 edgeattrs.append('label="%s"' % EscapeDot('\n'.join(label)))
235 250
236 out = '%s -> %s' % (src, dst)
237 if len(edgeattrs) > 0:
238 out += ' [%s]' % ','.join(edgeattrs)
239 print out
240 print '}'
241 251
252 if __name__ == '__main__':
253 sys.exit(main())
OLDNEW
« no previous file with comments | « chrome/tools/extract_histograms.py ('k') | chrome/tools/inconsistent-eol.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698