chrome/tools/history-viz.py - Issue 8680018: Fix python scripts in src/chrome/

Side by Side Diff: chrome/tools/history-viz.py

Issue 8680018: Fix python scripts in src/chrome/ (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: copyright Created 9 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 #!/usr/bin/python	1 #!/usr/bin/env python

2 # Copyright (c) 2009 The Chromium Authors. All rights reserved.	2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be	3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	4 # found in the LICENSE file.

5	5

6 """Process a History database and dump a .dot file suitable for GraphViz.	6 """Process a History database and dump a .dot file suitable for GraphViz.

7	7

8 This is useful for debugging history redirect flows.	8 This is useful for debugging history redirect flows.

9	9

10 An example run of this program:	10 An example run of this program:

11 python /src/history-viz.py History > foo.dot	11 python /src/history-viz.py History > foo.dot

12 /c/Program\ Files/Graphviz2.18/bin/dot -Tpng foo.dot -o foo.png	12 /c/Program\ Files/Graphviz2.18/bin/dot -Tpng foo.dot -o foo.png

13 """	13 """

14	14

15 import struct	15 import struct

16 import subprocess	16 import subprocess

17 import sys	17 import sys

18 import urlparse	18 import urlparse

19	19

20 class URL:	20

	21 # Some transition types, copied from page_transition_types.h.

	22 TRANS_TYPES = {

	23 0: 'link',

	24 1: 'typed',

	25 2: 'most-visited',

	26 3: 'auto subframe',

	27 7: 'form',

	28 }

	29

	30

	31 class URL(object):

21 """Represents a broken-down URL from our most visited database."""	32 """Represents a broken-down URL from our most visited database."""

22	33

23 def __init__(self, id, url):	34 def __init__(self, id, url):

24 """Initialize a new URL object. \|id\| is the database id of the URL."""	35 """Initialize a new URL object. \|id\| is the database id of the URL."""

25 self.id = id	36 self.id = id

26 self.url = url	37 self.url = url

27 scheme, loc, path, query, fragment = urlparse.urlsplit(url)	38 scheme, loc, path, query, fragment = urlparse.urlsplit(url)

28 if scheme == 'http':	39 if scheme == 'http':

29 scheme = '' # Shorten for display purposes.	40 scheme = '' # Shorten for display purposes.

30 if len(scheme) > 0:	41 if len(scheme) > 0:

(...skipping 29 matching lines...) Expand all Loading...
60 if len(part) > MAX_LEN:	71 if len(part) > MAX_LEN:

61 part = part[0:MAX_LEN-3] + '...'	72 part = part[0:MAX_LEN-3] + '...'

62 if len(line)+len(part) > MAX_LEN:	73 if len(line)+len(part) > MAX_LEN:

63 lines.append(line)	74 lines.append(line)

64 line = ''	75 line = ''

65 line += part	76 line += part

66 if len(line) > 0:	77 if len(line) > 0:

67 lines.append(line)	78 lines.append(line)

68 return '\n'.join(lines)	79 return '\n'.join(lines)

69	80

70 class Edge:	81

	82 class Edge(object):

71 """Represents an edge in the history graph, connecting two pages.	83 """Represents an edge in the history graph, connecting two pages.

72	84

73 If a link is traversed twice, it is one Edge with two entries in	85 If a link is traversed twice, it is one Edge with two entries in

74 the .transitions array."""	86 the .transitions array."""

75 def __init__(self, src, dst):	87 def __init__(self, src, dst):

76 self.src = src	88 self.src = src

77 self.dst = dst	89 self.dst = dst

78 self.transitions = []	90 self.transitions = []

79	91

80 def Transitions(self):	92 def Transitions(self):

81 """Return a dictionary mapping transition type -> occurences."""	93 """Return a dictionary mapping transition type -> occurences."""

82 all = {}	94 all = {}

83 for trans in self.transitions:	95 for trans in self.transitions:

84 all[trans] = all.get(trans, 0) + 1	96 all[trans] = all.get(trans, 0) + 1

85 # We currently don't use the chain type.	97 # We currently don't use the chain type.

86 # TODO(evanm): make this a command-line option.	98 # TODO(evanm): make this a command-line option.

87 # if trans & 0x30000000 != 0:	99 # if trans & 0x30000000 != 0:

88 # chain = ''	100 # chain = ''

89 # if trans & 0x10000000:	101 # if trans & 0x10000000:

90 # chain = 'start'	102 # chain = 'start'

91 # if trans & 0x20000000:	103 # if trans & 0x20000000:

92 # if len(chain) == 0:	104 # if len(chain) == 0:

93 # chain = 'end'	105 # chain = 'end'

94 # else:	106 # else:

95 # chain = ''	107 # chain = ''

96 # if len(chain) > 0:	108 # if len(chain) > 0:

97 # edge['chain'] = chain	109 # edge['chain'] = chain

98 return all	110 return all

99	111

	112

100 def ClusterBy(objs, pred):	113 def ClusterBy(objs, pred):

101 """Group a list of objects by a predicate.	114 """Group a list of objects by a predicate.

102	115

103 Given a list of objects and a predicate over the objects, return a	116 Given a list of objects and a predicate over the objects, return a

104 dictionary mapping pred(obj) -> all objs with the same pred(obj)."""	117 dictionary mapping pred(obj) -> all objs with the same pred(obj)."""

105 clusters = {}	118 clusters = {}

106 for obj in objs:	119 for obj in objs:

107 cluster = pred(obj)	120 cluster = pred(obj)

108 clusters[cluster] = clusters.get(cluster, [])	121 clusters[cluster] = clusters.get(cluster, [])

109 clusters[cluster].append(obj)	122 clusters[cluster].append(obj)

110 return clusters	123 return clusters

111	124

112 def EscapeDot(str):	125

	126 def EscapeDot(string):

113 """Escape a string suitable for embedding in a graphviz graph."""	127 """Escape a string suitable for embedding in a graphviz graph."""

114 # TODO(evanm): this is likely not sufficient.	128 # TODO(evanm): this is likely not sufficient.

115 return str.replace('\n', '\\n')	129 return string.replace('\n', '\\n')

116	130

117 class SQLite:	131

	132 class SQLite(object):

118 """Trivial interface to executing SQLite queries.	133 """Trivial interface to executing SQLite queries.

119 Spawns a new process with each call."""	134 Spawns a new process with each call."""

120 def __init__(self, file=None):	135 def __init__(self, file=None):

121 self.file = file	136 self.file = file

122	137

123 def Run(self, sql):	138 def Run(self, sql):

124 """Execute \|sql\|, yielding each row of results as an array."""	139 """Execute \|sql\|, yielding each row of results as an array."""

125 subproc = subprocess.Popen(['sqlite', self.file],	140 subproc = subprocess.Popen(['sqlite', self.file],

126 stdin=subprocess.PIPE,	141 stdin=subprocess.PIPE,

127 stdout=subprocess.PIPE)	142 stdout=subprocess.PIPE)

128 subproc.stdin.write('.mode tabs\n')	143 subproc.stdin.write('.mode tabs\n')

129 subproc.stdin.write(sql + ';')	144 subproc.stdin.write(sql + ';')

130 subproc.stdin.close()	145 subproc.stdin.close()

131 for line in subproc.stdout:	146 for line in subproc.stdout:

132 row = line.strip().split('\t')	147 row = line.strip().split('\t')

133 yield row	148 yield row

134	149

	150

135 def LoadHistory(filename):	151 def LoadHistory(filename):

136 db = SQLite(filename)	152 db = SQLite(filename)

137	153

138 urls = {} # Map of urlid => url.	154 urls = {} # Map of urlid => url.

139 urls['0'] = URL('0', 'start') # Node name '0' is our special 'start' node.	155 urls['0'] = URL('0', 'start') # Node name '0' is our special 'start' node.

140 for id, url in db.Run('SELECT id, url FROM urls'):	156 for id, url in db.Run('SELECT id, url FROM urls'):

141 urls[id] = URL(id, url)	157 urls[id] = URL(id, url)

142	158

143 visiturlids = {} # Map of visitid => urlid.	159 visiturlids = {} # Map of visitid => urlid.

144 visiturlids['0'] = '0' # '0' is our special 'start' node.	160 visiturlids['0'] = '0' # '0' is our special 'start' node.

145 edges = {} # Map of urlid->urlid->Edge.	161 edges = {} # Map of urlid->urlid->Edge.

146 for src, dst, url, trans in db.Run('SELECT from_visit, id, url, transition '	162 for src, dst, url, trans in db.Run('SELECT from_visit, id, url, transition '

147 'FROM visits ORDER BY id'):	163 'FROM visits ORDER BY id'):

148 visiturlids[dst] = url	164 visiturlids[dst] = url

149 src = visiturlids[src]	165 src = visiturlids[src]

150 dst = visiturlids[dst]	166 dst = visiturlids[dst]

151 edges[src] = edges.get(src, {})	167 edges[src] = edges.get(src, {})

152 edge = edges[src][dst] = edges[src].get(dst, Edge(src, dst))	168 edge = edges[src][dst] = edges[src].get(dst, Edge(src, dst))

153 # SQLite outputs transition values as signed integers, but they're really	169 # SQLite outputs transition values as signed integers, but they're really

154 # a bitfield. Below does "unsigned trans = static_cast<unsigned>(trans)".	170 # a bitfield. Below does "unsigned trans = static_cast<unsigned>(trans)".

155 trans = struct.unpack('I', struct.pack('i', int(trans)))[0]	171 trans = struct.unpack('I', struct.pack('i', int(trans)))[0]

156 edge.transitions.append(trans)	172 edge.transitions.append(trans)

157	173

158 return urls, edges	174 return urls, edges

159	175

160 # Some transition types, copied from page_transition_types.h.

161 TRANS_TYPES = {

162 0: 'link',

163 1: 'typed',

164 2: 'most-visited',

165 3: 'auto subframe',

166 7: 'form',

167 }

168	176

169 urls, edges = LoadHistory(sys.argv[1])	177 def main():

	178 urls, edges = LoadHistory(sys.argv[1])

	179 print 'digraph G {'

	180 print ' graph [rankdir=LR]' # Display left to right.

	181 print ' node [shape=box]' # Display nodes as boxes.

	182 print ' subgraph { rank=source; 0 [label="start"] }'

170	183

171 print 'digraph G {'	184 # Output all the nodes within graph clusters.

172 print ' graph [rankdir=LR]' # Display left to right.	185 hosts = ClusterBy(urls.values(), lambda url: url.host)

173 print ' node [shape=box]' # Display nodes as boxes.	186 for i, (host, urls) in enumerate(hosts.items()):

174 print ' subgraph { rank=source; 0 [label="start"] }'	187 # Cluster all URLs under this host if it has more than one entry.

	188 host_clustered = len(urls) > 1

	189 if host_clustered:

	190 print 'subgraph clusterhost%d {' % i

	191 print ' label="%s"' % host

	192 paths = ClusterBy(urls, lambda url: url.path)

	193 for j, (path, urls) in enumerate(paths.items()):

	194 # Cluster all URLs under this host if it has more than one entry.

	195 path_clustered = host_clustered and len(urls) > 1

	196 if path_clustered:

	197 print ' subgraph cluster%d%d {' % (i, j)

	198 print ' label="%s"' % path

	199 for url in urls:

	200 if url.id == '0': continue # We already output the special start node.

	201 pretty = url.PrettyPrint(include_host=not host_clustered,

	202 include_path=not path_clustered)

	203 print ' %s [label="%s"]' % (url.id, EscapeDot(pretty))

	204 if path_clustered:

	205 print ' }'

	206 if host_clustered:

	207 print '}'

175	208

176 # Output all the nodes within graph clusters.	209 # Output all the edges between nodes.

177 hosts = ClusterBy(urls.values(), lambda url: url.host)	210 for src, dsts in edges.items():

178 for i, (host, urls) in enumerate(hosts.items()):	211 for dst, edge in dsts.items():

179 # Cluster all URLs under this host if it has more than one entry.	212 # Gather up all the transitions into the label.

180 host_clustered = len(urls) > 1	213 label = [] # Label for the edge.

181 if host_clustered:	214 transitions = edge.Transitions()

182 print 'subgraph clusterhost%d {' % i	215 for trans, count in transitions.items():

183 print ' label="%s"' % host	216 text = ''

184 paths = ClusterBy(urls, lambda url: url.path)	217 if count > 1:

185 for j, (path, urls) in enumerate(paths.items()):	218 text = '%dx ' % count

186 # Cluster all URLs under this host if it has more than one entry.	219 base_type = trans & 0xFF

187 path_clustered = host_clustered and len(urls) > 1	220 redir = (trans & 0xC0000000) != 0

188 if path_clustered:	221 start = (trans & 0x10000000) != 0

189 print ' subgraph cluster%d%d {' % (i, j)	222 end = (trans & 0x20000000) != 0

190 print ' label="%s"' % path	223 if start or end:

191 for url in urls:	224 if start:

192 if url.id == '0': continue # We already output the special start node.	225 text += '<'

193 pretty = url.PrettyPrint(include_host=not host_clustered,	226 if end:

194 include_path=not path_clustered)	227 text += '>'

195 print ' %s [label="%s"]' % (url.id, EscapeDot(pretty))	228 text += ' '

196 if path_clustered:	229 if redir:

197 print ' }'	230 text += 'R '

198 if host_clustered:	231 text += TRANS_TYPES.get(base_type, 'trans%d' % base_type)

199 print '}'	232 label.append(text)

	233 if len(label) == 0:

	234 continue

200	235

201 # Output all the edges between nodes.	236 edgeattrs = [] # Graphviz attributes for the edge.

202 for src, dsts in edges.items():	237 # If the edge is from the start and the transitions are fishy, make it

203 for dst, edge in dsts.items():	238 # display as a dotted line.

204 # Gather up all the transitions into the label.	239 if src == '0' and len(transitions.keys()) == 1 and transitions.has_key(0):

205 label = [] # Label for the edge.	240 edgeattrs.append('style=dashed')

206 transitions = edge.Transitions()	241 if len(label) > 0:

207 for trans, count in transitions.items():	242 edgeattrs.append('label="%s"' % EscapeDot('\n'.join(label)))

208 text = ''

209 if count > 1:

210 text = '%dx ' % count

211 base_type = trans & 0xFF

212 redir = (trans & 0xC0000000) != 0

213 start = (trans & 0x10000000) != 0

214 end = (trans & 0x20000000) != 0

215 if start or end:

216 if start:

217 text += '<'

218 if end:

219 text += '>'

220 text += ' '

221 if redir:

222 text += 'R '

223 text += TRANS_TYPES.get(base_type, 'trans%d' % base_type)

224 label.append(text)

225 if len(label) == 0:

226 continue

227	243

228 edgeattrs = [] # Graphviz attributes for the edge.	244 out = '%s -> %s' % (src, dst)

229 # If the edge is from the start and the transitions are fishy, make it	245 if len(edgeattrs) > 0:

230 # display as a dotted line.	246 out += ' [%s]' % ','.join(edgeattrs)

231 if src == '0' and len(transitions.keys()) == 1 and transitions.has_key(0):	247 print out

232 edgeattrs.append('style=dashed')	248 print '}'

233 if len(label) > 0:	249 return 0

234 edgeattrs.append('label="%s"' % EscapeDot('\n'.join(label)))

235	250

236 out = '%s -> %s' % (src, dst)

237 if len(edgeattrs) > 0:

238 out += ' [%s]' % ','.join(edgeattrs)

239 print out

240 print '}'

241	251

	252 if __name__ == '__main__':

	253 sys.exit(main())

OLD	NEW

« no previous file with comments | « chrome/tools/extract_histograms.py ('k') | chrome/tools/inconsistent-eol.py » ('j') | no next file with comments »