Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(25)

Side by Side Diff: tools/android/loading/resource_sack.py

Issue 1837193002: Clovis: update resource sack to use new dependency graph. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/android/loading/graph.py ('k') | tools/android/loading/resource_sack_display.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 """A collection of ResourceGraphs. 5 """A collection of ResourceGraphs.
6 6
7 Processes multiple ResourceGraphs, all presumably from requests to the same 7 Processes multiple ResourceGraphs, all presumably from requests to the same
8 site. Common urls are collected in Bags and different statistics on the 8 site. Common urls are collected in Bags and different statistics on the
9 relationship between bags are collected. 9 relationship between bags are collected.
10 """ 10 """
11 11
12 import collections 12 import collections
13 import json 13 import json
14 import sys 14 import sys
15 import urlparse 15 import urlparse
16 16
17 from collections import defaultdict 17 from collections import defaultdict
18 18
19 import content_classification_lens 19 import content_classification_lens
20 import dag 20 import graph
21 import user_satisfied_lens 21 import user_satisfied_lens
22 22
23 class GraphSack(object): 23 class GraphSack(object):
24 """Aggreate of ResourceGraphs. 24 """Aggreate of RequestDependencyGraphs.
25 25
26 Collects ResourceGraph nodes into bags, where each bag contains the nodes with 26 Collects RequestDependencyGraph nodes into bags, where each bag contains the
27 common urls. Dependency edges are tracked between bags (so that each bag may 27 nodes with common urls. Dependency edges are tracked between bags (so that
28 be considered as a node of a graph). This graph of bags is referred to as a 28 each bag may be considered as a node of a graph). This graph of bags is
29 sack. 29 referred to as a sack.
30 30
31 Each bag is associated with a dag.Node, even though the bag graph may not be a 31 Each bag is associated with a dag.Node, even though the bag graph may not be a
32 DAG. The edges are annotated with list of graphs and nodes that generated 32 DAG. The edges are annotated with list of graphs and nodes that generated
33 them. 33 them.
34 """ 34 """
35 # See CoreSet(). 35 # See CoreSet().
36 CORE_THRESHOLD = 0.8 36 CORE_THRESHOLD = 0.8
37 37
38 _GraphInfo = collections.namedtuple('_GraphInfo', ( 38 _GraphInfo = collections.namedtuple('_GraphInfo', (
39 'cost', # The graph cost (aka critical path length). 39 'cost', # The graph cost (aka critical path length).
40 'total_costs', # A vector by node index of total cost of each node.
41 )) 40 ))
42 41
43 def __init__(self): 42 def __init__(self):
44 # A bag is a node in our combined graph. 43 # Each bag in our sack is named as indicated by this map.
45 self._bags = [] 44 self._name_to_bag = {}
46 # Each bag in our sack corresponds to a url, as expressed by this map. 45 # List our edges by bag pairs: (from_bag, to_bag) -> graph.Edge.
47 self._url_to_bag = {} 46 self._edges = {}
48 # Maps graph -> _GraphInfo structures for each graph we've consumed. 47 # Maps graph -> _GraphInfo structures for each graph we've consumed.
49 self._graph_info = {} 48 self._graph_info = {}
50 49
51 def ConsumeGraph(self, graph): 50 # Our graph, updated after each ConsumeGraph.
51 self._graph = None
52
53 def ConsumeGraph(self, request_graph):
52 """Add a graph and process. 54 """Add a graph and process.
53 55
54 Args: 56 Args:
55 graph: (ResourceGraph) the graph to add. The graph is processed sorted 57 graph: (RequestDependencyGraph) the graph to add. The graph is processed
56 according to its current filter. 58 sorted according to its current filter.
Benoit L 2016/03/29 13:45:50 nit: There are no filters anymore.
mattcary 2016/03/29 13:56:07 Done.
57 """ 59 """
58 assert graph not in self._graph_info 60 assert graph not in self._graph_info
59 critical_path = [] 61 cost = request_graph.Cost()
60 total_costs = [] 62 self._graph_info[request_graph] = self._GraphInfo(cost=cost)
61 cost = graph.Cost(path_list=critical_path, 63 for n in request_graph.graph.Nodes():
Benoit L 2016/03/29 13:45:50 The nodes are no longer sorted, is it an issue? (b
mattcary 2016/03/29 13:56:07 No. The idea was that because adding nodes transit
62 costs_out=total_costs) 64 self.AddNode(request_graph, n)
63 self._graph_info[graph] = self._GraphInfo(
64 cost=cost, total_costs=total_costs)
65 for n in graph.Nodes(sort=True):
66 assert graph._node_filter(n.Node())
67 self.AddNode(graph, n)
68 for node in critical_path:
69 self._url_to_bag[node.Url()].MarkCritical()
70 65
71 def AddNode(self, graph, node): 66 # TODO(mattcary): this is inefficient but our current API doesn't require an
67 # explicit graph creation from the client.
68 self._graph = graph.DirectedGraph(self.bags, self._edges.itervalues())
69
70 def AddNode(self, request_graph, node):
72 """Add a node to our collection. 71 """Add a node to our collection.
73 72
74 Args: 73 Args:
75 graph: (ResourceGraph) the graph in which the node lives. 74 graph: (RequestDependencyGraph) the graph in which the node lives.
76 node: (NodeInfo) the node to add. 75 node: (RequestDependencyGraph node) the node to add.
77 76
78 Returns: 77 Returns:
79 The Bag containing the node. 78 The Bag containing the node.
80 """ 79 """
81 if not graph._node_filter(node): 80 sack_name = self._GetSackName(node)
82 return 81 if sack_name not in self._name_to_bag:
83 if node.Url() not in self._url_to_bag: 82 self._name_to_bag[sack_name] = Bag(self, sack_name)
84 new_index = len(self._bags) 83 bag = self._name_to_bag[sack_name]
85 self._bags.append(Bag(self, new_index, node.Url())) 84 bag.AddNode(request_graph, node)
86 self._url_to_bag[node.Url()] = self._bags[-1] 85 return bag
87 self._url_to_bag[node.Url()].AddNode(graph, node) 86
88 return self._url_to_bag[node.Url()] 87 def AddEdge(self, from_bag, to_bag):
88 """Add an edge between two bags."""
89 if (from_bag, to_bag) not in self._edges:
90 self._edges[(from_bag, to_bag)] = graph.Edge(from_bag, to_bag)
89 91
90 def CoreSet(self, *graph_sets): 92 def CoreSet(self, *graph_sets):
91 """Compute the core set of this sack. 93 """Compute the core set of this sack.
92 94
93 The core set of a sack is the set of resource that are common to most of the 95 The core set of a sack is the set of resource that are common to most of the
94 graphs in the sack. A core set of a set of graphs are the resources that 96 graphs in the sack. A core set of a set of graphs are the resources that
95 appear with frequency at least CORE_THRESHOLD. For a collection of graph 97 appear with frequency at least CORE_THRESHOLD. For a collection of graph
96 sets, for instance pulling the same page under different network 98 sets, for instance pulling the same page under different network
97 connections, we intersect the core sets to produce a page core set that 99 connections, we intersect the core sets to produce a page core set that
98 describes the key resources used by the page. See https://goo.gl/LmqQRS for 100 describes the key resources used by the page. See https://goo.gl/LmqQRS for
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
134 @property 136 @property
135 def num_graphs(self): 137 def num_graphs(self):
136 return len(self.graph_info) 138 return len(self.graph_info)
137 139
138 @property 140 @property
139 def graph_info(self): 141 def graph_info(self):
140 return self._graph_info 142 return self._graph_info
141 143
142 @property 144 @property
143 def bags(self): 145 def bags(self):
144 return self._bags 146 return self._name_to_bag.values()
145 147
146 def _SingleCore(self, graph_set): 148 def _SingleCore(self, graph_set):
147 core = set() 149 core = set()
148 graph_set = set(graph_set) 150 graph_set = set(graph_set)
149 num_graphs = len(graph_set) 151 num_graphs = len(graph_set)
150 for b in self.bags: 152 for b in self.bags:
151 count = sum([g in graph_set for g in b.graphs]) 153 count = sum([g in graph_set for g in b.graphs])
152 if float(count) / num_graphs > self.CORE_THRESHOLD: 154 if float(count) / num_graphs > self.CORE_THRESHOLD:
153 core.add(b.label) 155 core.add(b.label)
154 return core 156 return core
155 157
158 def _GetSackName(self, node):
159 return self._MakeShortname(node.request.url)
156 160
157 class Bag(dag.Node): 161 @classmethod
158 def __init__(self, sack, index, url): 162 def _MakeShortname(cls, url):
Benoit L 2016/03/29 13:45:50 nit: Add a TODO(lizeb) to move this method elsewhe
mattcary 2016/03/29 13:56:07 Done. Note it's not exactly copied... but I agree
159 super(Bag, self).__init__(index) 163 parsed = urlparse.urlparse(url)
164 if parsed.scheme == 'data':
165 if ';' in parsed.path:
166 kind, _ = parsed.path.split(';', 1)
167 else:
168 kind, _ = parsed.path.split(',', 1)
169 return 'data:' + kind
170 path = parsed.path[:10]
171 hostname = parsed.hostname if parsed.hostname else '?.?.?'
172 return hostname + '/' + path
173
174
175 class Bag(graph.Node):
176 def __init__(self, sack, label):
177 super(Bag, self).__init__()
160 self._sack = sack 178 self._sack = sack
161 self._url = url 179 self._label = label
162 self._label = self._MakeShortname(url)
163 # Maps a ResourceGraph to its Nodes contained in this Bag. 180 # Maps a ResourceGraph to its Nodes contained in this Bag.
164 self._graphs = defaultdict(set) 181 self._graphs = defaultdict(set)
165 # Maps each successor bag to the set of (graph, node, graph-successor)
166 # tuples that generated it.
167 self._successor_sources = defaultdict(set)
168 # Maps each successor bag to a set of edge costs. This is just used to
169 # track min and max; if we want more statistics we'd have to count the
170 # costs with multiplicity.
171 self._successor_edge_costs = defaultdict(set)
172
173 # Miscellaneous counts and costs used in display.
174 self._total_costs = []
175 self._relative_costs = []
176 self._num_critical = 0
177
178 @property
179 def url(self):
180 return self._url
181 182
182 @property 183 @property
183 def label(self): 184 def label(self):
184 return self._label 185 return self._label
185 186
186 @property 187 @property
187 def graphs(self): 188 def graphs(self):
188 return self._graphs 189 return self._graphs.iterkeys()
189
190 @property
191 def successor_sources(self):
192 return self._successor_sources
193
194 @property
195 def successor_edge_costs(self):
196 return self._successor_edge_costs
197
198 @property
199 def total_costs(self):
200 return self._total_costs
201
202 @property
203 def relative_costs(self):
204 return self._relative_costs
205
206 @property
207 def num_critical(self):
208 return self._num_critical
209 190
210 @property 191 @property
211 def num_nodes(self): 192 def num_nodes(self):
212 return len(self._total_costs) 193 return sum(len(g) for g in self._graphs.itervalues())
213 194
214 def MarkCritical(self): 195 def AddNode(self, request_graph, node):
215 self._num_critical += 1 196 if node in self._graphs[request_graph]:
216
217 def AddNode(self, graph, node):
218 if node in self._graphs[graph]:
219 return # Already added. 197 return # Already added.
220 graph_info = self._sack.graph_info[graph] 198 self._graphs[request_graph].add(node)
221 self._graphs[graph].add(node) 199 for edge in request_graph.graph.OutEdges(node):
222 node_total_cost = graph_info.total_costs[node.Index()] 200 out_bag = self._sack.AddNode(request_graph, edge.to_node)
223 self._total_costs.append(node_total_cost) 201 self._sack.AddEdge(self, out_bag)
224 self._relative_costs.append(
225 float(node_total_cost) / graph_info.cost)
226 for s in node.Node().Successors():
227 if not graph._node_filter(s):
228 continue
229 node_info = graph.NodeInfo(s)
230 successor_bag = self._sack.AddNode(graph, node_info)
231 self.AddSuccessor(successor_bag)
232 self._successor_sources[successor_bag].add((graph, node, s))
233 self._successor_edge_costs[successor_bag].add(graph.EdgeCost(node, s))
234
235 @classmethod
236 def _MakeShortname(cls, url):
237 parsed = urlparse.urlparse(url)
238 if parsed.scheme == 'data':
239 if ';' in parsed.path:
240 kind, _ = parsed.path.split(';', 1)
241 else:
242 kind, _ = parsed.path.split(',', 1)
243 return 'data:' + kind
244 path = parsed.path[:10]
245 hostname = parsed.hostname if parsed.hostname else '?.?.?'
246 return hostname + '/' + path
OLDNEW
« no previous file with comments | « tools/android/loading/graph.py ('k') | tools/android/loading/resource_sack_display.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698