Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(417)

Side by Side Diff: tools/android/loading/loading_model.py

Issue 1610273002: Upgrade loading_model to use the new request_track and loading_trace. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: rebase Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | tools/android/loading/loading_model_unittest.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 """Models for loading in chrome. 5 """Models for loading in chrome.
6 6
7 (Redirect the following to the general model module once we have one) 7 (Redirect the following to the general model module once we have one)
8 A model is an object with the following methods. 8 A model is an object with the following methods.
9 CostMs(): return the cost of the cost in milliseconds. 9 CostMs(): return the cost of the model in milliseconds.
10 Set(): set model-specifical parameters. 10 Set(): set model-specific parameters.
11 11
12 ResourceGraph 12 ResourceGraph
13 This creates a DAG of resource dependancies from loading.log_requests to model 13 This creates a DAG of resource dependencies from loading.log_requests to model
14 loading time. The model may be parameterized by changing the loading time of 14 loading time. The model may be parameterized by changing the loading time of
15 a particular or all resources. 15 a particular or all resources.
16 """ 16 """
17 17
18 import logging 18 import logging
19 import os 19 import os
20 import urlparse 20 import urlparse
21 import sys 21 import sys
22 22
23 import dag 23 import dag
24 import log_parser 24 import request_dependencies_lens
25 25
26 class ResourceGraph(object): 26 class ResourceGraph(object):
27   """A model of loading by a DAG (tree?) of resource dependencies. 27   """A model of loading by a DAG (tree?) of resource dependencies.
28 28
29 Set parameters: 29 Set parameters:
30 cache_all: if true, assume zero loading time for all resources. 30 cache_all: if true, assume zero loading time for all resources.
31 """ 31 """
32 32 def __init__(self, trace):
33 def __init__(self, requests): 33 """Create from a LoadingTrace.
34 """Create from a parsed request set.
35 34
36 Args: 35 Args:
37 requests: [RequestData, ...] filtered RequestData from loading.log_parser. 36 trace: (LoadingTrace) Loading trace.
38 """ 37 """
39 self._BuildDag(requests) 38 self._BuildDag(trace)
40 self._global_start = min([n.StartTime() for n in self._node_info]) 39 self._global_start = min([n.StartTime() for n in self._node_info])
41     # Sort before splitting children so that we can correctly detect if a 40     # Sort before splitting children so that we can correctly detect if a
42 # reparented child is actually a dependency for a child of its new parent. 41 # reparented child is actually a dependency for a child of its new parent.
43 try: 42 try:
44 for n in dag.TopologicalSort(self._nodes): 43 for n in dag.TopologicalSort(self._nodes):
45 self._SplitChildrenByTime(self._node_info[n.Index()]) 44 self._SplitChildrenByTime(self._node_info[n.Index()])
46 except AssertionError as exc: 45 except AssertionError as exc:
47 sys.stderr.write('Bad topological sort: %s\n' 46 sys.stderr.write('Bad topological sort: %s\n'
48 'Skipping child split\n' % str(exc)) 47 'Skipping child split\n' % str(exc))
49 self._cache_all = False 48 self._cache_all = False
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
175 costs[n.Index()] = cost 174 costs[n.Index()] = cost
176 max_cost = max(costs) 175 max_cost = max(costs)
177 assert max_cost > 0 # Otherwise probably the filter went awry. 176 assert max_cost > 0 # Otherwise probably the filter went awry.
178 if path_list is not None: 177 if path_list is not None:
179 del path_list[:-1] 178 del path_list[:-1]
180 n = (i for i in self._nodes if costs[i.Index()] == max_cost).next() 179 n = (i for i in self._nodes if costs[i.Index()] == max_cost).next()
181 path_list.append(self._node_info[n.Index()]) 180 path_list.append(self._node_info[n.Index()])
182 while n.Predecessors(): 181 while n.Predecessors():
183 n = reduce(lambda costliest, next: 182 n = reduce(lambda costliest, next:
184 next if (self._node_filter(next) and 183 next if (self._node_filter(next) and
185 cost[next.Index()] > cost[costliest.Index()]) 184 costs[next.Index()] > costs[costliest.Index()])
186 else costliest, 185 else costliest,
187 n.Predecessors()) 186 n.Predecessors())
188 path_list.insert(0, self._node_info[n.Index()]) 187 path_list.insert(0, self._node_info[n.Index()])
189 return max_cost 188 return max_cost
190 189
191 def FilterAds(self, node): 190 def FilterAds(self, node):
192 """A filter for use in eg, Cost, to remove advertising nodes. 191 """A filter for use in eg, Cost, to remove advertising nodes.
193 192
194 Args: 193 Args:
195 node: A dag.Node. 194 node: A dag.Node.
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after
315 """Create a new node info. 314 """Create a new node info.
316 315
317 Args: 316 Args:
318 node: The node to augment. 317 node: The node to augment.
319 request: The request associated with this node. 318 request: The request associated with this node.
320 """ 319 """
321 self._request = request 320 self._request = request
322 self._node = node 321 self._node = node
323 self._edge_costs = {} 322 self._edge_costs = {}
324 self._edge_annotations = {} 323 self._edge_annotations = {}
325 # All fields in timing are millis relative to requestTime, which is epoch 324 # All fields in timing are millis relative to request_time, which is epoch
326 # seconds. 325 # seconds.
327 self._node_cost = max([t for f, t in request.timing._asdict().iteritems() 326 self._node_cost = max([t for f, t in request.timing._asdict().iteritems()
328 if f != 'requestTime']) 327 if f != 'request_time'])
329 328
330 def __str__(self): 329 def __str__(self):
331 return self.ShortName() 330 return self.ShortName()
332 331
333 def Node(self): 332 def Node(self):
334 return self._node 333 return self._node
335 334
336 def Index(self): 335 def Index(self):
337 return self._node.Index() 336 return self._node.Index()
338 337
339 def Request(self): 338 def Request(self):
340 return self._request 339 return self._request
341 340
342 def NodeCost(self): 341 def NodeCost(self):
343 return self._node_cost 342 return self._node_cost
344 343
345 def EdgeCost(self, s): 344 def EdgeCost(self, s):
346 return self._edge_costs[s] 345 return self._edge_costs[s]
347 346
348 def StartTime(self): 347 def StartTime(self):
349 return self._request.timing.requestTime * 1000 348 return self._request.timing.request_time * 1000
350 349
351 def EndTime(self): 350 def EndTime(self):
352 return self._request.timing.requestTime * 1000 + self._node_cost 351 return self._request.timing.request_time * 1000 + self._node_cost
353 352
354 def EdgeAnnotation(self, s): 353 def EdgeAnnotation(self, s):
355 assert s.Node() in self.Node().Successors() 354 assert s.Node() in self.Node().Successors()
356 return self._edge_annotations.get(s, []) 355 return self._edge_annotations.get(s, [])
357 356
358 def ContentType(self): 357 def ContentType(self):
359 return log_parser.Resource.FromRequest(self._request).GetContentType() 358 return self._request.GetContentType()
360 359
361 def ShortName(self): 360 def ShortName(self):
362 return log_parser.Resource.FromRequest(self._request).GetShortName() 361 """Returns either the hostname of the resource, or the filename,
362 or the end of the path. Tries to include the domain as much as possible.
363 """
364 parsed = urlparse.urlparse(self._request.url)
365 path = parsed.path
366 if path != '' and path != '/':
367 last_path = parsed.path.split('/')[-1]
368 if len(last_path) < 10:
369 if len(path) < 10:
370 return parsed.hostname + '/' + path
371 else:
372 return parsed.hostname + '/..' + parsed.path[-10:]
373 elif len(last_path) > 10:
374 return parsed.hostname + '/..' + last_path[:5]
375 else:
376 return parsed.hostname + '/..' + last_path
377 else:
378 return parsed.hostname
363 379
364 def Url(self): 380 def Url(self):
365 return self._request.url 381 return self._request.url
366 382
367 def SetEdgeCost(self, child, cost): 383 def SetEdgeCost(self, child, cost):
368 assert child.Node() in self._node.Successors() 384 assert child.Node() in self._node.Successors()
369 self._edge_costs[child] = cost 385 self._edge_costs[child] = cost
370 386
371 def AddEdgeAnnotation(self, s, annotation): 387 def AddEdgeAnnotation(self, s, annotation):
372 assert s.Node() in self._node.Successors() 388 assert s.Node() in self._node.Successors()
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
415 def _EdgeCost(self, parent, child): 431 def _EdgeCost(self, parent, child):
416 """Convenience function for redirecting Nodes to _NodeInfo.""" 432 """Convenience function for redirecting Nodes to _NodeInfo."""
417 return self._node_info[parent.Index()].EdgeCost( 433 return self._node_info[parent.Index()].EdgeCost(
418 self._node_info[child.Index()]) 434 self._node_info[child.Index()])
419 435
420 def _EdgeAnnotation(self, parent, child): 436 def _EdgeAnnotation(self, parent, child):
421 """Convenience function for redirecting Nodes to _NodeInfo.""" 437 """Convenience function for redirecting Nodes to _NodeInfo."""
422 return self._node_info[parent.Index()].EdgeAnnotation( 438 return self._node_info[parent.Index()].EdgeAnnotation(
423 self._node_info[child.Index()]) 439 self._node_info[child.Index()])
424 440
425 def _BuildDag(self, requests): 441 def _BuildDag(self, trace):
426 """Build DAG of resources. 442 """Build DAG of resources.
427 443
428 Build a DAG from our requests and augment with _NodeInfo (see above) in a 444 Build a DAG from our requests and augment with _NodeInfo (see above) in a
429 parallel array indexed by Node.Index(). 445 parallel array indexed by Node.Index().
430 446
431 Creates self._nodes and self._node_info. 447 Creates self._nodes and self._node_info.
432 448
433 Args: 449 Args:
434 requests: [Request, ...] Requests from loading.log_parser. 450 trace: A LoadingTrace.
435 """ 451 """
436 self._nodes = [] 452 self._nodes = []
437 self._node_info = [] 453 self._node_info = []
438 indicies_by_url = {} 454 index_by_request = {}
439 requests_by_completion = log_parser.SortedByCompletion(requests) 455 for request in trace.request_track.GetEvents():
440 for request in requests:
441 next_index = len(self._nodes) 456 next_index = len(self._nodes)
442 indicies_by_url.setdefault(request.url, []).append(next_index) 457 assert request not in index_by_request
458 index_by_request[request] = next_index
443 node = dag.Node(next_index) 459 node = dag.Node(next_index)
444 node_info = self._NodeInfo(node, request) 460 node_info = self._NodeInfo(node, request)
445 self._nodes.append(node) 461 self._nodes.append(node)
446 self._node_info.append(node_info) 462 self._node_info.append(node_info)
447 for url, indicies in indicies_by_url.iteritems(): 463
448 if len(indicies) > 1: 464 dependencies = request_dependencies_lens.RequestDependencyLens(
449 logging.warning('Multiple loads (%d) for url: %s' % 465 trace).GetRequestDependencies()
450 (len(indicies), url)) 466 for child_rq, parent_rq, reason in dependencies:
451 for i in xrange(len(requests)): 467 parent = self._node_info[index_by_request[parent_rq]]
452 request = requests[i] 468 child = self._node_info[index_by_request[child_rq]]
453 current_node_info = self._node_info[i] 469 edge_cost = child.StartTime() - parent.EndTime()
454 resource = log_parser.Resource.FromRequest(current_node_info.Request()) 470 if edge_cost < 0:
455 initiator = request.initiator 471 edge_cost = 0
456 initiator_type = initiator['type'] 472 if child.StartTime() < parent.StartTime():
457 predecessor_url = None
458 predecessor_type = None
459 # Classify & infer the predecessor. If a candidate url we identify as the
460 # predecessor is not in index_by_url, then we haven't seen it in our
461 # requests and we will try to find a better predecessor.
462 if initiator_type == 'parser':
463 url = initiator['url']
464 if url in indicies_by_url:
465 predecessor_url = url
466 predecessor_type = 'parser'
467 elif initiator_type == 'script' and 'stackTrace' in initiator:
468 for frame in initiator['stackTrace']:
469 url = frame['url']
470 if url in indicies_by_url:
471 predecessor_url = url
472 predecessor_type = 'stack'
473 break
474 elif initiator_type == 'script':
475 # When the initiator is a script without a stackTrace, infer that it
476 # comes from the most recent script from the same hostname. TLD+1 might
477 # be better, but finding what is a TLD requires a database.
478 request_hostname = urlparse.urlparse(request.url).hostname
479 sorted_script_requests_from_hostname = [
480 r for r in requests_by_completion
481 if (resource.GetContentType() in ('script', 'html', 'json')
482 and urlparse.urlparse(r.url).hostname == request_hostname)]
483 most_recent = None
484 # Linear search is bad, but this shouldn't matter here.
485 for r in sorted_script_requests_from_hostname:
486 if r.timestamp < request.timing.requestTime:
487 most_recent = r
488 else:
489 break
490 if most_recent is not None:
491 url = most_recent.url
492 if url in indicies_by_url:
493 predecessor_url = url
494 predecessor_type = 'script_inferred'
495 # TODO(mattcary): we skip initiator type other, is that correct?
496 if predecessor_url is not None:
497 predecessor = self._FindBestPredecessor(
498 current_node_info, indicies_by_url[predecessor_url])
499 edge_cost = current_node_info.StartTime() - predecessor.EndTime()
500 if edge_cost < 0:
501 edge_cost = 0
502 if current_node_info.StartTime() < predecessor.StartTime():
503 logging.error('Inverted dependency: %s->%s', 473 logging.error('Inverted dependency: %s->%s',
504 predecessor.ShortName(), current_node_info.ShortName()) 474 parent.ShortName(), child.ShortName())
505 # Note that current.StartTime() < predecessor.EndTime() appears to 475 # Note that child.StartTime() < parent.EndTime() appears to happen a
506 # happen a fair amount in practice. 476 # fair amount in practice.
507 predecessor.Node().AddSuccessor(current_node_info.Node()) 477 parent.Node().AddSuccessor(child.Node())
508 predecessor.SetEdgeCost(current_node_info, edge_cost) 478 parent.SetEdgeCost(child, edge_cost)
509 predecessor.AddEdgeAnnotation(current_node_info, predecessor_type) 479 parent.AddEdgeAnnotation(child, reason)
510
511 def _FindBestPredecessor(self, node_info, candidate_indicies):
512 """Find best predecessor for node_info
513
514 If there is only one candidate, we use it regardless of timings. We will
515 later warn about inverted dependencies. If there are more than one, we use
516 the latest whose end time is before node_info's start time. If there is no
517 such candidate, we throw up our hands and return an arbitrary one.
518
519 Args:
520 node_info: _NodeInfo of interest.
521 candidate_indicies: indicies of candidate predecessors.
522
523 Returns:
524 _NodeInfo of best predecessor.
525 """
526 assert candidate_indicies
527 if len(candidate_indicies) == 1:
528 return self._node_info[candidate_indicies[0]]
529 candidate = self._node_info[candidate_indicies[0]]
530 for i in xrange(1, len(candidate_indicies)):
531 next_candidate = self._node_info[candidate_indicies[i]]
532 if (next_candidate.EndTime() < node_info.StartTime() and
533 next_candidate.StartTime() > candidate.StartTime()):
534 candidate = next_candidate
535 if candidate.EndTime() > node_info.StartTime():
536 logging.warning('Multiple candidates but all inverted for ' +
537 node_info.ShortName())
538 return candidate
539
540 480
541 def _SplitChildrenByTime(self, parent): 481 def _SplitChildrenByTime(self, parent):
542 """Split children of a node by request times. 482 """Split children of a node by request times.
543 483
544 The initiator of a request may not be the true dependency of a request. For 484 The initiator of a request may not be the true dependency of a request. For
545 example, a script may appear to load several resources independently, but in 485 example, a script may appear to load several resources independently, but in
546 fact one of them may be a JSON data file, and the remaining resources assets 486 fact one of them may be a JSON data file, and the remaining resources assets
547 described in the JSON. The assets should be dependent upon the JSON data 487 described in the JSON. The assets should be dependent upon the JSON data
548 file, and not the original script. 488 file, and not the original script.
549 489
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
617 highlight: a list of node items to emphasize. Any resource url which 557 highlight: a list of node items to emphasize. Any resource url which
618 contains any highlight text will be distinguished in the output. 558 contains any highlight text will be distinguished in the output.
619 559
620 Returns: 560 Returns:
621 A string describing the resource in graphviz format. 561 A string describing the resource in graphviz format.
622 The resource is color-coded according to its content type, and its shape 562 The resource is color-coded according to its content type, and its shape
623 is oval if its max-age is less than 300s (or if it's not cacheable). 563 is oval if its max-age is less than 300s (or if it's not cacheable).
624 """ 564 """
625 node_info = self._node_info[index] 565 node_info = self._node_info[index]
626 color = self._CONTENT_TYPE_TO_COLOR[node_info.ContentType()] 566 color = self._CONTENT_TYPE_TO_COLOR[node_info.ContentType()]
627 max_age = log_parser.MaxAge(node_info.Request()) 567 max_age = node_info.Request().MaxAge()
628 shape = 'polygon' if max_age > 300 else 'oval' 568 shape = 'polygon' if max_age > 300 else 'oval'
629 styles = ['filled'] 569 styles = ['filled']
630 if highlight: 570 if highlight:
631 for fragment in highlight: 571 for fragment in highlight:
632 if fragment in node_info.Url(): 572 if fragment in node_info.Url():
633 styles.append('dotted') 573 styles.append('dotted')
634 break 574 break
635 return ('%d [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; ' 575 return ('%d [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; '
636 'fillcolor = %s; shape = %s];\n' 576 'fillcolor = %s; shape = %s];\n'
637 % (index, node_info.ShortName(), 577 % (index, node_info.ShortName(),
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
726 Dict of image url + short name to NodeInfo. 666 Dict of image url + short name to NodeInfo.
727 """ 667 """
728 image_to_info = {} 668 image_to_info = {}
729 for n in self._node_info: 669 for n in self._node_info:
730 if (n.ContentType().startswith('image') and 670 if (n.ContentType().startswith('image') and
731 not self._IsAdUrl(n.Url())): 671 not self._IsAdUrl(n.Url())):
732 key = str((n.Url(), n.ShortName(), n.StartTime())) 672 key = str((n.Url(), n.ShortName(), n.StartTime()))
733 assert key not in image_to_info, n.Url() 673 assert key not in image_to_info, n.Url()
734 image_to_info[key] = n 674 image_to_info[key] = n
735 return image_to_info 675 return image_to_info
OLDNEW
« no previous file with comments | « no previous file | tools/android/loading/loading_model_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698