OLD | NEW |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """Models for loading in chrome. | 5 """Models for loading in chrome. |
6 | 6 |
7 (Redirect the following to the general model module once we have one) | 7 (Redirect the following to the general model module once we have one) |
8 A model is an object with the following methods. | 8 A model is an object with the following methods. |
9 CostMs(): return the cost of the cost in milliseconds. | 9 CostMs(): return the cost of the model in milliseconds. |
10 Set(): set model-specifical parameters. | 10 Set(): set model-specific parameters. |
11 | 11 |
12 ResourceGraph | 12 ResourceGraph |
13 This creates a DAG of resource dependancies from loading.log_requests to model | 13 This creates a DAG of resource dependencies from loading.log_requests to model |
14 loading time. The model may be parameterized by changing the loading time of | 14 loading time. The model may be parameterized by changing the loading time of |
15 a particular or all resources. | 15 a particular or all resources. |
16 """ | 16 """ |
17 | 17 |
18 import logging | 18 import logging |
19 import os | 19 import os |
20 import urlparse | 20 import urlparse |
21 import sys | 21 import sys |
22 | 22 |
23 import dag | 23 import dag |
24 import log_parser | 24 import request_dependencies_lens |
25 | 25 |
26 class ResourceGraph(object): | 26 class ResourceGraph(object): |
27 """A model of loading by a DAG (tree?) of resource dependancies. | 27 """A model of loading by a DAG (tree?) of resource dependencies. |
28 | 28 |
29 Set parameters: | 29 Set parameters: |
30 cache_all: if true, assume zero loading time for all resources. | 30 cache_all: if true, assume zero loading time for all resources. |
31 """ | 31 """ |
32 | 32 def __init__(self, trace): |
33 def __init__(self, requests): | 33 """Create from a LoadingTrace. |
34 """Create from a parsed request set. | |
35 | 34 |
36 Args: | 35 Args: |
37 requests: [RequestData, ...] filtered RequestData from loading.log_parser. | 36 trace: (LoadingTrace) Loading trace. |
38 """ | 37 """ |
39 self._BuildDag(requests) | 38 self._BuildDag(trace) |
40 self._global_start = min([n.StartTime() for n in self._node_info]) | 39 self._global_start = min([n.StartTime() for n in self._node_info]) |
41 # Sort before splitting children so that we can correctly dectect if a | 40 # Sort before splitting children so that we can correctly detect if a |
42 # reparented child is actually a dependency for a child of its new parent. | 41 # reparented child is actually a dependency for a child of its new parent. |
43 try: | 42 try: |
44 for n in dag.TopologicalSort(self._nodes): | 43 for n in dag.TopologicalSort(self._nodes): |
45 self._SplitChildrenByTime(self._node_info[n.Index()]) | 44 self._SplitChildrenByTime(self._node_info[n.Index()]) |
46 except AssertionError as exc: | 45 except AssertionError as exc: |
47 sys.stderr.write('Bad topological sort: %s\n' | 46 sys.stderr.write('Bad topological sort: %s\n' |
48 'Skipping child split\n' % str(exc)) | 47 'Skipping child split\n' % str(exc)) |
49 self._cache_all = False | 48 self._cache_all = False |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
175 costs[n.Index()] = cost | 174 costs[n.Index()] = cost |
176 max_cost = max(costs) | 175 max_cost = max(costs) |
177 assert max_cost > 0 # Otherwise probably the filter went awry. | 176 assert max_cost > 0 # Otherwise probably the filter went awry. |
178 if path_list is not None: | 177 if path_list is not None: |
179 del path_list[:-1] | 178 del path_list[:-1] |
180 n = (i for i in self._nodes if costs[i.Index()] == max_cost).next() | 179 n = (i for i in self._nodes if costs[i.Index()] == max_cost).next() |
181 path_list.append(self._node_info[n.Index()]) | 180 path_list.append(self._node_info[n.Index()]) |
182 while n.Predecessors(): | 181 while n.Predecessors(): |
183 n = reduce(lambda costliest, next: | 182 n = reduce(lambda costliest, next: |
184 next if (self._node_filter(next) and | 183 next if (self._node_filter(next) and |
185 cost[next.Index()] > cost[costliest.Index()]) | 184 costs[next.Index()] > costs[costliest.Index()]) |
186 else costliest, | 185 else costliest, |
187 n.Predecessors()) | 186 n.Predecessors()) |
188 path_list.insert(0, self._node_info[n.Index()]) | 187 path_list.insert(0, self._node_info[n.Index()]) |
189 return max_cost | 188 return max_cost |
190 | 189 |
191 def FilterAds(self, node): | 190 def FilterAds(self, node): |
192 """A filter for use in eg, Cost, to remove advertising nodes. | 191 """A filter for use in eg, Cost, to remove advertising nodes. |
193 | 192 |
194 Args: | 193 Args: |
195 node: A dag.Node. | 194 node: A dag.Node. |
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
315 """Create a new node info. | 314 """Create a new node info. |
316 | 315 |
317 Args: | 316 Args: |
318 node: The node to augment. | 317 node: The node to augment. |
319 request: The request associated with this node. | 318 request: The request associated with this node. |
320 """ | 319 """ |
321 self._request = request | 320 self._request = request |
322 self._node = node | 321 self._node = node |
323 self._edge_costs = {} | 322 self._edge_costs = {} |
324 self._edge_annotations = {} | 323 self._edge_annotations = {} |
325 # All fields in timing are millis relative to requestTime, which is epoch | 324 # All fields in timing are millis relative to request_time, which is epoch |
326 # seconds. | 325 # seconds. |
327 self._node_cost = max([t for f, t in request.timing._asdict().iteritems() | 326 self._node_cost = max([t for f, t in request.timing._asdict().iteritems() |
328 if f != 'requestTime']) | 327 if f != 'request_time']) |
329 | 328 |
330 def __str__(self): | 329 def __str__(self): |
331 return self.ShortName() | 330 return self.ShortName() |
332 | 331 |
333 def Node(self): | 332 def Node(self): |
334 return self._node | 333 return self._node |
335 | 334 |
336 def Index(self): | 335 def Index(self): |
337 return self._node.Index() | 336 return self._node.Index() |
338 | 337 |
339 def Request(self): | 338 def Request(self): |
340 return self._request | 339 return self._request |
341 | 340 |
342 def NodeCost(self): | 341 def NodeCost(self): |
343 return self._node_cost | 342 return self._node_cost |
344 | 343 |
345 def EdgeCost(self, s): | 344 def EdgeCost(self, s): |
346 return self._edge_costs[s] | 345 return self._edge_costs[s] |
347 | 346 |
348 def StartTime(self): | 347 def StartTime(self): |
349 return self._request.timing.requestTime * 1000 | 348 return self._request.timing.request_time * 1000 |
350 | 349 |
351 def EndTime(self): | 350 def EndTime(self): |
352 return self._request.timing.requestTime * 1000 + self._node_cost | 351 return self._request.timing.request_time * 1000 + self._node_cost |
353 | 352 |
354 def EdgeAnnotation(self, s): | 353 def EdgeAnnotation(self, s): |
355 assert s.Node() in self.Node().Successors() | 354 assert s.Node() in self.Node().Successors() |
356 return self._edge_annotations.get(s, []) | 355 return self._edge_annotations.get(s, []) |
357 | 356 |
358 def ContentType(self): | 357 def ContentType(self): |
359 return log_parser.Resource.FromRequest(self._request).GetContentType() | 358 return self._request.GetContentType() |
360 | 359 |
361 def ShortName(self): | 360 def ShortName(self): |
362 return log_parser.Resource.FromRequest(self._request).GetShortName() | 361 """Returns either the hostname of the resource, or the filename, |
| 362 or the end of the path. Tries to include the domain as much as possible. |
| 363 """ |
| 364 parsed = urlparse.urlparse(self._request.url) |
| 365 path = parsed.path |
| 366 if path != '' and path != '/': |
| 367 last_path = parsed.path.split('/')[-1] |
| 368 if len(last_path) < 10: |
| 369 if len(path) < 10: |
| 370 return parsed.hostname + '/' + path |
| 371 else: |
| 372 return parsed.hostname + '/..' + parsed.path[-10:] |
| 373 elif len(last_path) > 10: |
| 374 return parsed.hostname + '/..' + last_path[:5] |
| 375 else: |
| 376 return parsed.hostname + '/..' + last_path |
| 377 else: |
| 378 return parsed.hostname |
363 | 379 |
364 def Url(self): | 380 def Url(self): |
365 return self._request.url | 381 return self._request.url |
366 | 382 |
367 def SetEdgeCost(self, child, cost): | 383 def SetEdgeCost(self, child, cost): |
368 assert child.Node() in self._node.Successors() | 384 assert child.Node() in self._node.Successors() |
369 self._edge_costs[child] = cost | 385 self._edge_costs[child] = cost |
370 | 386 |
371 def AddEdgeAnnotation(self, s, annotation): | 387 def AddEdgeAnnotation(self, s, annotation): |
372 assert s.Node() in self._node.Successors() | 388 assert s.Node() in self._node.Successors() |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
415 def _EdgeCost(self, parent, child): | 431 def _EdgeCost(self, parent, child): |
416 """Convenience function for redirecting Nodes to _NodeInfo.""" | 432 """Convenience function for redirecting Nodes to _NodeInfo.""" |
417 return self._node_info[parent.Index()].EdgeCost( | 433 return self._node_info[parent.Index()].EdgeCost( |
418 self._node_info[child.Index()]) | 434 self._node_info[child.Index()]) |
419 | 435 |
420 def _EdgeAnnotation(self, parent, child): | 436 def _EdgeAnnotation(self, parent, child): |
421 """Convenience function for redirecting Nodes to _NodeInfo.""" | 437 """Convenience function for redirecting Nodes to _NodeInfo.""" |
422 return self._node_info[parent.Index()].EdgeAnnotation( | 438 return self._node_info[parent.Index()].EdgeAnnotation( |
423 self._node_info[child.Index()]) | 439 self._node_info[child.Index()]) |
424 | 440 |
425 def _BuildDag(self, requests): | 441 def _BuildDag(self, trace): |
426 """Build DAG of resources. | 442 """Build DAG of resources. |
427 | 443 |
428 Build a DAG from our requests and augment with _NodeInfo (see above) in a | 444 Build a DAG from our requests and augment with _NodeInfo (see above) in a |
429 parallel array indexed by Node.Index(). | 445 parallel array indexed by Node.Index(). |
430 | 446 |
431 Creates self._nodes and self._node_info. | 447 Creates self._nodes and self._node_info. |
432 | 448 |
433 Args: | 449 Args: |
434 requests: [Request, ...] Requests from loading.log_parser. | 450 trace: A LoadingTrace. |
435 """ | 451 """ |
436 self._nodes = [] | 452 self._nodes = [] |
437 self._node_info = [] | 453 self._node_info = [] |
438 indicies_by_url = {} | 454 index_by_request = {} |
439 requests_by_completion = log_parser.SortedByCompletion(requests) | 455 for request in trace.request_track.GetEvents(): |
440 for request in requests: | |
441 next_index = len(self._nodes) | 456 next_index = len(self._nodes) |
442 indicies_by_url.setdefault(request.url, []).append(next_index) | 457 assert request not in index_by_request |
| 458 index_by_request[request] = next_index |
443 node = dag.Node(next_index) | 459 node = dag.Node(next_index) |
444 node_info = self._NodeInfo(node, request) | 460 node_info = self._NodeInfo(node, request) |
445 self._nodes.append(node) | 461 self._nodes.append(node) |
446 self._node_info.append(node_info) | 462 self._node_info.append(node_info) |
447 for url, indicies in indicies_by_url.iteritems(): | 463 |
448 if len(indicies) > 1: | 464 dependencies = request_dependencies_lens.RequestDependencyLens( |
449 logging.warning('Multiple loads (%d) for url: %s' % | 465 trace).GetRequestDependencies() |
450 (len(indicies), url)) | 466 for child_rq, parent_rq, reason in dependencies: |
451 for i in xrange(len(requests)): | 467 parent = self._node_info[index_by_request[parent_rq]] |
452 request = requests[i] | 468 child = self._node_info[index_by_request[child_rq]] |
453 current_node_info = self._node_info[i] | 469 edge_cost = child.StartTime() - parent.EndTime() |
454 resource = log_parser.Resource.FromRequest(current_node_info.Request()) | 470 if edge_cost < 0: |
455 initiator = request.initiator | 471 edge_cost = 0 |
456 initiator_type = initiator['type'] | 472 if child.StartTime() < parent.StartTime(): |
457 predecessor_url = None | |
458 predecessor_type = None | |
459 # Classify & infer the predecessor. If a candidate url we identify as the | |
460 # predecessor is not in index_by_url, then we haven't seen it in our | |
461 # requests and we will try to find a better predecessor. | |
462 if initiator_type == 'parser': | |
463 url = initiator['url'] | |
464 if url in indicies_by_url: | |
465 predecessor_url = url | |
466 predecessor_type = 'parser' | |
467 elif initiator_type == 'script' and 'stackTrace' in initiator: | |
468 for frame in initiator['stackTrace']: | |
469 url = frame['url'] | |
470 if url in indicies_by_url: | |
471 predecessor_url = url | |
472 predecessor_type = 'stack' | |
473 break | |
474 elif initiator_type == 'script': | |
475 # When the initiator is a script without a stackTrace, infer that it | |
476 # comes from the most recent script from the same hostname. TLD+1 might | |
477 # be better, but finding what is a TLD requires a database. | |
478 request_hostname = urlparse.urlparse(request.url).hostname | |
479 sorted_script_requests_from_hostname = [ | |
480 r for r in requests_by_completion | |
481 if (resource.GetContentType() in ('script', 'html', 'json') | |
482 and urlparse.urlparse(r.url).hostname == request_hostname)] | |
483 most_recent = None | |
484 # Linear search is bad, but this shouldn't matter here. | |
485 for r in sorted_script_requests_from_hostname: | |
486 if r.timestamp < request.timing.requestTime: | |
487 most_recent = r | |
488 else: | |
489 break | |
490 if most_recent is not None: | |
491 url = most_recent.url | |
492 if url in indicies_by_url: | |
493 predecessor_url = url | |
494 predecessor_type = 'script_inferred' | |
495 # TODO(mattcary): we skip initiator type other, is that correct? | |
496 if predecessor_url is not None: | |
497 predecessor = self._FindBestPredecessor( | |
498 current_node_info, indicies_by_url[predecessor_url]) | |
499 edge_cost = current_node_info.StartTime() - predecessor.EndTime() | |
500 if edge_cost < 0: | |
501 edge_cost = 0 | |
502 if current_node_info.StartTime() < predecessor.StartTime(): | |
503 logging.error('Inverted dependency: %s->%s', | 473 logging.error('Inverted dependency: %s->%s', |
504 predecessor.ShortName(), current_node_info.ShortName()) | 474 parent.ShortName(), child.ShortName()) |
505 # Note that current.StartTime() < predecessor.EndTime() appears to | 475 # Note that child.StartTime() < parent.EndTime() appears to happen a |
506 # happen a fair amount in practice. | 476 # fair amount in practice. |
507 predecessor.Node().AddSuccessor(current_node_info.Node()) | 477 parent.Node().AddSuccessor(child.Node()) |
508 predecessor.SetEdgeCost(current_node_info, edge_cost) | 478 parent.SetEdgeCost(child, edge_cost) |
509 predecessor.AddEdgeAnnotation(current_node_info, predecessor_type) | 479 parent.AddEdgeAnnotation(child, reason) |
510 | |
511 def _FindBestPredecessor(self, node_info, candidate_indicies): | |
512 """Find best predecessor for node_info | |
513 | |
514 If there is only one candidate, we use it regardless of timings. We will | |
515 later warn about inverted dependencies. If there are more than one, we use | |
516 the latest whose end time is before node_info's start time. If there is no | |
517 such candidate, we throw up our hands and return an arbitrary one. | |
518 | |
519 Args: | |
520 node_info: _NodeInfo of interest. | |
521 candidate_indicies: indicies of candidate predecessors. | |
522 | |
523 Returns: | |
524 _NodeInfo of best predecessor. | |
525 """ | |
526 assert candidate_indicies | |
527 if len(candidate_indicies) == 1: | |
528 return self._node_info[candidate_indicies[0]] | |
529 candidate = self._node_info[candidate_indicies[0]] | |
530 for i in xrange(1, len(candidate_indicies)): | |
531 next_candidate = self._node_info[candidate_indicies[i]] | |
532 if (next_candidate.EndTime() < node_info.StartTime() and | |
533 next_candidate.StartTime() > candidate.StartTime()): | |
534 candidate = next_candidate | |
535 if candidate.EndTime() > node_info.StartTime(): | |
536 logging.warning('Multiple candidates but all inverted for ' + | |
537 node_info.ShortName()) | |
538 return candidate | |
539 | |
540 | 480 |
541 def _SplitChildrenByTime(self, parent): | 481 def _SplitChildrenByTime(self, parent): |
542 """Split children of a node by request times. | 482 """Split children of a node by request times. |
543 | 483 |
544 The initiator of a request may not be the true dependency of a request. For | 484 The initiator of a request may not be the true dependency of a request. For |
545 example, a script may appear to load several resources independently, but in | 485 example, a script may appear to load several resources independently, but in |
546 fact one of them may be a JSON data file, and the remaining resources assets | 486 fact one of them may be a JSON data file, and the remaining resources assets |
547 described in the JSON. The assets should be dependent upon the JSON data | 487 described in the JSON. The assets should be dependent upon the JSON data |
548 file, and not the original script. | 488 file, and not the original script. |
549 | 489 |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
617 highlight: a list of node items to emphasize. Any resource url which | 557 highlight: a list of node items to emphasize. Any resource url which |
618 contains any highlight text will be distinguished in the output. | 558 contains any highlight text will be distinguished in the output. |
619 | 559 |
620 Returns: | 560 Returns: |
621 A string describing the resource in graphviz format. | 561 A string describing the resource in graphviz format. |
622 The resource is color-coded according to its content type, and its shape | 562 The resource is color-coded according to its content type, and its shape |
623 is oval if its max-age is less than 300s (or if it's not cacheable). | 563 is oval if its max-age is less than 300s (or if it's not cacheable). |
624 """ | 564 """ |
625 node_info = self._node_info[index] | 565 node_info = self._node_info[index] |
626 color = self._CONTENT_TYPE_TO_COLOR[node_info.ContentType()] | 566 color = self._CONTENT_TYPE_TO_COLOR[node_info.ContentType()] |
627 max_age = log_parser.MaxAge(node_info.Request()) | 567 max_age = node_info.Request().MaxAge() |
628 shape = 'polygon' if max_age > 300 else 'oval' | 568 shape = 'polygon' if max_age > 300 else 'oval' |
629 styles = ['filled'] | 569 styles = ['filled'] |
630 if highlight: | 570 if highlight: |
631 for fragment in highlight: | 571 for fragment in highlight: |
632 if fragment in node_info.Url(): | 572 if fragment in node_info.Url(): |
633 styles.append('dotted') | 573 styles.append('dotted') |
634 break | 574 break |
635 return ('%d [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; ' | 575 return ('%d [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; ' |
636 'fillcolor = %s; shape = %s];\n' | 576 'fillcolor = %s; shape = %s];\n' |
637 % (index, node_info.ShortName(), | 577 % (index, node_info.ShortName(), |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
726 Dict of image url + short name to NodeInfo. | 666 Dict of image url + short name to NodeInfo. |
727 """ | 667 """ |
728 image_to_info = {} | 668 image_to_info = {} |
729 for n in self._node_info: | 669 for n in self._node_info: |
730 if (n.ContentType().startswith('image') and | 670 if (n.ContentType().startswith('image') and |
731 not self._IsAdUrl(n.Url())): | 671 not self._IsAdUrl(n.Url())): |
732 key = str((n.Url(), n.ShortName(), n.StartTime())) | 672 key = str((n.Url(), n.ShortName(), n.StartTime())) |
733 assert key not in image_to_info, n.Url() | 673 assert key not in image_to_info, n.Url() |
734 image_to_info[key] = n | 674 image_to_info[key] = n |
735 return image_to_info | 675 return image_to_info |
OLD | NEW |