| OLD | NEW |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """Models for loading in chrome. | 5 """Models for loading in chrome. |
| 6 | 6 |
| 7 (Redirect the following to the general model module once we have one) | 7 (Redirect the following to the general model module once we have one) |
| 8 A model is an object with the following methods. | 8 A model is an object with the following methods. |
| 9 CostMs(): return the cost of the model in milliseconds. | 9 CostMs(): return the cost of the model in milliseconds. |
| 10 Set(): set model-specific parameters. | 10 Set(): set model-specific parameters. |
| (...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 195 | 195 |
| 196 def FilterAds(self, node): | 196 def FilterAds(self, node): |
| 197 """A filter for use in eg, Cost, to remove advertising nodes. | 197 """A filter for use in eg, Cost, to remove advertising nodes. |
| 198 | 198 |
| 199 Args: | 199 Args: |
| 200 node: A dag.Node. | 200 node: A dag.Node. |
| 201 | 201 |
| 202 Returns: | 202 Returns: |
| 203 True if the node is not ad-related. | 203 True if the node is not ad-related. |
| 204 """ | 204 """ |
| 205 return not self._IsAdUrl(self._node_info[node.Index()].Url()) | 205 node_info = self._node_info[node.Index()] |
| 206 return not (node_info.IsAd() or node_info.IsTracking()) |
| 206 | 207 |
| 207 def MakeGraphviz(self, output, highlight=None): | 208 def MakeGraphviz(self, output, highlight=None): |
| 208 """Output a graphviz representation of our DAG. | 209 """Output a graphviz representation of our DAG. |
| 209 | 210 |
| 210 Args: | 211 Args: |
| 210 output: a file-like output stream which receives a graphviz dot. | 212 output: a file-like output stream which receives a graphviz dot. |
| 212 highlight: a list of node items to emphasize. Any resource url which | 213 highlight: a list of node items to emphasize. Any resource url which |
| 213 contains any highlight text will be distinguished in the output. | 214 contains any highlight text will be distinguished in the output. |
| 214 """ | 215 """ |
| 215 output.write("""digraph dependencies { | 216 output.write("""digraph dependencies { |
| (...skipping 279 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 495 self._nodes = [] | 496 self._nodes = [] |
| 496 self._node_info = [] | 497 self._node_info = [] |
| 497 index_by_request = {} | 498 index_by_request = {} |
| 498 for request in trace.request_track.GetEvents(): | 499 for request in trace.request_track.GetEvents(): |
| 499 next_index = len(self._nodes) | 500 next_index = len(self._nodes) |
| 500 assert request not in index_by_request | 501 assert request not in index_by_request |
| 501 index_by_request[request] = next_index | 502 index_by_request[request] = next_index |
| 502 node = dag.Node(next_index) | 503 node = dag.Node(next_index) |
| 503 node_info = self._NodeInfo(node, request) | 504 node_info = self._NodeInfo(node, request) |
| 504 if self._content_lens: | 505 if self._content_lens: |
| 505 node.SetRequestContent(self._content_lens.IsAdRequest(request), | 506 node_info.SetRequestContent( |
| 506 self._content_lens.IsTrackingRequest(request)) | 507 self._content_lens.IsAdRequest(request), |
| 508 self._content_lens.IsTrackingRequest(request)) |
| 507 self._nodes.append(node) | 509 self._nodes.append(node) |
| 508 self._node_info.append(node_info) | 510 self._node_info.append(node_info) |
| 509 | 511 |
| 510 dependencies = request_dependencies_lens.RequestDependencyLens( | 512 dependencies = request_dependencies_lens.RequestDependencyLens( |
| 511 trace).GetRequestDependencies() | 513 trace).GetRequestDependencies() |
| 512 for parent_rq, child_rq, reason in dependencies: | 514 for parent_rq, child_rq, reason in dependencies: |
| 513 parent = self._node_info[index_by_request[parent_rq]] | 515 parent = self._node_info[index_by_request[parent_rq]] |
| 514 child = self._node_info[index_by_request[child_rq]] | 516 child = self._node_info[index_by_request[child_rq]] |
| 515 edge_cost = child.StartTime() - parent.EndTime() | 517 edge_cost = child.StartTime() - parent.EndTime() |
| 516 if edge_cost < 0: | 518 if edge_cost < 0: |
| (...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 632 if node_info.IsAd() or node_info.IsTracking(): | 634 if node_info.IsAd() or node_info.IsTracking(): |
| 633 styles += ['bold', 'diagonals'] | 635 styles += ['bold', 'diagonals'] |
| 634 return ('%d [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; ' | 636 return ('%d [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; ' |
| 635 'fillcolor = %s; shape = %s];\n' | 637 'fillcolor = %s; shape = %s];\n' |
| 636 % (index, node_info.ShortName(), | 638 % (index, node_info.ShortName(), |
| 637 node_info.StartTime() - self._global_start, | 639 node_info.StartTime() - self._global_start, |
| 638 node_info.EndTime() - self._global_start, | 640 node_info.EndTime() - self._global_start, |
| 639 node_info.EndTime() - node_info.StartTime(), | 641 node_info.EndTime() - node_info.StartTime(), |
| 640 ','.join(styles), color, shape)) | 642 ','.join(styles), color, shape)) |
| 641 | 643 |
| 642 @classmethod | |
| 643 def _IsAdUrl(cls, url): | |
| 644 """Return true if the url is an ad. | |
| 645 | |
| 646 We group content that doesn't seem to be specific to the website along with | |
| 647 ads, eg staticxx.facebook.com, as well as analytics like googletagmanager (? | |
| 648 is this correct?). | |
| 649 | |
| 650 Args: | |
| 651 url: The full string url to examine. | |
| 652 | |
| 653 Returns: | |
| 654 True iff the url appears to be an ad. | |
| 655 | |
| 656 """ | |
| 657 # See below for how these patterns are defined. | |
| 658 AD_PATTERNS = ['2mdn.net', | |
| 659 'admarvel.com', | |
| 660 'adnxs.com', | |
| 661 'adobedtm.com', | |
| 662 'adsrvr.org', | |
| 663 'adsafeprotected.com', | |
| 664 'adsymptotic.com', | |
| 665 'adtech.de', | |
| 666 'adtechus.com', | |
| 667 'advertising.com', | |
| 668 'atwola.com', # brand protection from cscglobal.com? | |
| 669 'bounceexchange.com', | |
| 670 'betrad.com', | |
| 671 'casalemedia.com', | |
| 672 'cloudfront.net//test.png', | |
| 673 'cloudfront.net//atrk.js', | |
| 674 'contextweb.com', | |
| 675 'crwdcntrl.net', | |
| 676 'doubleclick.net', | |
| 677 'dynamicyield.com', | |
| 678 'krxd.net', | |
| 679 'facebook.com//ping', | |
| 680 'fastclick.net', | |
| 681 'google.com//-ads.js', | |
| 682 'cse.google.com', # Custom search engine. | |
| 683 'googleadservices.com', | |
| 684 'googlesyndication.com', | |
| 685 'googletagmanager.com', | |
| 686 'lightboxcdn.com', | |
| 687 'mediaplex.com', | |
| 688 'meltdsp.com', | |
| 689 'mobile.nytimes.com//ads-success', | |
| 690 'mookie1.com', | |
| 691 'newrelic.com', | |
| 692 'nr-data.net', # Apparently part of newrelic. | |
| 693 'optnmnstr.com', | |
| 694 'pubmatic.com', | |
| 695 'quantcast.com', | |
| 696 'quantserve.com', | |
| 697 'rubiconproject.com', | |
| 698 'scorecardresearch.com', | |
| 699 'sekindo.com', | |
| 700 'serving-sys.com', | |
| 701 'sharethrough.com', | |
| 702 'staticxx.facebook.com', # ? | |
| 703 'syndication.twimg.com', | |
| 704 'tapad.com', | |
| 705 'yieldmo.com', | |
| 706 ] | |
| 707 parts = urlparse.urlparse(url) | |
| 708 for pattern in AD_PATTERNS: | |
| 709 if '//' in pattern: | |
| 710 domain, path = pattern.split('//') | |
| 711 else: | |
| 712 domain, path = (pattern, None) | |
| 713 if parts.netloc.endswith(domain): | |
| 714 if not path or path in parts.path: | |
| 715 return True | |
| 716 return False | |
| 717 | |
| 718 def _ExtractImages(self): | 644 def _ExtractImages(self): |
| 719 """Return interesting image resources. | 645 """Return interesting image resources. |
| 720 | 646 |
| 721 Uninteresting image resources are things like ads that we don't expect to be | 647 Uninteresting image resources are things like ads that we don't expect to be |
| 722 constant across fetches. | 648 constant across fetches. |
| 723 | 649 |
| 724 Returns: | 650 Returns: |
| 725 Dict of image url + short name to NodeInfo. | 651 Dict of image url + short name to NodeInfo. |
| 726 """ | 652 """ |
| 727 image_to_info = {} | 653 image_to_info = {} |
| 728 for n in self._node_info: | 654 for n in self._node_info: |
| 729 if (n.ContentType().startswith('image') and | 655 if (n.ContentType().startswith('image') and |
| 730 not self._IsAdUrl(n.Url())): | 656 not self._IsAdUrl(n.Url())): |
| 731 key = str((n.Url(), n.ShortName(), n.StartTime())) | 657 key = str((n.Url(), n.ShortName(), n.StartTime())) |
| 732 assert key not in image_to_info, n.Url() | 658 assert key not in image_to_info, n.Url() |
| 733 image_to_info[key] = n | 659 image_to_info[key] = n |
| 734 return image_to_info | 660 return image_to_info |
| OLD | NEW |