OLD | NEW |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """Models for loading in chrome. | 5 """Models for loading in chrome. |
6 | 6 |
7 (Redirect the following to the general model module once we have one) | 7 (Redirect the following to the general model module once we have one) |
8 A model is an object with the following methods. | 8 A model is an object with the following methods. |
9 CostMs(): return the cost of the model in milliseconds. | 9 CostMs(): return the cost of the model in milliseconds. |
10 Set(): set model-specific parameters. | 10 Set(): set model-specific parameters. |
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
195 | 195 |
196 def FilterAds(self, node): | 196 def FilterAds(self, node): |
197 """A filter for use in eg, Cost, to remove advertising nodes. | 197 """A filter for use in eg, Cost, to remove advertising nodes. |
198 | 198 |
199 Args: | 199 Args: |
200 node: A dag.Node. | 200 node: A dag.Node. |
201 | 201 |
202 Returns: | 202 Returns: |
203 True if the node is not ad-related. | 203 True if the node is not ad-related. |
204 """ | 204 """ |
205 return not self._IsAdUrl(self._node_info[node.Index()].Url()) | 205 node_info = self._node_info[node.Index()] |
| 206 return not (node_info.IsAd() or node_info.IsTracking()) |
206 | 207 |
207 def MakeGraphviz(self, output, highlight=None): | 208 def MakeGraphviz(self, output, highlight=None): |
208 """Output a graphviz representation of our DAG. | 209 """Output a graphviz representation of our DAG. |
209 | 210 |
210 Args: | 211 Args: |
211 output: a file-like output stream which receives a graphviz dot. | 212 output: a file-like output stream which receives a graphviz dot. |
212 highlight: a list of node items to emphasize. Any resource url which | 213 highlight: a list of node items to emphasize. Any resource url which |
213 contains any highlight text will be distinguished in the output. | 214 contains any highlight text will be distinguished in the output. |
214 """ | 215 """ |
215 output.write("""digraph dependencies { | 216 output.write("""digraph dependencies { |
(...skipping 279 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
495 self._nodes = [] | 496 self._nodes = [] |
496 self._node_info = [] | 497 self._node_info = [] |
497 index_by_request = {} | 498 index_by_request = {} |
498 for request in trace.request_track.GetEvents(): | 499 for request in trace.request_track.GetEvents(): |
499 next_index = len(self._nodes) | 500 next_index = len(self._nodes) |
500 assert request not in index_by_request | 501 assert request not in index_by_request |
501 index_by_request[request] = next_index | 502 index_by_request[request] = next_index |
502 node = dag.Node(next_index) | 503 node = dag.Node(next_index) |
503 node_info = self._NodeInfo(node, request) | 504 node_info = self._NodeInfo(node, request) |
504 if self._content_lens: | 505 if self._content_lens: |
505 node.SetRequestContent(self._content_lens.IsAdRequest(request), | 506 node_info.SetRequestContent( |
506 self._content_lens.IsTrackingRequest(request)) | 507 self._content_lens.IsAdRequest(request), |
| 508 self._content_lens.IsTrackingRequest(request)) |
507 self._nodes.append(node) | 509 self._nodes.append(node) |
508 self._node_info.append(node_info) | 510 self._node_info.append(node_info) |
509 | 511 |
510 dependencies = request_dependencies_lens.RequestDependencyLens( | 512 dependencies = request_dependencies_lens.RequestDependencyLens( |
511 trace).GetRequestDependencies() | 513 trace).GetRequestDependencies() |
512 for parent_rq, child_rq, reason in dependencies: | 514 for parent_rq, child_rq, reason in dependencies: |
513 parent = self._node_info[index_by_request[parent_rq]] | 515 parent = self._node_info[index_by_request[parent_rq]] |
514 child = self._node_info[index_by_request[child_rq]] | 516 child = self._node_info[index_by_request[child_rq]] |
515 edge_cost = child.StartTime() - parent.EndTime() | 517 edge_cost = child.StartTime() - parent.EndTime() |
516 if edge_cost < 0: | 518 if edge_cost < 0: |
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
632 if node_info.IsAd() or node_info.IsTracking(): | 634 if node_info.IsAd() or node_info.IsTracking(): |
633 styles += ['bold', 'diagonals'] | 635 styles += ['bold', 'diagonals'] |
634 return ('%d [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; ' | 636 return ('%d [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; ' |
635 'fillcolor = %s; shape = %s];\n' | 637 'fillcolor = %s; shape = %s];\n' |
636 % (index, node_info.ShortName(), | 638 % (index, node_info.ShortName(), |
637 node_info.StartTime() - self._global_start, | 639 node_info.StartTime() - self._global_start, |
638 node_info.EndTime() - self._global_start, | 640 node_info.EndTime() - self._global_start, |
639 node_info.EndTime() - node_info.StartTime(), | 641 node_info.EndTime() - node_info.StartTime(), |
640 ','.join(styles), color, shape)) | 642 ','.join(styles), color, shape)) |
641 | 643 |
642 @classmethod | |
643 def _IsAdUrl(cls, url): | |
644 """Return true if the url is an ad. | |
645 | |
646 We group content that doesn't seem to be specific to the website along with | |
647 ads, eg staticxx.facebook.com, as well as analytics like googletagmanager (? | |
648 is this correct?). | |
649 | |
650 Args: | |
651 url: The full string url to examine. | |
652 | |
653 Returns: | |
654 True iff the url appears to be an ad. | |
655 | |
656 """ | |
657 # See below for how these patterns are defined. | |
658 AD_PATTERNS = ['2mdn.net', | |
659 'admarvel.com', | |
660 'adnxs.com', | |
661 'adobedtm.com', | |
662 'adsrvr.org', | |
663 'adsafeprotected.com', | |
664 'adsymptotic.com', | |
665 'adtech.de', | |
666 'adtechus.com', | |
667 'advertising.com', | |
668 'atwola.com', # brand protection from cscglobal.com? | |
669 'bounceexchange.com', | |
670 'betrad.com', | |
671 'casalemedia.com', | |
672 'cloudfront.net//test.png', | |
673 'cloudfront.net//atrk.js', | |
674 'contextweb.com', | |
675 'crwdcntrl.net', | |
676 'doubleclick.net', | |
677 'dynamicyield.com', | |
678 'krxd.net', | |
679 'facebook.com//ping', | |
680 'fastclick.net', | |
681 'google.com//-ads.js', | |
682 'cse.google.com', # Custom search engine. | |
683 'googleadservices.com', | |
684 'googlesyndication.com', | |
685 'googletagmanager.com', | |
686 'lightboxcdn.com', | |
687 'mediaplex.com', | |
688 'meltdsp.com', | |
689 'mobile.nytimes.com//ads-success', | |
690 'mookie1.com', | |
691 'newrelic.com', | |
692 'nr-data.net', # Apparently part of newrelic. | |
693 'optnmnstr.com', | |
694 'pubmatic.com', | |
695 'quantcast.com', | |
696 'quantserve.com', | |
697 'rubiconproject.com', | |
698 'scorecardresearch.com', | |
699 'sekindo.com', | |
700 'serving-sys.com', | |
701 'sharethrough.com', | |
702 'staticxx.facebook.com', # ? | |
703 'syndication.twimg.com', | |
704 'tapad.com', | |
705 'yieldmo.com', | |
706 ] | |
707 parts = urlparse.urlparse(url) | |
708 for pattern in AD_PATTERNS: | |
709 if '//' in pattern: | |
710 domain, path = pattern.split('//') | |
711 else: | |
712 domain, path = (pattern, None) | |
713 if parts.netloc.endswith(domain): | |
714 if not path or path in parts.path: | |
715 return True | |
716 return False | |
717 | |
718 def _ExtractImages(self): | 644 def _ExtractImages(self): |
719 """Return interesting image resources. | 645 """Return interesting image resources. |
720 | 646 |
721 Uninteresting image resources are things like ads that we don't expect to be | 647 Uninteresting image resources are things like ads that we don't expect to be |
722 constant across fetches. | 648 constant across fetches. |
723 | 649 |
724 Returns: | 650 Returns: |
725 Dict of image url + short name to NodeInfo. | 651 Dict of image url + short name to NodeInfo. |
726 """ | 652 """ |
727 image_to_info = {} | 653 image_to_info = {} |
728 for n in self._node_info: | 654 for n in self._node_info: |
729 if (n.ContentType().startswith('image') and | 655 if (n.ContentType().startswith('image') and |
730 not self._IsAdUrl(n.Url())): | 656 not self._IsAdUrl(n.Url())): |
731 key = str((n.Url(), n.ShortName(), n.StartTime())) | 657 key = str((n.Url(), n.ShortName(), n.StartTime())) |
732 assert key not in image_to_info, n.Url() | 658 assert key not in image_to_info, n.Url() |
733 image_to_info[key] = n | 659 image_to_info[key] = n |
734 return image_to_info | 660 return image_to_info |
OLD | NEW |