Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(63)

Side by Side Diff: tools/android/loading/loading_model.py

Issue 1645953002: Conditional import of adblocker. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Use new filters for ad filtering Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 """Models for loading in chrome. 5 """Models for loading in chrome.
6 6
7 (Redirect the following to the general model module once we have one) 7 (Redirect the following to the general model module once we have one)
8 A model is an object with the following methods. 8 A model is an object with the following methods.
9 CostMs(): return the cost of the model in milliseconds. 9 CostMs(): return the cost of the model in milliseconds.
10 Set(): set model-specific parameters. 10 Set(): set model-specific parameters.
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
195 195
196 def FilterAds(self, node): 196 def FilterAds(self, node):
197 """A filter for use in, e.g., Cost, to remove advertising nodes. 197 """A filter for use in, e.g., Cost, to remove advertising nodes.
198 198
199 Args: 199 Args:
200 node: A dag.Node. 200 node: A dag.Node.
201 201
202 Returns: 202 Returns:
203 True if the node is not ad-related. 203 True if the node is not ad-related.
204 """ 204 """
205 return not self._IsAdUrl(self._node_info[node.Index()].Url()) 205 node_info = self._node_info[node.Index()]
206 return not (node_info.IsAd() or node_info.IsTracking())
206 207
207 def MakeGraphviz(self, output, highlight=None): 208 def MakeGraphviz(self, output, highlight=None):
208 """Output a graphviz representation of our DAG. 209 """Output a graphviz representation of our DAG.
209 210
210 Args: 211 Args:
211 output: a file-like output stream which receives a graphviz dot. 212 output: a file-like output stream which receives a graphviz dot.
212 highlight: a list of node items to emphasize. Any resource url which 213 highlight: a list of node items to emphasize. Any resource url which
213 contains any highlight text will be distinguished in the output. 214 contains any highlight text will be distinguished in the output.
214 """ 215 """
215 output.write("""digraph dependencies { 216 output.write("""digraph dependencies {
(...skipping 279 matching lines...) Expand 10 before | Expand all | Expand 10 after
495 self._nodes = [] 496 self._nodes = []
496 self._node_info = [] 497 self._node_info = []
497 index_by_request = {} 498 index_by_request = {}
498 for request in trace.request_track.GetEvents(): 499 for request in trace.request_track.GetEvents():
499 next_index = len(self._nodes) 500 next_index = len(self._nodes)
500 assert request not in index_by_request 501 assert request not in index_by_request
501 index_by_request[request] = next_index 502 index_by_request[request] = next_index
502 node = dag.Node(next_index) 503 node = dag.Node(next_index)
503 node_info = self._NodeInfo(node, request) 504 node_info = self._NodeInfo(node, request)
504 if self._content_lens: 505 if self._content_lens:
505 node.SetRequestContent(self._content_lens.IsAdRequest(request), 506 node_info.SetRequestContent(
506 self._content_lens.IsTrackingRequest(request)) 507 self._content_lens.IsAdRequest(request),
508 self._content_lens.IsTrackingRequest(request))
507 self._nodes.append(node) 509 self._nodes.append(node)
508 self._node_info.append(node_info) 510 self._node_info.append(node_info)
509 511
510 dependencies = request_dependencies_lens.RequestDependencyLens( 512 dependencies = request_dependencies_lens.RequestDependencyLens(
511 trace).GetRequestDependencies() 513 trace).GetRequestDependencies()
512 for parent_rq, child_rq, reason in dependencies: 514 for parent_rq, child_rq, reason in dependencies:
513 parent = self._node_info[index_by_request[parent_rq]] 515 parent = self._node_info[index_by_request[parent_rq]]
514 child = self._node_info[index_by_request[child_rq]] 516 child = self._node_info[index_by_request[child_rq]]
515 edge_cost = child.StartTime() - parent.EndTime() 517 edge_cost = child.StartTime() - parent.EndTime()
516 if edge_cost < 0: 518 if edge_cost < 0:
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after
632 if node_info.IsAd() or node_info.IsTracking(): 634 if node_info.IsAd() or node_info.IsTracking():
633 styles += ['bold', 'diagonals'] 635 styles += ['bold', 'diagonals']
634 return ('%d [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; ' 636 return ('%d [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; '
635 'fillcolor = %s; shape = %s];\n' 637 'fillcolor = %s; shape = %s];\n'
636 % (index, node_info.ShortName(), 638 % (index, node_info.ShortName(),
637 node_info.StartTime() - self._global_start, 639 node_info.StartTime() - self._global_start,
638 node_info.EndTime() - self._global_start, 640 node_info.EndTime() - self._global_start,
639 node_info.EndTime() - node_info.StartTime(), 641 node_info.EndTime() - node_info.StartTime(),
640 ','.join(styles), color, shape)) 642 ','.join(styles), color, shape))
641 643
642 @classmethod
643 def _IsAdUrl(cls, url):
644 """Return true if the url is an ad.
645
646 We group content that doesn't seem to be specific to the website along with
647 ads, eg staticxx.facebook.com, as well as analytics like googletagmanager (?
648 is this correct?).
649
650 Args:
651 url: The full string url to examine.
652
653 Returns:
654 True iff the url appears to be an ad.
655
656 """
657 # See below for how these patterns are defined.
658 AD_PATTERNS = ['2mdn.net',
659 'admarvel.com',
660 'adnxs.com',
661 'adobedtm.com',
662 'adsrvr.org',
663 'adsafeprotected.com',
664 'adsymptotic.com',
665 'adtech.de',
666 'adtechus.com',
667 'advertising.com',
668 'atwola.com', # brand protection from cscglobal.com?
669 'bounceexchange.com',
670 'betrad.com',
671 'casalemedia.com',
672 'cloudfront.net//test.png',
673 'cloudfront.net//atrk.js',
674 'contextweb.com',
675 'crwdcntrl.net',
676 'doubleclick.net',
677 'dynamicyield.com',
678 'krxd.net',
679 'facebook.com//ping',
680 'fastclick.net',
681 'google.com//-ads.js',
682 'cse.google.com', # Custom search engine.
683 'googleadservices.com',
684 'googlesyndication.com',
685 'googletagmanager.com',
686 'lightboxcdn.com',
687 'mediaplex.com',
688 'meltdsp.com',
689 'mobile.nytimes.com//ads-success',
690 'mookie1.com',
691 'newrelic.com',
692 'nr-data.net', # Apparently part of newrelic.
693 'optnmnstr.com',
694 'pubmatic.com',
695 'quantcast.com',
696 'quantserve.com',
697 'rubiconproject.com',
698 'scorecardresearch.com',
699 'sekindo.com',
700 'serving-sys.com',
701 'sharethrough.com',
702 'staticxx.facebook.com', # ?
703 'syndication.twimg.com',
704 'tapad.com',
705 'yieldmo.com',
706 ]
707 parts = urlparse.urlparse(url)
708 for pattern in AD_PATTERNS:
709 if '//' in pattern:
710 domain, path = pattern.split('//')
711 else:
712 domain, path = (pattern, None)
713 if parts.netloc.endswith(domain):
714 if not path or path in parts.path:
715 return True
716 return False
717
718 def _ExtractImages(self): 644 def _ExtractImages(self):
719 """Return interesting image resources. 645 """Return interesting image resources.
720 646
721 Uninteresting image resources are things like ads that we don't expect to be 647 Uninteresting image resources are things like ads that we don't expect to be
722 constant across fetches. 648 constant across fetches.
723 649
724 Returns: 650 Returns:
725 Dict of image url + short name to NodeInfo. 651 Dict of image url + short name to NodeInfo.
726 """ 652 """
727 image_to_info = {} 653 image_to_info = {}
728 for n in self._node_info: 654 for n in self._node_info:
729 if (n.ContentType().startswith('image') and 655 if (n.ContentType().startswith('image') and
730 not self._IsAdUrl(n.Url())): 656 not self._IsAdUrl(n.Url())):
731 key = str((n.Url(), n.ShortName(), n.StartTime())) 657 key = str((n.Url(), n.ShortName(), n.StartTime()))
732 assert key not in image_to_info, n.Url() 658 assert key not in image_to_info, n.Url()
733 image_to_info[key] = n 659 image_to_info[key] = n
734 return image_to_info 660 return image_to_info
OLDNEW
« no previous file with comments | « tools/android/loading/content_classification_lens_unittest.py ('k') | tools/android/loading/loading_model_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698