Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(59)

Side by Side Diff: tools/android/loading/content_classification_lens.py

Issue 1626393002: tools/android/loading: ContentClassificationLens, ads and tracking requests. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Address comments. Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Labels requests according to the type of content they represent."""
6
7 import adblockparser # Available on PyPI, through pip.
8 import collections
9 import os
10
11 import loading_trace
12 import request_track
13
14
class ContentClassificationLens(object):
  """Associates requests and frames with the type of content they represent.

  Requests are labeled once, at construction time, by matching them against
  two sets of Adblock+ compatible rules: one for ads, one for tracking.
  """

  def __init__(self, trace, ad_rules, tracking_rules):
    """Initializes an instance of ContentClassificationLens.

    Args:
      trace: (LoadingTrace) loading trace.
      ad_rules: ([str]) List of Adblock+ compatible rules used to classify ads.
      tracking_rules: ([str]) List of Adblock+ compatible rules used to
                      classify tracking and analytics.
    """
    self._trace = trace
    self._requests = trace.request_track.GetEvents()
    # The frame of the first page event is treated as the main frame.
    self._main_frame_id = trace.page_track.GetEvents()[0]['frame_id']
    self._frame_to_requests = collections.defaultdict(list)
    self._ad_requests = set()
    self._tracking_requests = set()
    # Both matchers are built with whitelisting ("@@") rules ignored.
    self._ad_matcher = _RulesMatcher(ad_rules, True)
    self._tracking_matcher = _RulesMatcher(tracking_rules, True)
    self._GroupRequestsByFrameId()
    self._LabelRequests()

  def IsAdRequest(self, request):
    """Returns True iff the request matches one of the ad_rules."""
    return request.request_id in self._ad_requests

  def IsTrackingRequest(self, request):
    """Returns True iff the request matches one of the tracking_rules."""
    return request.request_id in self._tracking_requests

  def IsAdFrame(self, frame_id, ratio):
    """Returns whether a frame is an Ad frame.

    A frame is an Ad frame if it is not the main frame and more than |ratio|
    of its requests are ad-related.

    Args:
      frame_id: Identifier of the frame to classify.
      ratio: (float) Fraction of ad requests that must be exceeded.

    Returns:
      True iff the frame is an Ad frame. A frame without any recorded request
      is never an Ad frame.
    """
    if frame_id == self._main_frame_id:
      return False
    # Use get() rather than indexing: indexing the defaultdict would insert an
    # empty entry for every unknown frame that gets queried.
    request_ids = self._frame_to_requests.get(frame_id)
    if not request_ids:
      # No requests: the ratio is undefined, avoid dividing by zero.
      return False
    ad_requests_count = sum(r in self._ad_requests for r in request_ids)
    return (float(ad_requests_count) / len(request_ids)) > ratio

  @classmethod
  def WithRulesFiles(cls, trace, ad_rules_filename, tracking_rules_filename):
    """Returns an instance of ContentClassificationLens with the rules read
    from files.

    A file that does not exist contributes an empty list of rules.

    Args:
      trace: (LoadingTrace) loading trace.
      ad_rules_filename: (str) Path to the file holding the ad rules.
      tracking_rules_filename: (str) Path to the file holding the tracking
                               rules.
    """
    ad_rules = cls._ReadRulesFile(ad_rules_filename)
    tracking_rules = cls._ReadRulesFile(tracking_rules_filename)
    # Instantiate through |cls| so that subclasses get their own type back.
    return cls(trace, ad_rules, tracking_rules)

  @staticmethod
  def _ReadRulesFile(filename):
    """Returns the lines of |filename|, or [] if the file does not exist."""
    if not os.path.exists(filename):
      return []
    # The context manager guarantees the file handle is closed.
    with open(filename, 'r') as rules_file:
      return rules_file.readlines()

  def _GroupRequestsByFrameId(self):
    """Fills the frame ID -> [request ID] mapping from the trace requests."""
    for request in self._requests:
      self._frame_to_requests[request.frame_id].append(request.request_id)

  def _LabelRequests(self):
    """Records the IDs of the requests matching the ad and tracking rules.

    The two categories are independent, a request can end up in both sets.
    """
    for request in self._requests:
      request_id = request.request_id
      if self._ad_matcher.Matches(request):
        self._ad_requests.add(request_id)
      if self._tracking_matcher.Matches(request):
        self._tracking_requests.add(request_id)
80
81
class _RulesMatcher(object):
  """Checks requests against a list of rules written in Adblock+ format."""

  # Rules starting with this prefix are whitelisting rules.
  _WHITELIST_PREFIX = '@@'
  # Maps a request resource type to the matcher option key to set for it.
  _RESOURCE_TYPE_TO_OPTIONS_KEY = {
      'Script': 'script',
      'Stylesheet': 'stylesheet',
      'Image': 'image',
      'XHR': 'xmlhttprequest',
  }

  def __init__(self, rules, no_whitelist):
    """Builds the matcher from a list of rules.

    Args:
      rules: ([str]) list of rules.
      no_whitelist: (bool) Whether the whitelisting rules should be ignored.
    """
    kept_rules = self._FilterRules(rules, no_whitelist)
    self._rules = kept_rules
    self._matcher = adblockparser.AdblockRules(kept_rules)

  def Matches(self, request):
    """Returns whether a request matches one of the rules.

    The request URL is checked together with the options derived from its
    resource type.
    """
    return self._matcher.should_block(request.url, self._GetOptions(request))

  @classmethod
  def _GetOptions(cls, request):
    """Returns the matcher options dict for a request.

    The dict holds a single {option: True} entry when the resource type of the
    request has a known option key, and is empty otherwise.
    """
    option_key = cls._RESOURCE_TYPE_TO_OPTIONS_KEY.get(request.resource_type)
    return {option_key: True} if option_key else {}

  @classmethod
  def _FilterRules(cls, rules, no_whitelist):
    """Returns the rules to use, without whitelisting ones if requested.

    When |no_whitelist| is False, |rules| itself is returned unchanged.
    """
    if no_whitelist:
      prefix = cls._WHITELIST_PREFIX
      return [rule for rule in rules if not rule.startswith(prefix)]
    return rules
OLDNEW
« no previous file with comments | « tools/android/loading/analyze.py ('k') | tools/android/loading/content_classification_lens_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698