Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """Labels requests according to the type of content they represent.""" | 5 """Labels requests according to the type of content they represent.""" |
| 6 | 6 |
| 7 import adblockparser # Available on PyPI, through pip. | |
| 8 import collections | 7 import collections |
| 8 import logging | |
| 9 import os | 9 import os |
| 10 | 10 |
| 11 import loading_trace | 11 import loading_trace |
| 12 import request_track | 12 import request_track |
| 13 | 13 |
| 14 | 14 |
| 15 class ContentClassificationLens(object): | 15 class ContentClassificationLens(object): |
| 16 """Associates requests and frames with the type of content they represent.""" | 16 """Associates requests and frames with the type of content they represent.""" |
| 17 def __init__(self, trace, ad_rules, tracking_rules): | 17 def __init__(self, trace, ad_rules, tracking_rules): |
| 18 """Initializes an instance of ContentClassificationLens. | 18 """Initializes an instance of ContentClassificationLens. |
| (...skipping 67 matching lines...) | (...skipping 67 matching lines...) |
| 86 'Script': 'script', 'Stylesheet': 'stylesheet', 'Image': 'image', | 86 'Script': 'script', 'Stylesheet': 'stylesheet', 'Image': 'image', |
| 87 'XHR': 'xmlhttprequest'} | 87 'XHR': 'xmlhttprequest'} |
| 88 def __init__(self, rules, no_whitelist): | 88 def __init__(self, rules, no_whitelist): |
| 89 """Initializes an instance of _RulesMatcher. | 89 """Initializes an instance of _RulesMatcher. |
| 90 | 90 |
| 91 Args: | 91 Args: |
| 92 rules: ([str]) list of rules. | 92 rules: ([str]) list of rules. |
| 93 no_whitelist: (bool) Whether the whitelisting rules should be ignored. | 93 no_whitelist: (bool) Whether the whitelisting rules should be ignored. |
| 94 """ | 94 """ |
| 95 self._rules = self._FilterRules(rules, no_whitelist) | 95 self._rules = self._FilterRules(rules, no_whitelist) |
| 96 self._matcher = adblockparser.AdblockRules(self._rules) | 96 if self._rules: |
| | *Review comment on NEW line 96 (blundell, 2016/01/28 11:55:36):* Maybe a comment above this like "Only try to inclu… *[comment truncated in the source page]* |
| 97 try: | |
| 98 import adblockparser | |
| 99 self._matcher = adblockparser.AdblockRules(self._rules) | |
| 100 except ImportError: | |
| 101 logging.critical('Likely you need to install adblockparser. Try:\n' | |
| 102 ' pip install --user adblockparser\n' | |
| 103 'For 10-100x better performance, also try:\n' | |
| 104 " pip install --user 're2 >= 0.2.21'") | |
| 105 raise | |
| 106 else: | |
| 107 self._matcher = None | |
| 97 | 108 |
| 98 def Matches(self, request): | 109 def Matches(self, request): |
| 99 """Returns whether a request matches one of the rules.""" | 110 """Returns whether a request matches one of the rules.""" |
| 111 if self._matcher is None: | |
| 112 return False | |
| 100 url = request.url | 113 url = request.url |
| 101 return self._matcher.should_block(url, self._GetOptions(request)) | 114 return self._matcher.should_block(url, self._GetOptions(request)) |
| 102 | 115 |
| 103 @classmethod | 116 @classmethod |
| 104 def _GetOptions(cls, request): | 117 def _GetOptions(cls, request): |
| 105 options = {} | 118 options = {} |
| 106 resource_type = request.resource_type | 119 resource_type = request.resource_type |
| 107 option = cls._RESOURCE_TYPE_TO_OPTIONS_KEY.get(resource_type) | 120 option = cls._RESOURCE_TYPE_TO_OPTIONS_KEY.get(resource_type) |
| 108 if option: | 121 if option: |
| 109 options[option] = True | 122 options[option] = True |
| 110 return options | 123 return options |
| 111 | 124 |
| 112 @classmethod | 125 @classmethod |
| 113 def _FilterRules(cls, rules, no_whitelist): | 126 def _FilterRules(cls, rules, no_whitelist): |
| 114 if not no_whitelist: | 127 if not no_whitelist: |
| 115 return rules | 128 return rules |
| 116 else: | 129 else: |
| 117 return [rule for rule in rules | 130 return [rule for rule in rules |
| 118 if not rule.startswith(cls._WHITELIST_PREFIX)] | 131 if not rule.startswith(cls._WHITELIST_PREFIX)] |
| OLD | NEW |