| OLD | NEW |
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/subresource_filter/core/common/url_pattern_index.h" | 5 #include "components/subresource_filter/core/common/url_pattern_index.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <limits> | 8 #include <limits> |
| 9 #include <string> | 9 #include <string> |
| 10 | 10 |
| 11 #include "base/logging.h" | 11 #include "base/logging.h" |
| 12 #include "base/numerics/safe_conversions.h" | 12 #include "base/numerics/safe_conversions.h" |
| 13 #include "base/strings/string_piece.h" | 13 #include "base/strings/string_piece.h" |
| 14 #include "base/strings/string_util.h" | 14 #include "base/strings/string_util.h" |
| 15 #include "components/subresource_filter/core/common/ngram_extractor.h" | 15 #include "components/subresource_filter/core/common/ngram_extractor.h" |
| 16 #include "components/subresource_filter/core/common/url_pattern.h" | 16 #include "components/subresource_filter/core/common/url_pattern.h" |
| 17 #include "url/gurl.h" | 17 #include "url/gurl.h" |
| 18 #include "url/origin.h" | 18 #include "url/origin.h" |
| 19 | 19 |
| 20 namespace subresource_filter { | 20 namespace subresource_filter { |
| 21 | 21 |
| 22 namespace { | 22 namespace { |
| 23 | 23 |
| 24 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; | 24 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; |
| 25 using FlatDomains = flatbuffers::Vector<FlatStringOffset>; | 25 using FlatDomains = flatbuffers::Vector<FlatStringOffset>; |
| 26 using FlatDomainsOffset = flatbuffers::Offset<FlatDomains>; | 26 using FlatDomainsOffset = flatbuffers::Offset<FlatDomains>; |
| 27 using ActivationMaskPair = |
| 28 std::pair<proto::ActivationType, flat::ActivationType>; |
| 29 using ElementTypeMaskPair = std::pair<proto::ElementType, flat::ElementType>; |
| 30 |
| 31 const ActivationMaskPair kActivationMaskPairs[] = { |
| 32 {proto::ACTIVATION_TYPE_DOCUMENT, flat::ActivationType_DOCUMENT}, |
| 33 {proto::ACTIVATION_TYPE_ELEMHIDE, |
| 34 flat::ActivationType_NONE}, // ELEMHIDE is not supported. |
| 35 {proto::ACTIVATION_TYPE_GENERICHIDE, |
| 36 flat::ActivationType_NONE}, // GENERICHIDE is not supported. |
| 37 {proto::ACTIVATION_TYPE_GENERICBLOCK, flat::ActivationType_GENERIC_BLOCK}, |
| 38 }; |
| 39 |
| 40 const ElementTypeMaskPair kElementTypeMaskPairs[] = { |
| 41 {proto::ELEMENT_TYPE_OTHER, flat::ElementType_OTHER}, |
| 42 {proto::ELEMENT_TYPE_SCRIPT, flat::ElementType_SCRIPT}, |
| 43 {proto::ELEMENT_TYPE_IMAGE, flat::ElementType_IMAGE}, |
| 44 {proto::ELEMENT_TYPE_STYLESHEET, flat::ElementType_STYLESHEET}, |
| 45 {proto::ELEMENT_TYPE_OBJECT, flat::ElementType_OBJECT}, |
| 46 {proto::ELEMENT_TYPE_XMLHTTPREQUEST, flat::ElementType_XMLHTTPREQUEST}, |
| 47 {proto::ELEMENT_TYPE_OBJECT_SUBREQUEST, |
| 48 flat::ElementType_OBJECT}, // Normally we can not distinguish between the |
| 49 // main plugin resource and any other loads it |
| 50 // makes. We treat them both as OBJECT |
| 51 // requests. |
| 52 {proto::ELEMENT_TYPE_SUBDOCUMENT, flat::ElementType_SUBDOCUMENT}, |
| 53 {proto::ELEMENT_TYPE_PING, flat::ElementType_PING}, |
| 54 {proto::ELEMENT_TYPE_MEDIA, flat::ElementType_MEDIA}, |
| 55 {proto::ELEMENT_TYPE_FONT, flat::ElementType_FONT}, |
| 56 {proto::ELEMENT_TYPE_POPUP, |
| 57 flat::ElementType_NONE}, // Filtering popups is not supported. |
| 58 {proto::ELEMENT_TYPE_WEBSOCKET, flat::ElementType_WEBSOCKET}, |
| 59 }; |
| 27 | 60 |
| 28 base::StringPiece ToStringPiece(const flatbuffers::String* string) { | 61 base::StringPiece ToStringPiece(const flatbuffers::String* string) { |
| 29 DCHECK(string); | 62 DCHECK(string); |
| 30 return base::StringPiece(string->c_str(), string->size()); | 63 return base::StringPiece(string->c_str(), string->size()); |
| 31 } | 64 } |
| 32 | 65 |
| 33 // Performs three-way comparison between two domains. In the total order defined | 66 // Performs three-way comparison between two domains. In the total order defined |
| 34 // by this predicate, the lengths of domains will be monotonically decreasing. | 67 // by this predicate, the lengths of domains will be monotonically decreasing. |
| 35 int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain) { | 68 int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain) { |
| 36 if (lhs_domain.size() != rhs_domain.size()) | 69 if (lhs_domain.size() != rhs_domain.size()) |
| (...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 135 case proto::ANCHOR_TYPE_SUBDOMAIN: | 168 case proto::ANCHOR_TYPE_SUBDOMAIN: |
| 136 *result = flat::AnchorType_SUBDOMAIN; | 169 *result = flat::AnchorType_SUBDOMAIN; |
| 137 break; | 170 break; |
| 138 default: | 171 default: |
| 139 return false; // Unsupported anchor type. | 172 return false; // Unsupported anchor type. |
| 140 } | 173 } |
| 141 return true; | 174 return true; |
| 142 } | 175 } |
| 143 | 176 |
| 144 bool InitializeOptions() { | 177 bool InitializeOptions() { |
| 178 static_assert(flat::OptionFlag_ANY <= std::numeric_limits<uint8_t>::max(), |
| 179 "Option flags can not be stored in uint8_t."); |
| 180 |
| 145 if (rule_.semantics() == proto::RULE_SEMANTICS_WHITELIST) { | 181 if (rule_.semantics() == proto::RULE_SEMANTICS_WHITELIST) { |
| 146 options_ |= flat::OptionFlag_IS_WHITELIST; | 182 options_ |= flat::OptionFlag_IS_WHITELIST; |
| 147 } else if (rule_.semantics() != proto::RULE_SEMANTICS_BLACKLIST) { | 183 } else if (rule_.semantics() != proto::RULE_SEMANTICS_BLACKLIST) { |
| 148 return false; // Unsupported semantics. | 184 return false; // Unsupported semantics. |
| 149 } | 185 } |
| 150 | 186 |
| 151 switch (rule_.source_type()) { | 187 switch (rule_.source_type()) { |
| 152 case proto::SOURCE_TYPE_ANY: | 188 case proto::SOURCE_TYPE_ANY: |
| 153 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; | 189 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; |
| 154 // Note: fall through here intentionally. | 190 // Note: fall through here intentionally. |
| 155 case proto::SOURCE_TYPE_FIRST_PARTY: | 191 case proto::SOURCE_TYPE_FIRST_PARTY: |
| 156 options_ |= flat::OptionFlag_APPLIES_TO_FIRST_PARTY; | 192 options_ |= flat::OptionFlag_APPLIES_TO_FIRST_PARTY; |
| 157 break; | 193 break; |
| 158 case proto::SOURCE_TYPE_THIRD_PARTY: | 194 case proto::SOURCE_TYPE_THIRD_PARTY: |
| 159 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; | 195 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; |
| 160 break; | 196 break; |
| 161 | 197 |
| 162 default: | 198 default: |
| 163 return false; // Unsupported source type. | 199 return false; // Unsupported source type. |
| 164 } | 200 } |
| 165 | 201 |
| 166 if (rule_.match_case()) | 202 if (rule_.match_case()) |
| 167 options_ |= flat::OptionFlag_IS_MATCH_CASE; | 203 options_ |= flat::OptionFlag_IS_MATCH_CASE; |
| 168 | 204 |
| 169 return true; | 205 return true; |
| 170 } | 206 } |
| 171 | 207 |
| 172 bool InitializeElementTypes() { | 208 bool InitializeElementTypes() { |
| 173 static_assert( | 209 static_assert(flat::ElementType_ANY <= std::numeric_limits<uint16_t>::max(), |
| 174 proto::ELEMENT_TYPE_ALL <= std::numeric_limits<uint16_t>::max(), | 210 "Element types can not be stored in uint16_t."); |
| 175 "Element types can not be stored in uint16_t."); | |
| 176 element_types_ = static_cast<uint16_t>(rule_.element_types()); | |
| 177 | 211 |
| 178 // Note: Normally we can not distinguish between the main plugin resource | 212 element_types_ = flat::ElementType_NONE; |
| 179 // and any other loads it makes. We treat them both as OBJECT requests. | |
| 180 if (element_types_ & proto::ELEMENT_TYPE_OBJECT_SUBREQUEST) | |
| 181 element_types_ |= proto::ELEMENT_TYPE_OBJECT; | |
| 182 | 213 |
| 183 // Ignore unknown element types. | 214 for (const auto& pair : kElementTypeMaskPairs) |
| 184 element_types_ &= proto::ELEMENT_TYPE_ALL; | 215 if (rule_.element_types() & pair.first) |
| 185 // Filtering popups is not supported. | 216 element_types_ |= pair.second; |
| 186 element_types_ &= ~proto::ELEMENT_TYPE_POPUP; | |
| 187 | 217 |
| 188 return true; | 218 return true; |
| 189 } | 219 } |
| 190 | 220 |
| 191 bool InitializeActivationTypes() { | 221 bool InitializeActivationTypes() { |
| 192 static_assert( | 222 static_assert( |
| 193 proto::ACTIVATION_TYPE_ALL <= std::numeric_limits<uint8_t>::max(), | 223 flat::ActivationType_ANY <= std::numeric_limits<uint8_t>::max(), |
| 194 "Activation types can not be stored in uint8_t."); | 224 "Activation types can not be stored in uint8_t."); |
| 195 activation_types_ = static_cast<uint8_t>(rule_.activation_types()); | 225 activation_types_ = flat::ActivationType_NONE; |
| 196 | 226 |
| 197 // Only the following activation types are supported, ignore the others. | 227 for (const auto& pair : kActivationMaskPairs) |
| 198 activation_types_ &= | 228 if (rule_.activation_types() & pair.first) |
| 199 proto::ACTIVATION_TYPE_DOCUMENT | proto::ACTIVATION_TYPE_GENERICBLOCK; | 229 activation_types_ |= pair.second; |
| 200 | 230 |
| 201 return true; | 231 return true; |
| 202 } | 232 } |
| 203 | 233 |
| 204 bool InitializeUrlPattern() { | 234 bool InitializeUrlPattern() { |
| 205 switch (rule_.url_pattern_type()) { | 235 switch (rule_.url_pattern_type()) { |
| 206 case proto::URL_PATTERN_TYPE_SUBSTRING: | 236 case proto::URL_PATTERN_TYPE_SUBSTRING: |
| 207 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; | 237 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; |
| 208 break; | 238 break; |
| 209 case proto::URL_PATTERN_TYPE_WILDCARDED: | 239 case proto::URL_PATTERN_TYPE_WILDCARDED: |
| (...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 400 // the |origin|. Thus, domain filters with more domain components trump filters | 430 // the |origin|. Thus, domain filters with more domain components trump filters |
| 401 // with fewer domain components, i.e. the more specific a filter is, the higher | 431 // with fewer domain components, i.e. the more specific a filter is, the higher |
| 402 // the priority. | 432 // the priority. |
| 403 // | 433 // |
| 404 // A rule whose domain list is empty or contains only negative domains is still | 434 // A rule whose domain list is empty or contains only negative domains is still |
| 405 // considered a "generic" rule. Therefore, if |disable_generic_rules| is set, | 435 // considered a "generic" rule. Therefore, if |disable_generic_rules| is set, |
| 406 // this function will always return false for such rules. | 436 // this function will always return false for such rules. |
| 407 bool DoesOriginMatchDomainList(const url::Origin& origin, | 437 bool DoesOriginMatchDomainList(const url::Origin& origin, |
| 408 const flat::UrlRule& rule, | 438 const flat::UrlRule& rule, |
| 409 bool disable_generic_rules) { | 439 bool disable_generic_rules) { |
| 410 const bool is_generic = !rule.domains_included(); | 440 const bool is_generic = |
| 411 DCHECK(is_generic || rule.domains_included()->size()); | 441 !rule.domains_included() || !rule.domains_included()->size(); |
| 442 // DCHECK(is_generic || rule.domains_included()->size()); |
| 412 if (disable_generic_rules && is_generic) | 443 if (disable_generic_rules && is_generic) |
| 413 return false; | 444 return false; |
| 414 | 445 |
| 415 // Unique |origin| matches lists of exception domains only. | 446 // Unique |origin| matches lists of exception domains only. |
| 416 if (origin.unique()) | 447 if (origin.unique()) |
| 417 return is_generic; | 448 return is_generic; |
| 418 | 449 |
| 419 size_t longest_matching_included_domain_length = 1; | 450 size_t longest_matching_included_domain_length = 1; |
| 420 if (!is_generic) { | 451 if (!is_generic) { |
| 421 longest_matching_included_domain_length = | 452 longest_matching_included_domain_length = |
| (...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 558 (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)) { | 589 (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)) { |
| 559 return nullptr; | 590 return nullptr; |
| 560 } | 591 } |
| 561 | 592 |
| 562 return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin, | 593 return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin, |
| 563 element_type, activation_type, | 594 element_type, activation_type, |
| 564 is_third_party, disable_generic_rules); | 595 is_third_party, disable_generic_rules); |
| 565 } | 596 } |
| 566 | 597 |
| 567 } // namespace subresource_filter | 598 } // namespace subresource_filter |
| OLD | NEW |