OLD | NEW |
1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/url_pattern_index/url_pattern_index.h" | 5 #include "components/url_pattern_index/url_pattern_index.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <limits> | 8 #include <limits> |
9 #include <string> | 9 #include <string> |
10 | 10 |
| 11 #include "base/containers/flat_map.h" |
11 #include "base/logging.h" | 12 #include "base/logging.h" |
12 #include "base/numerics/safe_conversions.h" | 13 #include "base/numerics/safe_conversions.h" |
13 #include "base/strings/string_piece.h" | 14 #include "base/strings/string_piece.h" |
14 #include "base/strings/string_util.h" | 15 #include "base/strings/string_util.h" |
15 #include "components/url_pattern_index/ngram_extractor.h" | 16 #include "components/url_pattern_index/ngram_extractor.h" |
16 #include "components/url_pattern_index/url_pattern.h" | 17 #include "components/url_pattern_index/url_pattern.h" |
17 #include "url/gurl.h" | 18 #include "url/gurl.h" |
18 #include "url/origin.h" | 19 #include "url/origin.h" |
19 | 20 |
20 namespace url_pattern_index { | 21 namespace url_pattern_index { |
21 | 22 |
22 namespace { | 23 namespace { |
23 | 24 |
24 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; | 25 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; |
25 using FlatDomains = flatbuffers::Vector<FlatStringOffset>; | 26 using FlatDomains = flatbuffers::Vector<FlatStringOffset>; |
26 using FlatDomainsOffset = flatbuffers::Offset<FlatDomains>; | 27 using FlatDomainsOffset = flatbuffers::Offset<FlatDomains>; |
27 | 28 |
| 29 // Maps proto::ActivationType to flat::ActivationType. |
| 30 const base::flat_map<proto::ActivationType, flat::ActivationType> |
| 31 kActivationTypeMap( |
| 32 { |
| 33 {proto::ACTIVATION_TYPE_UNSPECIFIED, flat::ActivationType_NONE}, |
| 34 {proto::ACTIVATION_TYPE_DOCUMENT, flat::ActivationType_DOCUMENT}, |
| 35 // ELEMHIDE is not supported. |
| 36 {proto::ACTIVATION_TYPE_ELEMHIDE, flat::ActivationType_NONE}, |
| 37 // GENERICHIDE is not supported. |
| 38 {proto::ACTIVATION_TYPE_GENERICHIDE, flat::ActivationType_NONE}, |
| 39 {proto::ACTIVATION_TYPE_GENERICBLOCK, |
| 40 flat::ActivationType_GENERIC_BLOCK}, |
| 41 }, |
| 42 base::KEEP_FIRST_OF_DUPES); |
| 43 |
| 44 // Maps proto::ElementType to flat::ElementType. |
| 45 const base::flat_map<proto::ElementType, flat::ElementType> kElementTypeMap( |
| 46 { |
| 47 {proto::ELEMENT_TYPE_UNSPECIFIED, flat::ElementType_NONE}, |
| 48 {proto::ELEMENT_TYPE_OTHER, flat::ElementType_OTHER}, |
| 49 {proto::ELEMENT_TYPE_SCRIPT, flat::ElementType_SCRIPT}, |
| 50 {proto::ELEMENT_TYPE_IMAGE, flat::ElementType_IMAGE}, |
| 51 {proto::ELEMENT_TYPE_STYLESHEET, flat::ElementType_STYLESHEET}, |
| 52 {proto::ELEMENT_TYPE_OBJECT, flat::ElementType_OBJECT}, |
| 53 {proto::ELEMENT_TYPE_XMLHTTPREQUEST, flat::ElementType_XMLHTTPREQUEST}, |
| 54 {proto::ELEMENT_TYPE_OBJECT_SUBREQUEST, |
| 55 flat::ElementType_OBJECT_SUBREQUEST}, |
| 56 {proto::ELEMENT_TYPE_SUBDOCUMENT, flat::ElementType_SUBDOCUMENT}, |
| 57 {proto::ELEMENT_TYPE_PING, flat::ElementType_PING}, |
| 58 {proto::ELEMENT_TYPE_MEDIA, flat::ElementType_MEDIA}, |
| 59 {proto::ELEMENT_TYPE_FONT, flat::ElementType_FONT}, |
| 60 // Filtering popups is not supported. |
| 61 {proto::ELEMENT_TYPE_POPUP, flat::ElementType_NONE}, |
| 62 {proto::ELEMENT_TYPE_WEBSOCKET, flat::ElementType_WEBSOCKET}, |
| 63 }, |
| 64 base::KEEP_FIRST_OF_DUPES); |
| 65 |
| 66 flat::ActivationType ProtoToFlatActivationType(proto::ActivationType type) { |
| 67 const auto it = kActivationTypeMap.find(type); |
| 68 DCHECK(it != kActivationTypeMap.end()); |
| 69 return it->second; |
| 70 } |
| 71 |
| 72 flat::ElementType ProtoToFlatElementType(proto::ElementType type) { |
| 73 const auto it = kElementTypeMap.find(type); |
| 74 DCHECK(it != kElementTypeMap.end()); |
| 75 return it->second; |
| 76 } |
| 77 |
28 base::StringPiece ToStringPiece(const flatbuffers::String* string) { | 78 base::StringPiece ToStringPiece(const flatbuffers::String* string) { |
29 DCHECK(string); | 79 DCHECK(string); |
30 return base::StringPiece(string->c_str(), string->size()); | 80 return base::StringPiece(string->c_str(), string->size()); |
31 } | 81 } |
32 | 82 |
33 // Performs three-way comparison between two domains. In the total order defined | 83 // Performs three-way comparison between two domains. In the total order defined |
34 // by this predicate, the lengths of domains will be monotonically decreasing. | 84 // by this predicate, the lengths of domains will be monotonically decreasing. |
35 int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain) { | 85 int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain) { |
36 if (lhs_domain.size() != rhs_domain.size()) | 86 if (lhs_domain.size() != rhs_domain.size()) |
37 return lhs_domain.size() > rhs_domain.size() ? -1 : 1; | 87 return lhs_domain.size() > rhs_domain.size() ? -1 : 1; |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
135 case proto::ANCHOR_TYPE_SUBDOMAIN: | 185 case proto::ANCHOR_TYPE_SUBDOMAIN: |
136 *result = flat::AnchorType_SUBDOMAIN; | 186 *result = flat::AnchorType_SUBDOMAIN; |
137 break; | 187 break; |
138 default: | 188 default: |
139 return false; // Unsupported anchor type. | 189 return false; // Unsupported anchor type. |
140 } | 190 } |
141 return true; | 191 return true; |
142 } | 192 } |
143 | 193 |
144 bool InitializeOptions() { | 194 bool InitializeOptions() { |
| 195 static_assert(flat::OptionFlag_ANY <= std::numeric_limits<uint8_t>::max(), |
| 196 "Option flags can not be stored in uint8_t."); |
| 197 |
145 if (rule_.semantics() == proto::RULE_SEMANTICS_WHITELIST) { | 198 if (rule_.semantics() == proto::RULE_SEMANTICS_WHITELIST) { |
146 options_ |= flat::OptionFlag_IS_WHITELIST; | 199 options_ |= flat::OptionFlag_IS_WHITELIST; |
147 } else if (rule_.semantics() != proto::RULE_SEMANTICS_BLACKLIST) { | 200 } else if (rule_.semantics() != proto::RULE_SEMANTICS_BLACKLIST) { |
148 return false; // Unsupported semantics. | 201 return false; // Unsupported semantics. |
149 } | 202 } |
150 | 203 |
151 switch (rule_.source_type()) { | 204 switch (rule_.source_type()) { |
152 case proto::SOURCE_TYPE_ANY: | 205 case proto::SOURCE_TYPE_ANY: |
153 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; | 206 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; |
154 // Note: fall through here intentionally. | 207 // Note: fall through here intentionally. |
155 case proto::SOURCE_TYPE_FIRST_PARTY: | 208 case proto::SOURCE_TYPE_FIRST_PARTY: |
156 options_ |= flat::OptionFlag_APPLIES_TO_FIRST_PARTY; | 209 options_ |= flat::OptionFlag_APPLIES_TO_FIRST_PARTY; |
157 break; | 210 break; |
158 case proto::SOURCE_TYPE_THIRD_PARTY: | 211 case proto::SOURCE_TYPE_THIRD_PARTY: |
159 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; | 212 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; |
160 break; | 213 break; |
161 | 214 |
162 default: | 215 default: |
163 return false; // Unsupported source type. | 216 return false; // Unsupported source type. |
164 } | 217 } |
165 | 218 |
166 if (rule_.match_case()) | 219 if (rule_.match_case()) |
167 options_ |= flat::OptionFlag_IS_MATCH_CASE; | 220 options_ |= flat::OptionFlag_IS_MATCH_CASE; |
168 | 221 |
169 return true; | 222 return true; |
170 } | 223 } |
171 | 224 |
172 bool InitializeElementTypes() { | 225 bool InitializeElementTypes() { |
173 static_assert( | 226 static_assert(flat::ElementType_ANY <= std::numeric_limits<uint16_t>::max(), |
174 proto::ELEMENT_TYPE_ALL <= std::numeric_limits<uint16_t>::max(), | 227 "Element types can not be stored in uint16_t."); |
175 "Element types can not be stored in uint16_t."); | |
176 element_types_ = static_cast<uint16_t>(rule_.element_types()); | |
177 | 228 |
178 // Note: Normally we can not distinguish between the main plugin resource | 229 // Ensure all proto::ElementType(s) are mapped in |kElementTypeMap|. |
179 // and any other loads it makes. We treat them both as OBJECT requests. | 230 #if DCHECK_IS_ON() |
180 if (element_types_ & proto::ELEMENT_TYPE_OBJECT_SUBREQUEST) | 231 uint16_t mask = 0; |
181 element_types_ |= proto::ELEMENT_TYPE_OBJECT; | 232 for (const auto& pair : kElementTypeMap) |
| 233 mask |= pair.first; |
| 234 DCHECK_EQ(proto::ELEMENT_TYPE_ALL, mask); |
| 235 #endif // DCHECK_IS_ON() |
182 | 236 |
183 // Ignore unknown element types. | 237 element_types_ = flat::ElementType_NONE; |
184 element_types_ &= proto::ELEMENT_TYPE_ALL; | 238 |
185 // Filtering popups is not supported. | 239 for (const auto& pair : kElementTypeMap) |
186 element_types_ &= ~proto::ELEMENT_TYPE_POPUP; | 240 if (rule_.element_types() & pair.first) |
| 241 element_types_ |= pair.second; |
| 242 |
| 243 // Normally we can not distinguish between the main plugin resource and any |
| 244 // other loads it makes. We treat them both as OBJECT requests. Hence an |
| 245 // OBJECT request would also match OBJECT_SUBREQUEST rules, but not the |
| 246 // other way round. |
| 247 if (element_types_ & flat::ElementType_OBJECT_SUBREQUEST) |
| 248 element_types_ |= flat::ElementType_OBJECT; |
187 | 249 |
188 return true; | 250 return true; |
189 } | 251 } |
190 | 252 |
191 bool InitializeActivationTypes() { | 253 bool InitializeActivationTypes() { |
192 static_assert( | 254 static_assert( |
193 proto::ACTIVATION_TYPE_ALL <= std::numeric_limits<uint8_t>::max(), | 255 flat::ActivationType_ANY <= std::numeric_limits<uint8_t>::max(), |
194 "Activation types can not be stored in uint8_t."); | 256 "Activation types can not be stored in uint8_t."); |
195 activation_types_ = static_cast<uint8_t>(rule_.activation_types()); | |
196 | 257 |
197 // Only the following activation types are supported, ignore the others. | 258 // Ensure all proto::ActivationType(s) are mapped in |kActivationTypeMap|. |
198 activation_types_ &= | 259 #if DCHECK_IS_ON() |
199 proto::ACTIVATION_TYPE_DOCUMENT | proto::ACTIVATION_TYPE_GENERICBLOCK; | 260 uint16_t mask = 0; |
| 261 for (const auto& pair : kActivationTypeMap) |
| 262 mask |= pair.first; |
| 263 DCHECK_EQ(proto::ACTIVATION_TYPE_ALL, mask); |
| 264 #endif // DCHECK_IS_ON() |
| 265 |
| 266 activation_types_ = flat::ActivationType_NONE; |
| 267 |
| 268 for (const auto& pair : kActivationTypeMap) |
| 269 if (rule_.activation_types() & pair.first) |
| 270 activation_types_ |= pair.second; |
200 | 271 |
201 return true; | 272 return true; |
202 } | 273 } |
203 | 274 |
204 bool InitializeUrlPattern() { | 275 bool InitializeUrlPattern() { |
205 switch (rule_.url_pattern_type()) { | 276 switch (rule_.url_pattern_type()) { |
206 case proto::URL_PATTERN_TYPE_SUBSTRING: | 277 case proto::URL_PATTERN_TYPE_SUBSTRING: |
207 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; | 278 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; |
208 break; | 279 break; |
209 case proto::URL_PATTERN_TYPE_WILDCARDED: | 280 case proto::URL_PATTERN_TYPE_WILDCARDED: |
(...skipping 213 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
423 } | 494 } |
424 if (longest_matching_included_domain_length && rule.domains_excluded()) { | 495 if (longest_matching_included_domain_length && rule.domains_excluded()) { |
425 return GetLongestMatchingSubdomain(origin, *rule.domains_excluded()) < | 496 return GetLongestMatchingSubdomain(origin, *rule.domains_excluded()) < |
426 longest_matching_included_domain_length; | 497 longest_matching_included_domain_length; |
427 } | 498 } |
428 return !!longest_matching_included_domain_length; | 499 return !!longest_matching_included_domain_length; |
429 } | 500 } |
430 | 501 |
431 // Returns whether the request matches flags of the specified URL |rule|. Takes | 502 // Returns whether the request matches flags of the specified URL |rule|. Takes |
432 // into account: | 503 // into account: |
433 // - |element_type| of the requested resource, if not *_UNSPECIFIED. | 504 // - |element_type| of the requested resource, if not *_NONE. |
434 // - |activation_type| for a subdocument request, if not *_UNSPECIFIED. | 505 // - |activation_type| for a subdocument request, if not *_NONE. |
435 // - Whether the resource |is_third_party| w.r.t. its embedding document. | 506 // - Whether the resource |is_third_party| w.r.t. its embedding document. |
436 bool DoesRuleFlagsMatch(const flat::UrlRule& rule, | 507 bool DoesRuleFlagsMatch(const flat::UrlRule& rule, |
437 proto::ElementType element_type, | 508 flat::ElementType element_type, |
438 proto::ActivationType activation_type, | 509 flat::ActivationType activation_type, |
439 bool is_third_party) { | 510 bool is_third_party) { |
440 DCHECK((element_type == proto::ELEMENT_TYPE_UNSPECIFIED) != | 511 DCHECK((element_type == flat::ElementType_NONE) != |
441 (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)); | 512 (activation_type == flat::ActivationType_NONE)); |
442 | 513 |
443 if (element_type != proto::ELEMENT_TYPE_UNSPECIFIED && | 514 if (element_type != flat::ElementType_NONE && |
444 !(rule.element_types() & element_type)) { | 515 !(rule.element_types() & element_type)) { |
445 return false; | 516 return false; |
446 } | 517 } |
447 if (activation_type != proto::ACTIVATION_TYPE_UNSPECIFIED && | 518 if (activation_type != flat::ActivationType_NONE && |
448 !(rule.activation_types() & activation_type)) { | 519 !(rule.activation_types() & activation_type)) { |
449 return false; | 520 return false; |
450 } | 521 } |
451 | 522 |
452 if (is_third_party && | 523 if (is_third_party && |
453 !(rule.options() & flat::OptionFlag_APPLIES_TO_THIRD_PARTY)) { | 524 !(rule.options() & flat::OptionFlag_APPLIES_TO_THIRD_PARTY)) { |
454 return false; | 525 return false; |
455 } | 526 } |
456 if (!is_third_party && | 527 if (!is_third_party && |
457 !(rule.options() & flat::OptionFlag_APPLIES_TO_FIRST_PARTY)) { | 528 !(rule.options() & flat::OptionFlag_APPLIES_TO_FIRST_PARTY)) { |
458 return false; | 529 return false; |
459 } | 530 } |
460 | 531 |
461 return true; | 532 return true; |
462 } | 533 } |
463 | 534 |
464 const flat::UrlRule* FindMatchAmongCandidates( | 535 const flat::UrlRule* FindMatchAmongCandidates( |
465 const FlatUrlRuleList* candidates, | 536 const FlatUrlRuleList* candidates, |
466 const GURL& url, | 537 const GURL& url, |
467 const url::Origin& document_origin, | 538 const url::Origin& document_origin, |
468 proto::ElementType element_type, | 539 flat::ElementType element_type, |
469 proto::ActivationType activation_type, | 540 flat::ActivationType activation_type, |
470 bool is_third_party, | 541 bool is_third_party, |
471 bool disable_generic_rules) { | 542 bool disable_generic_rules) { |
472 if (!candidates) | 543 if (!candidates) |
473 return nullptr; | 544 return nullptr; |
474 for (const flat::UrlRule* rule : *candidates) { | 545 for (const flat::UrlRule* rule : *candidates) { |
475 DCHECK_NE(rule, nullptr); | 546 DCHECK_NE(rule, nullptr); |
476 DCHECK_NE(rule->url_pattern_type(), flat::UrlPatternType_REGEXP); | 547 DCHECK_NE(rule->url_pattern_type(), flat::UrlPatternType_REGEXP); |
477 if (!DoesRuleFlagsMatch(*rule, element_type, activation_type, | 548 if (!DoesRuleFlagsMatch(*rule, element_type, activation_type, |
478 is_third_party)) { | 549 is_third_party)) { |
479 continue; | 550 continue; |
(...skipping 10 matching lines...) Expand all Loading... |
490 return nullptr; | 561 return nullptr; |
491 } | 562 } |
492 | 563 |
493 // Returns whether the network request matches a UrlPattern |index| represented | 564 // Returns whether the network request matches a UrlPattern |index| represented |
494 // in its FlatBuffers format. |is_third_party| should reflect the relation | 565 // in its FlatBuffers format. |is_third_party| should reflect the relation |
495 // between |url| and |document_origin|. | 566 // between |url| and |document_origin|. |
496 const flat::UrlRule* FindMatchInFlatUrlPatternIndex( | 567 const flat::UrlRule* FindMatchInFlatUrlPatternIndex( |
497 const flat::UrlPatternIndex& index, | 568 const flat::UrlPatternIndex& index, |
498 const GURL& url, | 569 const GURL& url, |
499 const url::Origin& document_origin, | 570 const url::Origin& document_origin, |
500 proto::ElementType element_type, | 571 flat::ElementType element_type, |
501 proto::ActivationType activation_type, | 572 flat::ActivationType activation_type, |
502 bool is_third_party, | 573 bool is_third_party, |
503 bool disable_generic_rules) { | 574 bool disable_generic_rules) { |
504 const FlatNGramIndex* hash_table = index.ngram_index(); | 575 const FlatNGramIndex* hash_table = index.ngram_index(); |
505 const flat::NGramToRules* empty_slot = index.ngram_index_empty_slot(); | 576 const flat::NGramToRules* empty_slot = index.ngram_index_empty_slot(); |
506 DCHECK_NE(hash_table, nullptr); | 577 DCHECK_NE(hash_table, nullptr); |
507 | 578 |
508 NGramHashTableProber prober; | 579 NGramHashTableProber prober; |
509 | 580 |
510 auto ngrams = CreateNGramExtractor<kNGramSize, uint64_t>( | 581 auto ngrams = CreateNGramExtractor<kNGramSize, uint64_t>( |
511 url.spec(), [](char) { return false; }); | 582 url.spec(), [](char) { return false; }); |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
545 | 616 |
546 UrlPatternIndexMatcher::~UrlPatternIndexMatcher() = default; | 617 UrlPatternIndexMatcher::~UrlPatternIndexMatcher() = default; |
547 | 618 |
548 const flat::UrlRule* UrlPatternIndexMatcher::FindMatch( | 619 const flat::UrlRule* UrlPatternIndexMatcher::FindMatch( |
549 const GURL& url, | 620 const GURL& url, |
550 const url::Origin& first_party_origin, | 621 const url::Origin& first_party_origin, |
551 proto::ElementType element_type, | 622 proto::ElementType element_type, |
552 proto::ActivationType activation_type, | 623 proto::ActivationType activation_type, |
553 bool is_third_party, | 624 bool is_third_party, |
554 bool disable_generic_rules) const { | 625 bool disable_generic_rules) const { |
| 626 return FindMatch(url, first_party_origin, |
| 627 ProtoToFlatElementType(element_type), |
| 628 ProtoToFlatActivationType(activation_type), is_third_party, |
| 629 disable_generic_rules); |
| 630 } |
| 631 |
| 632 const flat::UrlRule* UrlPatternIndexMatcher::FindMatch( |
| 633 const GURL& url, |
| 634 const url::Origin& first_party_origin, |
| 635 flat::ElementType element_type, |
| 636 flat::ActivationType activation_type, |
| 637 bool is_third_party, |
| 638 bool disable_generic_rules) const { |
555 if (!flat_index_ || !url.is_valid()) | 639 if (!flat_index_ || !url.is_valid()) |
556 return nullptr; | 640 return nullptr; |
557 if ((element_type == proto::ELEMENT_TYPE_UNSPECIFIED) == | 641 if ((element_type == flat::ElementType_NONE) == |
558 (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)) { | 642 (activation_type == flat::ActivationType_NONE)) { |
559 return nullptr; | 643 return nullptr; |
560 } | 644 } |
561 | 645 |
562 return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin, | 646 return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin, |
563 element_type, activation_type, | 647 element_type, activation_type, |
564 is_third_party, disable_generic_rules); | 648 is_third_party, disable_generic_rules); |
565 } | 649 } |
566 | 650 |
567 } // namespace url_pattern_index | 651 } // namespace url_pattern_index |
OLD | NEW |