OLD | NEW |
---|---|
1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/url_pattern_index/url_pattern_index.h" | 5 #include "components/url_pattern_index/url_pattern_index.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <limits> | 8 #include <limits> |
9 #include <map> | |
9 #include <string> | 10 #include <string> |
10 | 11 |
11 #include "base/logging.h" | 12 #include "base/logging.h" |
12 #include "base/numerics/safe_conversions.h" | 13 #include "base/numerics/safe_conversions.h" |
14 #include "base/stl_util.h" | |
13 #include "base/strings/string_piece.h" | 15 #include "base/strings/string_piece.h" |
14 #include "base/strings/string_util.h" | 16 #include "base/strings/string_util.h" |
15 #include "components/url_pattern_index/ngram_extractor.h" | 17 #include "components/url_pattern_index/ngram_extractor.h" |
16 #include "components/url_pattern_index/url_pattern.h" | 18 #include "components/url_pattern_index/url_pattern.h" |
17 #include "url/gurl.h" | 19 #include "url/gurl.h" |
18 #include "url/origin.h" | 20 #include "url/origin.h" |
19 | 21 |
20 namespace url_pattern_index { | 22 namespace url_pattern_index { |
21 | 23 |
22 namespace { | 24 namespace { |
23 | 25 |
24 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; | 26 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; |
25 using FlatDomains = flatbuffers::Vector<FlatStringOffset>; | 27 using FlatDomains = flatbuffers::Vector<FlatStringOffset>; |
26 using FlatDomainsOffset = flatbuffers::Offset<FlatDomains>; | 28 using FlatDomainsOffset = flatbuffers::Offset<FlatDomains>; |
27 | 29 |
30 // Maps proto::ActivationType to flat::ActivationType. | |
31 const std::map<proto::ActivationType, flat::ActivationType> kActivationTypeMap = | |
pkalinnikov
2017/06/28 15:59:43
For performance reasons (queries should be fast),
karandeepb
2017/06/29 00:58:18
I preferred to use a flat map, since I think it le
pkalinnikov
2017/06/29 09:19:16
Acknowledged.
| |
32 { | |
33 {proto::ACTIVATION_TYPE_UNSPECIFIED, flat::ActivationType_NONE}, | |
34 {proto::ACTIVATION_TYPE_DOCUMENT, flat::ActivationType_DOCUMENT}, | |
35 // ELEMHIDE is not supported. | |
36 {proto::ACTIVATION_TYPE_ELEMHIDE, flat::ActivationType_NONE}, | |
37 // GENERICHIDE is not supported. | |
38 {proto::ACTIVATION_TYPE_GENERICHIDE, flat::ActivationType_NONE}, | |
39 {proto::ACTIVATION_TYPE_GENERICBLOCK, | |
40 flat::ActivationType_GENERIC_BLOCK}, | |
41 }; | |
42 | |
43 // Maps proto::ElementType to flat::ElementType. | |
44 const std::map<proto::ElementType, flat::ElementType> kElementTypeMap = { | |
45 {proto::ELEMENT_TYPE_UNSPECIFIED, flat::ElementType_NONE}, | |
46 {proto::ELEMENT_TYPE_OTHER, flat::ElementType_OTHER}, | |
47 {proto::ELEMENT_TYPE_SCRIPT, flat::ElementType_SCRIPT}, | |
48 {proto::ELEMENT_TYPE_IMAGE, flat::ElementType_IMAGE}, | |
49 {proto::ELEMENT_TYPE_STYLESHEET, flat::ElementType_STYLESHEET}, | |
50 {proto::ELEMENT_TYPE_OBJECT, flat::ElementType_OBJECT}, | |
51 {proto::ELEMENT_TYPE_XMLHTTPREQUEST, flat::ElementType_XMLHTTPREQUEST}, | |
52 {proto::ELEMENT_TYPE_OBJECT_SUBREQUEST, | |
53 flat::ElementType_OBJECT_SUBREQUEST}, | |
54 {proto::ELEMENT_TYPE_SUBDOCUMENT, flat::ElementType_SUBDOCUMENT}, | |
55 {proto::ELEMENT_TYPE_PING, flat::ElementType_PING}, | |
56 {proto::ELEMENT_TYPE_MEDIA, flat::ElementType_MEDIA}, | |
57 {proto::ELEMENT_TYPE_FONT, flat::ElementType_FONT}, | |
58 // Filtering popups is not supported. | |
59 {proto::ELEMENT_TYPE_POPUP, flat::ElementType_NONE}, | |
60 {proto::ELEMENT_TYPE_WEBSOCKET, flat::ElementType_WEBSOCKET}, | |
61 }; | |
62 | |
63 flat::ActivationType ProtoToFlatActivationType(proto::ActivationType type) { | |
64 DCHECK(base::ContainsKey(kActivationTypeMap, type)); | |
65 return kActivationTypeMap.at(type); | |
66 } | |
67 | |
68 flat::ElementType ProtoToFlatElementType(proto::ElementType type) { | |
69 DCHECK(base::ContainsKey(kElementTypeMap, type)); | |
70 return kElementTypeMap.at(type); | |
71 } | |
72 | |
28 base::StringPiece ToStringPiece(const flatbuffers::String* string) { | 73 base::StringPiece ToStringPiece(const flatbuffers::String* string) { |
29 DCHECK(string); | 74 DCHECK(string); |
30 return base::StringPiece(string->c_str(), string->size()); | 75 return base::StringPiece(string->c_str(), string->size()); |
31 } | 76 } |
32 | 77 |
33 // Performs three-way comparison between two domains. In the total order defined | 78 // Performs three-way comparison between two domains. In the total order defined |
34 // by this predicate, the lengths of domains will be monotonically decreasing. | 79 // by this predicate, the lengths of domains will be monotonically decreasing. |
35 int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain) { | 80 int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain) { |
36 if (lhs_domain.size() != rhs_domain.size()) | 81 if (lhs_domain.size() != rhs_domain.size()) |
37 return lhs_domain.size() > rhs_domain.size() ? -1 : 1; | 82 return lhs_domain.size() > rhs_domain.size() ? -1 : 1; |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
135 case proto::ANCHOR_TYPE_SUBDOMAIN: | 180 case proto::ANCHOR_TYPE_SUBDOMAIN: |
136 *result = flat::AnchorType_SUBDOMAIN; | 181 *result = flat::AnchorType_SUBDOMAIN; |
137 break; | 182 break; |
138 default: | 183 default: |
139 return false; // Unsupported anchor type. | 184 return false; // Unsupported anchor type. |
140 } | 185 } |
141 return true; | 186 return true; |
142 } | 187 } |
143 | 188 |
144 bool InitializeOptions() { | 189 bool InitializeOptions() { |
190 static_assert(flat::OptionFlag_ANY <= std::numeric_limits<uint8_t>::max(), | |
191 "Option flags can not be stored in uint8_t."); | |
192 | |
145 if (rule_.semantics() == proto::RULE_SEMANTICS_WHITELIST) { | 193 if (rule_.semantics() == proto::RULE_SEMANTICS_WHITELIST) { |
146 options_ |= flat::OptionFlag_IS_WHITELIST; | 194 options_ |= flat::OptionFlag_IS_WHITELIST; |
147 } else if (rule_.semantics() != proto::RULE_SEMANTICS_BLACKLIST) { | 195 } else if (rule_.semantics() != proto::RULE_SEMANTICS_BLACKLIST) { |
148 return false; // Unsupported semantics. | 196 return false; // Unsupported semantics. |
149 } | 197 } |
150 | 198 |
151 switch (rule_.source_type()) { | 199 switch (rule_.source_type()) { |
152 case proto::SOURCE_TYPE_ANY: | 200 case proto::SOURCE_TYPE_ANY: |
153 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; | 201 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; |
154 // Note: fall through here intentionally. | 202 // Note: fall through here intentionally. |
155 case proto::SOURCE_TYPE_FIRST_PARTY: | 203 case proto::SOURCE_TYPE_FIRST_PARTY: |
156 options_ |= flat::OptionFlag_APPLIES_TO_FIRST_PARTY; | 204 options_ |= flat::OptionFlag_APPLIES_TO_FIRST_PARTY; |
157 break; | 205 break; |
158 case proto::SOURCE_TYPE_THIRD_PARTY: | 206 case proto::SOURCE_TYPE_THIRD_PARTY: |
159 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; | 207 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; |
160 break; | 208 break; |
161 | 209 |
162 default: | 210 default: |
163 return false; // Unsupported source type. | 211 return false; // Unsupported source type. |
164 } | 212 } |
165 | 213 |
166 if (rule_.match_case()) | 214 if (rule_.match_case()) |
167 options_ |= flat::OptionFlag_IS_MATCH_CASE; | 215 options_ |= flat::OptionFlag_IS_MATCH_CASE; |
168 | 216 |
169 return true; | 217 return true; |
170 } | 218 } |
171 | 219 |
172 bool InitializeElementTypes() { | 220 bool InitializeElementTypes() { |
173 static_assert( | 221 static_assert(flat::ElementType_ANY <= std::numeric_limits<uint16_t>::max(), |
174 proto::ELEMENT_TYPE_ALL <= std::numeric_limits<uint16_t>::max(), | 222 "Element types can not be stored in uint16_t."); |
175 "Element types can not be stored in uint16_t."); | |
176 element_types_ = static_cast<uint16_t>(rule_.element_types()); | |
177 | 223 |
178 // Note: Normally we can not distinguish between the main plugin resource | 224 // Ensure all proto::ElementType(s) are mapped in |kElementTypeMap|. |
179 // and any other loads it makes. We treat them both as OBJECT requests. | 225 #if DCHECK_IS_ON() |
180 if (element_types_ & proto::ELEMENT_TYPE_OBJECT_SUBREQUEST) | 226 uint16_t mask = 0; |
pkalinnikov
2017/06/28 15:59:43
nit: Can you make it int? Same below for Activatio
karandeepb
2017/06/29 00:58:18
Isn't this better being a more concrete type? |ele
pkalinnikov
2017/06/29 09:19:16
I agree that *current* proto::ElementType values f
| |
181 element_types_ |= proto::ELEMENT_TYPE_OBJECT; | 227 for (const auto& pair : kElementTypeMap) |
228 mask |= pair.first; | |
229 DCHECK_EQ(proto::ELEMENT_TYPE_ALL, mask); | |
230 #endif // DCHECK_IS_ON() | |
182 | 231 |
183 // Ignore unknown element types. | 232 element_types_ = flat::ElementType_NONE; |
184 element_types_ &= proto::ELEMENT_TYPE_ALL; | 233 |
185 // Filtering popups is not supported. | 234 for (const auto& pair : kElementTypeMap) |
186 element_types_ &= ~proto::ELEMENT_TYPE_POPUP; | 235 if (rule_.element_types() & pair.first) |
236 element_types_ |= pair.second; | |
237 | |
238 // Normally we can not distinguish between the main plugin resource and any | |
239 // other loads it makes. We treat them both as OBJECT requests. Hence an | |
240 // OBJECT request would also match OBJECT_SUBREQUEST rules, but not the | |
241 // other way round. | |
242 if (element_types_ & flat::ElementType_OBJECT_SUBREQUEST) | |
pkalinnikov
2017/06/28 15:59:42
Should we consider merging the 2 types together? S
karandeepb
2017/06/29 00:58:18
Yeah I thought of that as well. We can add DCHECKs
pkalinnikov
2017/06/29 09:19:16
Acknowledged.
| |
243 element_types_ |= flat::ElementType_OBJECT; | |
187 | 244 |
188 return true; | 245 return true; |
189 } | 246 } |
190 | 247 |
191 bool InitializeActivationTypes() { | 248 bool InitializeActivationTypes() { |
192 static_assert( | 249 static_assert( |
193 proto::ACTIVATION_TYPE_ALL <= std::numeric_limits<uint8_t>::max(), | 250 flat::ActivationType_ANY <= std::numeric_limits<uint8_t>::max(), |
194 "Activation types can not be stored in uint8_t."); | 251 "Activation types can not be stored in uint8_t."); |
195 activation_types_ = static_cast<uint8_t>(rule_.activation_types()); | |
196 | 252 |
197 // Only the following activation types are supported, ignore the others. | 253 // Ensure all proto::ActivationType(s) are mapped in |kActivationTypeMap|. |
198 activation_types_ &= | 254 #if DCHECK_IS_ON() |
199 proto::ACTIVATION_TYPE_DOCUMENT | proto::ACTIVATION_TYPE_GENERICBLOCK; | 255 uint16_t mask = 0; |
256 for (const auto& pair : kActivationTypeMap) | |
257 mask |= pair.first; | |
258 DCHECK_EQ(proto::ACTIVATION_TYPE_ALL, mask); | |
259 #endif // DCHECK_IS_ON() | |
260 | |
261 activation_types_ = flat::ActivationType_NONE; | |
262 | |
263 for (const auto& pair : kActivationTypeMap) | |
264 if (rule_.activation_types() & pair.first) | |
265 activation_types_ |= pair.second; | |
200 | 266 |
201 return true; | 267 return true; |
202 } | 268 } |
203 | 269 |
204 bool InitializeUrlPattern() { | 270 bool InitializeUrlPattern() { |
205 switch (rule_.url_pattern_type()) { | 271 switch (rule_.url_pattern_type()) { |
206 case proto::URL_PATTERN_TYPE_SUBSTRING: | 272 case proto::URL_PATTERN_TYPE_SUBSTRING: |
207 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; | 273 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; |
208 break; | 274 break; |
209 case proto::URL_PATTERN_TYPE_WILDCARDED: | 275 case proto::URL_PATTERN_TYPE_WILDCARDED: |
(...skipping 213 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
423 } | 489 } |
424 if (longest_matching_included_domain_length && rule.domains_excluded()) { | 490 if (longest_matching_included_domain_length && rule.domains_excluded()) { |
425 return GetLongestMatchingSubdomain(origin, *rule.domains_excluded()) < | 491 return GetLongestMatchingSubdomain(origin, *rule.domains_excluded()) < |
426 longest_matching_included_domain_length; | 492 longest_matching_included_domain_length; |
427 } | 493 } |
428 return !!longest_matching_included_domain_length; | 494 return !!longest_matching_included_domain_length; |
429 } | 495 } |
430 | 496 |
431 // Returns whether the request matches flags of the specified URL |rule|. Takes | 497 // Returns whether the request matches flags of the specified URL |rule|. Takes |
432 // into account: | 498 // into account: |
433 // - |element_type| of the requested resource, if not *_UNSPECIFIED. | 499 // - |element_type| of the requested resource, if not *_NONE. |
434 // - |activation_type| for a subdocument request, if not *_UNSPECIFIED. | 500 // - |activation_type| for a subdocument request, if not *_NONE. |
435 // - Whether the resource |is_third_party| w.r.t. its embedding document. | 501 // - Whether the resource |is_third_party| w.r.t. its embedding document. |
436 bool DoesRuleFlagsMatch(const flat::UrlRule& rule, | 502 bool DoesRuleFlagsMatch(const flat::UrlRule& rule, |
437 proto::ElementType element_type, | 503 flat::ElementType element_type, |
438 proto::ActivationType activation_type, | 504 flat::ActivationType activation_type, |
439 bool is_third_party) { | 505 bool is_third_party) { |
440 DCHECK((element_type == proto::ELEMENT_TYPE_UNSPECIFIED) != | 506 DCHECK((element_type == flat::ElementType_NONE) != |
441 (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)); | 507 (activation_type == flat::ActivationType_NONE)); |
442 | 508 |
443 if (element_type != proto::ELEMENT_TYPE_UNSPECIFIED && | 509 if (element_type != flat::ElementType_NONE && |
444 !(rule.element_types() & element_type)) { | 510 !(rule.element_types() & element_type)) { |
445 return false; | 511 return false; |
446 } | 512 } |
447 if (activation_type != proto::ACTIVATION_TYPE_UNSPECIFIED && | 513 if (activation_type != flat::ActivationType_NONE && |
448 !(rule.activation_types() & activation_type)) { | 514 !(rule.activation_types() & activation_type)) { |
449 return false; | 515 return false; |
450 } | 516 } |
451 | 517 |
452 if (is_third_party && | 518 if (is_third_party && |
453 !(rule.options() & flat::OptionFlag_APPLIES_TO_THIRD_PARTY)) { | 519 !(rule.options() & flat::OptionFlag_APPLIES_TO_THIRD_PARTY)) { |
454 return false; | 520 return false; |
455 } | 521 } |
456 if (!is_third_party && | 522 if (!is_third_party && |
457 !(rule.options() & flat::OptionFlag_APPLIES_TO_FIRST_PARTY)) { | 523 !(rule.options() & flat::OptionFlag_APPLIES_TO_FIRST_PARTY)) { |
458 return false; | 524 return false; |
459 } | 525 } |
460 | 526 |
461 return true; | 527 return true; |
462 } | 528 } |
463 | 529 |
464 const flat::UrlRule* FindMatchAmongCandidates( | 530 const flat::UrlRule* FindMatchAmongCandidates( |
465 const FlatUrlRuleList* candidates, | 531 const FlatUrlRuleList* candidates, |
466 const GURL& url, | 532 const GURL& url, |
467 const url::Origin& document_origin, | 533 const url::Origin& document_origin, |
468 proto::ElementType element_type, | 534 flat::ElementType element_type, |
469 proto::ActivationType activation_type, | 535 flat::ActivationType activation_type, |
470 bool is_third_party, | 536 bool is_third_party, |
471 bool disable_generic_rules) { | 537 bool disable_generic_rules) { |
472 if (!candidates) | 538 if (!candidates) |
473 return nullptr; | 539 return nullptr; |
474 for (const flat::UrlRule* rule : *candidates) { | 540 for (const flat::UrlRule* rule : *candidates) { |
475 DCHECK_NE(rule, nullptr); | 541 DCHECK_NE(rule, nullptr); |
476 DCHECK_NE(rule->url_pattern_type(), flat::UrlPatternType_REGEXP); | 542 DCHECK_NE(rule->url_pattern_type(), flat::UrlPatternType_REGEXP); |
477 if (!DoesRuleFlagsMatch(*rule, element_type, activation_type, | 543 if (!DoesRuleFlagsMatch(*rule, element_type, activation_type, |
478 is_third_party)) { | 544 is_third_party)) { |
479 continue; | 545 continue; |
(...skipping 10 matching lines...) Expand all Loading... | |
490 return nullptr; | 556 return nullptr; |
491 } | 557 } |
492 | 558 |
493 // Returns whether the network request matches a UrlPattern |index| represented | 559 // Returns whether the network request matches a UrlPattern |index| represented |
494 // in its FlatBuffers format. |is_third_party| should reflect the relation | 560 // in its FlatBuffers format. |is_third_party| should reflect the relation |
495 // between |url| and |document_origin|. | 561 // between |url| and |document_origin|. |
496 const flat::UrlRule* FindMatchInFlatUrlPatternIndex( | 562 const flat::UrlRule* FindMatchInFlatUrlPatternIndex( |
497 const flat::UrlPatternIndex& index, | 563 const flat::UrlPatternIndex& index, |
498 const GURL& url, | 564 const GURL& url, |
499 const url::Origin& document_origin, | 565 const url::Origin& document_origin, |
500 proto::ElementType element_type, | 566 flat::ElementType element_type, |
501 proto::ActivationType activation_type, | 567 flat::ActivationType activation_type, |
502 bool is_third_party, | 568 bool is_third_party, |
503 bool disable_generic_rules) { | 569 bool disable_generic_rules) { |
504 const FlatNGramIndex* hash_table = index.ngram_index(); | 570 const FlatNGramIndex* hash_table = index.ngram_index(); |
505 const flat::NGramToRules* empty_slot = index.ngram_index_empty_slot(); | 571 const flat::NGramToRules* empty_slot = index.ngram_index_empty_slot(); |
506 DCHECK_NE(hash_table, nullptr); | 572 DCHECK_NE(hash_table, nullptr); |
507 | 573 |
508 NGramHashTableProber prober; | 574 NGramHashTableProber prober; |
509 | 575 |
510 auto ngrams = CreateNGramExtractor<kNGramSize, uint64_t>( | 576 auto ngrams = CreateNGramExtractor<kNGramSize, uint64_t>( |
511 url.spec(), [](char) { return false; }); | 577 url.spec(), [](char) { return false; }); |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
545 | 611 |
546 UrlPatternIndexMatcher::~UrlPatternIndexMatcher() = default; | 612 UrlPatternIndexMatcher::~UrlPatternIndexMatcher() = default; |
547 | 613 |
548 const flat::UrlRule* UrlPatternIndexMatcher::FindMatch( | 614 const flat::UrlRule* UrlPatternIndexMatcher::FindMatch( |
549 const GURL& url, | 615 const GURL& url, |
550 const url::Origin& first_party_origin, | 616 const url::Origin& first_party_origin, |
551 proto::ElementType element_type, | 617 proto::ElementType element_type, |
552 proto::ActivationType activation_type, | 618 proto::ActivationType activation_type, |
553 bool is_third_party, | 619 bool is_third_party, |
554 bool disable_generic_rules) const { | 620 bool disable_generic_rules) const { |
621 return FindMatch(url, first_party_origin, | |
622 ProtoToFlatElementType(element_type), | |
623 ProtoToFlatActivationType(activation_type), is_third_party, | |
624 disable_generic_rules); | |
625 } | |
626 | |
627 const flat::UrlRule* UrlPatternIndexMatcher::FindMatch( | |
628 const GURL& url, | |
629 const url::Origin& first_party_origin, | |
630 flat::ElementType element_type, | |
631 flat::ActivationType activation_type, | |
632 bool is_third_party, | |
633 bool disable_generic_rules) const { | |
555 if (!flat_index_ || !url.is_valid()) | 634 if (!flat_index_ || !url.is_valid()) |
556 return nullptr; | 635 return nullptr; |
557 if ((element_type == proto::ELEMENT_TYPE_UNSPECIFIED) == | 636 if ((element_type == flat::ElementType_NONE) == |
558 (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)) { | 637 (activation_type == flat::ActivationType_NONE)) { |
559 return nullptr; | 638 return nullptr; |
560 } | 639 } |
561 | 640 |
562 return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin, | 641 return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin, |
563 element_type, activation_type, | 642 element_type, activation_type, |
564 is_third_party, disable_generic_rules); | 643 is_third_party, disable_generic_rules); |
565 } | 644 } |
566 | 645 |
567 } // namespace url_pattern_index | 646 } // namespace url_pattern_index |
OLD | NEW |