Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(57)

Side by Side Diff: components/url_pattern_index/url_pattern_index.cc

Issue 2954613002: UrlPatternIndex: Introduce ElementType and ActivationType enums to url_pattern_index schema. (Closed)
Patch Set: Address review. Created 3 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/url_pattern_index/url_pattern_index.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2017 The Chromium Authors. All rights reserved. 1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/url_pattern_index/url_pattern_index.h" 5 #include "components/url_pattern_index/url_pattern_index.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <limits> 8 #include <limits>
9 #include <string> 9 #include <string>
10 10
11 #include "base/containers/flat_map.h"
11 #include "base/logging.h" 12 #include "base/logging.h"
12 #include "base/numerics/safe_conversions.h" 13 #include "base/numerics/safe_conversions.h"
13 #include "base/strings/string_piece.h" 14 #include "base/strings/string_piece.h"
14 #include "base/strings/string_util.h" 15 #include "base/strings/string_util.h"
15 #include "components/url_pattern_index/ngram_extractor.h" 16 #include "components/url_pattern_index/ngram_extractor.h"
16 #include "components/url_pattern_index/url_pattern.h" 17 #include "components/url_pattern_index/url_pattern.h"
17 #include "url/gurl.h" 18 #include "url/gurl.h"
18 #include "url/origin.h" 19 #include "url/origin.h"
19 20
20 namespace url_pattern_index { 21 namespace url_pattern_index {
21 22
22 namespace { 23 namespace {
23 24
24 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; 25 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>;
25 using FlatDomains = flatbuffers::Vector<FlatStringOffset>; 26 using FlatDomains = flatbuffers::Vector<FlatStringOffset>;
26 using FlatDomainsOffset = flatbuffers::Offset<FlatDomains>; 27 using FlatDomainsOffset = flatbuffers::Offset<FlatDomains>;
27 28
29 // Maps proto::ActivationType to flat::ActivationType.
30 const base::flat_map<proto::ActivationType, flat::ActivationType>
31 kActivationTypeMap(
32 {
33 {proto::ACTIVATION_TYPE_UNSPECIFIED, flat::ActivationType_NONE},
34 {proto::ACTIVATION_TYPE_DOCUMENT, flat::ActivationType_DOCUMENT},
35 // ELEMHIDE is not supported.
36 {proto::ACTIVATION_TYPE_ELEMHIDE, flat::ActivationType_NONE},
37 // GENERICHIDE is not supported.
38 {proto::ACTIVATION_TYPE_GENERICHIDE, flat::ActivationType_NONE},
39 {proto::ACTIVATION_TYPE_GENERICBLOCK,
40 flat::ActivationType_GENERIC_BLOCK},
41 },
42 base::KEEP_FIRST_OF_DUPES);
43
44 // Maps proto::ElementType to flat::ElementType.
45 const base::flat_map<proto::ElementType, flat::ElementType> kElementTypeMap(
46 {
47 {proto::ELEMENT_TYPE_UNSPECIFIED, flat::ElementType_NONE},
48 {proto::ELEMENT_TYPE_OTHER, flat::ElementType_OTHER},
49 {proto::ELEMENT_TYPE_SCRIPT, flat::ElementType_SCRIPT},
50 {proto::ELEMENT_TYPE_IMAGE, flat::ElementType_IMAGE},
51 {proto::ELEMENT_TYPE_STYLESHEET, flat::ElementType_STYLESHEET},
52 {proto::ELEMENT_TYPE_OBJECT, flat::ElementType_OBJECT},
53 {proto::ELEMENT_TYPE_XMLHTTPREQUEST, flat::ElementType_XMLHTTPREQUEST},
54 {proto::ELEMENT_TYPE_OBJECT_SUBREQUEST,
55 flat::ElementType_OBJECT_SUBREQUEST},
56 {proto::ELEMENT_TYPE_SUBDOCUMENT, flat::ElementType_SUBDOCUMENT},
57 {proto::ELEMENT_TYPE_PING, flat::ElementType_PING},
58 {proto::ELEMENT_TYPE_MEDIA, flat::ElementType_MEDIA},
59 {proto::ELEMENT_TYPE_FONT, flat::ElementType_FONT},
60 // Filtering popups is not supported.
61 {proto::ELEMENT_TYPE_POPUP, flat::ElementType_NONE},
62 {proto::ELEMENT_TYPE_WEBSOCKET, flat::ElementType_WEBSOCKET},
63 },
64 base::KEEP_FIRST_OF_DUPES);
65
66 flat::ActivationType ProtoToFlatActivationType(proto::ActivationType type) {
67 const auto it = kActivationTypeMap.find(type);
68 DCHECK(it != kActivationTypeMap.end());
69 return it->second;
70 }
71
72 flat::ElementType ProtoToFlatElementType(proto::ElementType type) {
73 const auto it = kElementTypeMap.find(type);
74 DCHECK(it != kElementTypeMap.end());
75 return it->second;
76 }
77
28 base::StringPiece ToStringPiece(const flatbuffers::String* string) { 78 base::StringPiece ToStringPiece(const flatbuffers::String* string) {
29 DCHECK(string); 79 DCHECK(string);
30 return base::StringPiece(string->c_str(), string->size()); 80 return base::StringPiece(string->c_str(), string->size());
31 } 81 }
32 82
33 // Performs three-way comparison between two domains. In the total order defined 83 // Performs three-way comparison between two domains. In the total order defined
34 // by this predicate, the lengths of domains will be monotonically decreasing. 84 // by this predicate, the lengths of domains will be monotonically decreasing.
35 int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain) { 85 int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain) {
36 if (lhs_domain.size() != rhs_domain.size()) 86 if (lhs_domain.size() != rhs_domain.size())
37 return lhs_domain.size() > rhs_domain.size() ? -1 : 1; 87 return lhs_domain.size() > rhs_domain.size() ? -1 : 1;
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
135 case proto::ANCHOR_TYPE_SUBDOMAIN: 185 case proto::ANCHOR_TYPE_SUBDOMAIN:
136 *result = flat::AnchorType_SUBDOMAIN; 186 *result = flat::AnchorType_SUBDOMAIN;
137 break; 187 break;
138 default: 188 default:
139 return false; // Unsupported anchor type. 189 return false; // Unsupported anchor type.
140 } 190 }
141 return true; 191 return true;
142 } 192 }
143 193
144 bool InitializeOptions() { 194 bool InitializeOptions() {
195 static_assert(flat::OptionFlag_ANY <= std::numeric_limits<uint8_t>::max(),
196 "Option flags can not be stored in uint8_t.");
197
145 if (rule_.semantics() == proto::RULE_SEMANTICS_WHITELIST) { 198 if (rule_.semantics() == proto::RULE_SEMANTICS_WHITELIST) {
146 options_ |= flat::OptionFlag_IS_WHITELIST; 199 options_ |= flat::OptionFlag_IS_WHITELIST;
147 } else if (rule_.semantics() != proto::RULE_SEMANTICS_BLACKLIST) { 200 } else if (rule_.semantics() != proto::RULE_SEMANTICS_BLACKLIST) {
148 return false; // Unsupported semantics. 201 return false; // Unsupported semantics.
149 } 202 }
150 203
151 switch (rule_.source_type()) { 204 switch (rule_.source_type()) {
152 case proto::SOURCE_TYPE_ANY: 205 case proto::SOURCE_TYPE_ANY:
153 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; 206 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY;
154 // Note: fall through here intentionally. 207 // Note: fall through here intentionally.
155 case proto::SOURCE_TYPE_FIRST_PARTY: 208 case proto::SOURCE_TYPE_FIRST_PARTY:
156 options_ |= flat::OptionFlag_APPLIES_TO_FIRST_PARTY; 209 options_ |= flat::OptionFlag_APPLIES_TO_FIRST_PARTY;
157 break; 210 break;
158 case proto::SOURCE_TYPE_THIRD_PARTY: 211 case proto::SOURCE_TYPE_THIRD_PARTY:
159 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; 212 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY;
160 break; 213 break;
161 214
162 default: 215 default:
163 return false; // Unsupported source type. 216 return false; // Unsupported source type.
164 } 217 }
165 218
166 if (rule_.match_case()) 219 if (rule_.match_case())
167 options_ |= flat::OptionFlag_IS_MATCH_CASE; 220 options_ |= flat::OptionFlag_IS_MATCH_CASE;
168 221
169 return true; 222 return true;
170 } 223 }
171 224
172 bool InitializeElementTypes() { 225 bool InitializeElementTypes() {
173 static_assert( 226 static_assert(flat::ElementType_ANY <= std::numeric_limits<uint16_t>::max(),
174 proto::ELEMENT_TYPE_ALL <= std::numeric_limits<uint16_t>::max(), 227 "Element types can not be stored in uint16_t.");
175 "Element types can not be stored in uint16_t.");
176 element_types_ = static_cast<uint16_t>(rule_.element_types());
177 228
178 // Note: Normally we can not distinguish between the main plugin resource 229 // Ensure all proto::ElementType(s) are mapped in |kElementTypeMap|.
179 // and any other loads it makes. We treat them both as OBJECT requests. 230 #if DCHECK_IS_ON()
180 if (element_types_ & proto::ELEMENT_TYPE_OBJECT_SUBREQUEST) 231 uint16_t mask = 0;
181 element_types_ |= proto::ELEMENT_TYPE_OBJECT; 232 for (const auto& pair : kElementTypeMap)
233 mask |= pair.first;
234 DCHECK_EQ(proto::ELEMENT_TYPE_ALL, mask);
235 #endif // DCHECK_IS_ON()
182 236
183 // Ignore unknown element types. 237 element_types_ = flat::ElementType_NONE;
184 element_types_ &= proto::ELEMENT_TYPE_ALL; 238
185 // Filtering popups is not supported. 239 for (const auto& pair : kElementTypeMap)
186 element_types_ &= ~proto::ELEMENT_TYPE_POPUP; 240 if (rule_.element_types() & pair.first)
241 element_types_ |= pair.second;
242
243 // Normally we can not distinguish between the main plugin resource and any
244 // other loads it makes. We treat them both as OBJECT requests. Hence an
245 // OBJECT request would also match OBJECT_SUBREQUEST rules, but not the
246 // other way round.
247 if (element_types_ & flat::ElementType_OBJECT_SUBREQUEST)
248 element_types_ |= flat::ElementType_OBJECT;
187 249
188 return true; 250 return true;
189 } 251 }
190 252
191 bool InitializeActivationTypes() { 253 bool InitializeActivationTypes() {
192 static_assert( 254 static_assert(
193 proto::ACTIVATION_TYPE_ALL <= std::numeric_limits<uint8_t>::max(), 255 flat::ActivationType_ANY <= std::numeric_limits<uint8_t>::max(),
194 "Activation types can not be stored in uint8_t."); 256 "Activation types can not be stored in uint8_t.");
195 activation_types_ = static_cast<uint8_t>(rule_.activation_types());
196 257
197 // Only the following activation types are supported, ignore the others. 258 // Ensure all proto::ActivationType(s) are mapped in |kActivationTypeMap|.
198 activation_types_ &= 259 #if DCHECK_IS_ON()
199 proto::ACTIVATION_TYPE_DOCUMENT | proto::ACTIVATION_TYPE_GENERICBLOCK; 260 uint16_t mask = 0;
261 for (const auto& pair : kActivationTypeMap)
262 mask |= pair.first;
263 DCHECK_EQ(proto::ACTIVATION_TYPE_ALL, mask);
264 #endif // DCHECK_IS_ON()
265
266 activation_types_ = flat::ActivationType_NONE;
267
268 for (const auto& pair : kActivationTypeMap)
269 if (rule_.activation_types() & pair.first)
270 activation_types_ |= pair.second;
200 271
201 return true; 272 return true;
202 } 273 }
203 274
204 bool InitializeUrlPattern() { 275 bool InitializeUrlPattern() {
205 switch (rule_.url_pattern_type()) { 276 switch (rule_.url_pattern_type()) {
206 case proto::URL_PATTERN_TYPE_SUBSTRING: 277 case proto::URL_PATTERN_TYPE_SUBSTRING:
207 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; 278 url_pattern_type_ = flat::UrlPatternType_SUBSTRING;
208 break; 279 break;
209 case proto::URL_PATTERN_TYPE_WILDCARDED: 280 case proto::URL_PATTERN_TYPE_WILDCARDED:
(...skipping 213 matching lines...) Expand 10 before | Expand all | Expand 10 after
423 } 494 }
424 if (longest_matching_included_domain_length && rule.domains_excluded()) { 495 if (longest_matching_included_domain_length && rule.domains_excluded()) {
425 return GetLongestMatchingSubdomain(origin, *rule.domains_excluded()) < 496 return GetLongestMatchingSubdomain(origin, *rule.domains_excluded()) <
426 longest_matching_included_domain_length; 497 longest_matching_included_domain_length;
427 } 498 }
428 return !!longest_matching_included_domain_length; 499 return !!longest_matching_included_domain_length;
429 } 500 }
430 501
431 // Returns whether the request matches flags of the specified URL |rule|. Takes 502 // Returns whether the request matches flags of the specified URL |rule|. Takes
432 // into account: 503 // into account:
433 // - |element_type| of the requested resource, if not *_UNSPECIFIED. 504 // - |element_type| of the requested resource, if not *_NONE.
434 // - |activation_type| for a subdocument request, if not *_UNSPECIFIED. 505 // - |activation_type| for a subdocument request, if not *_NONE.
435 // - Whether the resource |is_third_party| w.r.t. its embedding document. 506 // - Whether the resource |is_third_party| w.r.t. its embedding document.
436 bool DoesRuleFlagsMatch(const flat::UrlRule& rule, 507 bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
437 proto::ElementType element_type, 508 flat::ElementType element_type,
438 proto::ActivationType activation_type, 509 flat::ActivationType activation_type,
439 bool is_third_party) { 510 bool is_third_party) {
440 DCHECK((element_type == proto::ELEMENT_TYPE_UNSPECIFIED) != 511 DCHECK((element_type == flat::ElementType_NONE) !=
441 (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)); 512 (activation_type == flat::ActivationType_NONE));
442 513
443 if (element_type != proto::ELEMENT_TYPE_UNSPECIFIED && 514 if (element_type != flat::ElementType_NONE &&
444 !(rule.element_types() & element_type)) { 515 !(rule.element_types() & element_type)) {
445 return false; 516 return false;
446 } 517 }
447 if (activation_type != proto::ACTIVATION_TYPE_UNSPECIFIED && 518 if (activation_type != flat::ActivationType_NONE &&
448 !(rule.activation_types() & activation_type)) { 519 !(rule.activation_types() & activation_type)) {
449 return false; 520 return false;
450 } 521 }
451 522
452 if (is_third_party && 523 if (is_third_party &&
453 !(rule.options() & flat::OptionFlag_APPLIES_TO_THIRD_PARTY)) { 524 !(rule.options() & flat::OptionFlag_APPLIES_TO_THIRD_PARTY)) {
454 return false; 525 return false;
455 } 526 }
456 if (!is_third_party && 527 if (!is_third_party &&
457 !(rule.options() & flat::OptionFlag_APPLIES_TO_FIRST_PARTY)) { 528 !(rule.options() & flat::OptionFlag_APPLIES_TO_FIRST_PARTY)) {
458 return false; 529 return false;
459 } 530 }
460 531
461 return true; 532 return true;
462 } 533 }
463 534
464 const flat::UrlRule* FindMatchAmongCandidates( 535 const flat::UrlRule* FindMatchAmongCandidates(
465 const FlatUrlRuleList* candidates, 536 const FlatUrlRuleList* candidates,
466 const GURL& url, 537 const GURL& url,
467 const url::Origin& document_origin, 538 const url::Origin& document_origin,
468 proto::ElementType element_type, 539 flat::ElementType element_type,
469 proto::ActivationType activation_type, 540 flat::ActivationType activation_type,
470 bool is_third_party, 541 bool is_third_party,
471 bool disable_generic_rules) { 542 bool disable_generic_rules) {
472 if (!candidates) 543 if (!candidates)
473 return nullptr; 544 return nullptr;
474 for (const flat::UrlRule* rule : *candidates) { 545 for (const flat::UrlRule* rule : *candidates) {
475 DCHECK_NE(rule, nullptr); 546 DCHECK_NE(rule, nullptr);
476 DCHECK_NE(rule->url_pattern_type(), flat::UrlPatternType_REGEXP); 547 DCHECK_NE(rule->url_pattern_type(), flat::UrlPatternType_REGEXP);
477 if (!DoesRuleFlagsMatch(*rule, element_type, activation_type, 548 if (!DoesRuleFlagsMatch(*rule, element_type, activation_type,
478 is_third_party)) { 549 is_third_party)) {
479 continue; 550 continue;
(...skipping 10 matching lines...) Expand all
490 return nullptr; 561 return nullptr;
491 } 562 }
492 563
493 // Returns whether the network request matches a UrlPattern |index| represented 564 // Returns whether the network request matches a UrlPattern |index| represented
494 // in its FlatBuffers format. |is_third_party| should reflect the relation 565 // in its FlatBuffers format. |is_third_party| should reflect the relation
495 // between |url| and |document_origin|. 566 // between |url| and |document_origin|.
496 const flat::UrlRule* FindMatchInFlatUrlPatternIndex( 567 const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
497 const flat::UrlPatternIndex& index, 568 const flat::UrlPatternIndex& index,
498 const GURL& url, 569 const GURL& url,
499 const url::Origin& document_origin, 570 const url::Origin& document_origin,
500 proto::ElementType element_type, 571 flat::ElementType element_type,
501 proto::ActivationType activation_type, 572 flat::ActivationType activation_type,
502 bool is_third_party, 573 bool is_third_party,
503 bool disable_generic_rules) { 574 bool disable_generic_rules) {
504 const FlatNGramIndex* hash_table = index.ngram_index(); 575 const FlatNGramIndex* hash_table = index.ngram_index();
505 const flat::NGramToRules* empty_slot = index.ngram_index_empty_slot(); 576 const flat::NGramToRules* empty_slot = index.ngram_index_empty_slot();
506 DCHECK_NE(hash_table, nullptr); 577 DCHECK_NE(hash_table, nullptr);
507 578
508 NGramHashTableProber prober; 579 NGramHashTableProber prober;
509 580
510 auto ngrams = CreateNGramExtractor<kNGramSize, uint64_t>( 581 auto ngrams = CreateNGramExtractor<kNGramSize, uint64_t>(
511 url.spec(), [](char) { return false; }); 582 url.spec(), [](char) { return false; });
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
545 616
546 UrlPatternIndexMatcher::~UrlPatternIndexMatcher() = default; 617 UrlPatternIndexMatcher::~UrlPatternIndexMatcher() = default;
547 618
548 const flat::UrlRule* UrlPatternIndexMatcher::FindMatch( 619 const flat::UrlRule* UrlPatternIndexMatcher::FindMatch(
549 const GURL& url, 620 const GURL& url,
550 const url::Origin& first_party_origin, 621 const url::Origin& first_party_origin,
551 proto::ElementType element_type, 622 proto::ElementType element_type,
552 proto::ActivationType activation_type, 623 proto::ActivationType activation_type,
553 bool is_third_party, 624 bool is_third_party,
554 bool disable_generic_rules) const { 625 bool disable_generic_rules) const {
626 return FindMatch(url, first_party_origin,
627 ProtoToFlatElementType(element_type),
628 ProtoToFlatActivationType(activation_type), is_third_party,
629 disable_generic_rules);
630 }
631
632 const flat::UrlRule* UrlPatternIndexMatcher::FindMatch(
633 const GURL& url,
634 const url::Origin& first_party_origin,
635 flat::ElementType element_type,
636 flat::ActivationType activation_type,
637 bool is_third_party,
638 bool disable_generic_rules) const {
555 if (!flat_index_ || !url.is_valid()) 639 if (!flat_index_ || !url.is_valid())
556 return nullptr; 640 return nullptr;
557 if ((element_type == proto::ELEMENT_TYPE_UNSPECIFIED) == 641 if ((element_type == flat::ElementType_NONE) ==
558 (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)) { 642 (activation_type == flat::ActivationType_NONE)) {
559 return nullptr; 643 return nullptr;
560 } 644 }
561 645
562 return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin, 646 return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin,
563 element_type, activation_type, 647 element_type, activation_type,
564 is_third_party, disable_generic_rules); 648 is_third_party, disable_generic_rules);
565 } 649 }
566 650
567 } // namespace url_pattern_index 651 } // namespace url_pattern_index
OLDNEW
« no previous file with comments | « components/url_pattern_index/url_pattern_index.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698