Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(259)

Side by Side Diff: components/url_pattern_index/url_pattern_index.cc

Issue 2954613002: UrlPatternIndex: Introduce ElementType and ActivationType enums to url_pattern_index schema. (Closed)
Patch Set: Format Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2017 The Chromium Authors. All rights reserved. 1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/url_pattern_index/url_pattern_index.h" 5 #include "components/url_pattern_index/url_pattern_index.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <limits> 8 #include <limits>
9 #include <map>
9 #include <string> 10 #include <string>
10 11
11 #include "base/logging.h" 12 #include "base/logging.h"
12 #include "base/numerics/safe_conversions.h" 13 #include "base/numerics/safe_conversions.h"
14 #include "base/stl_util.h"
13 #include "base/strings/string_piece.h" 15 #include "base/strings/string_piece.h"
14 #include "base/strings/string_util.h" 16 #include "base/strings/string_util.h"
15 #include "components/url_pattern_index/ngram_extractor.h" 17 #include "components/url_pattern_index/ngram_extractor.h"
16 #include "components/url_pattern_index/url_pattern.h" 18 #include "components/url_pattern_index/url_pattern.h"
17 #include "url/gurl.h" 19 #include "url/gurl.h"
18 #include "url/origin.h" 20 #include "url/origin.h"
19 21
20 namespace url_pattern_index { 22 namespace url_pattern_index {
21 23
22 namespace { 24 namespace {
23 25
24 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; 26 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>;
25 using FlatDomains = flatbuffers::Vector<FlatStringOffset>; 27 using FlatDomains = flatbuffers::Vector<FlatStringOffset>;
26 using FlatDomainsOffset = flatbuffers::Offset<FlatDomains>; 28 using FlatDomainsOffset = flatbuffers::Offset<FlatDomains>;
27 29
30 // Maps proto::ActivationType to flat::ActivationType.
31 const std::map<proto::ActivationType, flat::ActivationType> kActivationTypeMap =
pkalinnikov 2017/06/28 15:59:43 For performance reasons (queries should be fast),
karandeepb 2017/06/29 00:58:18 I preferred to use a flat map, since I think it le
pkalinnikov 2017/06/29 09:19:16 Acknowledged.
32 {
33 {proto::ACTIVATION_TYPE_UNSPECIFIED, flat::ActivationType_NONE},
34 {proto::ACTIVATION_TYPE_DOCUMENT, flat::ActivationType_DOCUMENT},
35 // ELEMHIDE is not supported.
36 {proto::ACTIVATION_TYPE_ELEMHIDE, flat::ActivationType_NONE},
37 // GENERICHIDE is not supported.
38 {proto::ACTIVATION_TYPE_GENERICHIDE, flat::ActivationType_NONE},
39 {proto::ACTIVATION_TYPE_GENERICBLOCK,
40 flat::ActivationType_GENERIC_BLOCK},
41 };
42
43 // Maps proto::ElementType to flat::ElementType.
44 const std::map<proto::ElementType, flat::ElementType> kElementTypeMap = {
45 {proto::ELEMENT_TYPE_UNSPECIFIED, flat::ElementType_NONE},
46 {proto::ELEMENT_TYPE_OTHER, flat::ElementType_OTHER},
47 {proto::ELEMENT_TYPE_SCRIPT, flat::ElementType_SCRIPT},
48 {proto::ELEMENT_TYPE_IMAGE, flat::ElementType_IMAGE},
49 {proto::ELEMENT_TYPE_STYLESHEET, flat::ElementType_STYLESHEET},
50 {proto::ELEMENT_TYPE_OBJECT, flat::ElementType_OBJECT},
51 {proto::ELEMENT_TYPE_XMLHTTPREQUEST, flat::ElementType_XMLHTTPREQUEST},
52 {proto::ELEMENT_TYPE_OBJECT_SUBREQUEST,
53 flat::ElementType_OBJECT_SUBREQUEST},
54 {proto::ELEMENT_TYPE_SUBDOCUMENT, flat::ElementType_SUBDOCUMENT},
55 {proto::ELEMENT_TYPE_PING, flat::ElementType_PING},
56 {proto::ELEMENT_TYPE_MEDIA, flat::ElementType_MEDIA},
57 {proto::ELEMENT_TYPE_FONT, flat::ElementType_FONT},
58 // Filtering popups is not supported.
59 {proto::ELEMENT_TYPE_POPUP, flat::ElementType_NONE},
60 {proto::ELEMENT_TYPE_WEBSOCKET, flat::ElementType_WEBSOCKET},
61 };
62
63 flat::ActivationType ProtoToFlatActivationType(proto::ActivationType type) {
64 DCHECK(base::ContainsKey(kActivationTypeMap, type));
65 return kActivationTypeMap.at(type);
66 }
67
68 flat::ElementType ProtoToFlatElementType(proto::ElementType type) {
69 DCHECK(base::ContainsKey(kElementTypeMap, type));
70 return kElementTypeMap.at(type);
71 }
72
28 base::StringPiece ToStringPiece(const flatbuffers::String* string) { 73 base::StringPiece ToStringPiece(const flatbuffers::String* string) {
29 DCHECK(string); 74 DCHECK(string);
30 return base::StringPiece(string->c_str(), string->size()); 75 return base::StringPiece(string->c_str(), string->size());
31 } 76 }
32 77
33 // Performs three-way comparison between two domains. In the total order defined 78 // Performs three-way comparison between two domains. In the total order defined
34 // by this predicate, the lengths of domains will be monotonically decreasing. 79 // by this predicate, the lengths of domains will be monotonically decreasing.
35 int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain) { 80 int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain) {
36 if (lhs_domain.size() != rhs_domain.size()) 81 if (lhs_domain.size() != rhs_domain.size())
37 return lhs_domain.size() > rhs_domain.size() ? -1 : 1; 82 return lhs_domain.size() > rhs_domain.size() ? -1 : 1;
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
135 case proto::ANCHOR_TYPE_SUBDOMAIN: 180 case proto::ANCHOR_TYPE_SUBDOMAIN:
136 *result = flat::AnchorType_SUBDOMAIN; 181 *result = flat::AnchorType_SUBDOMAIN;
137 break; 182 break;
138 default: 183 default:
139 return false; // Unsupported anchor type. 184 return false; // Unsupported anchor type.
140 } 185 }
141 return true; 186 return true;
142 } 187 }
143 188
144 bool InitializeOptions() { 189 bool InitializeOptions() {
190 static_assert(flat::OptionFlag_ANY <= std::numeric_limits<uint8_t>::max(),
191 "Option flags can not be stored in uint8_t.");
192
145 if (rule_.semantics() == proto::RULE_SEMANTICS_WHITELIST) { 193 if (rule_.semantics() == proto::RULE_SEMANTICS_WHITELIST) {
146 options_ |= flat::OptionFlag_IS_WHITELIST; 194 options_ |= flat::OptionFlag_IS_WHITELIST;
147 } else if (rule_.semantics() != proto::RULE_SEMANTICS_BLACKLIST) { 195 } else if (rule_.semantics() != proto::RULE_SEMANTICS_BLACKLIST) {
148 return false; // Unsupported semantics. 196 return false; // Unsupported semantics.
149 } 197 }
150 198
151 switch (rule_.source_type()) { 199 switch (rule_.source_type()) {
152 case proto::SOURCE_TYPE_ANY: 200 case proto::SOURCE_TYPE_ANY:
153 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; 201 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY;
154 // Note: fall through here intentionally. 202 // Note: fall through here intentionally.
155 case proto::SOURCE_TYPE_FIRST_PARTY: 203 case proto::SOURCE_TYPE_FIRST_PARTY:
156 options_ |= flat::OptionFlag_APPLIES_TO_FIRST_PARTY; 204 options_ |= flat::OptionFlag_APPLIES_TO_FIRST_PARTY;
157 break; 205 break;
158 case proto::SOURCE_TYPE_THIRD_PARTY: 206 case proto::SOURCE_TYPE_THIRD_PARTY:
159 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; 207 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY;
160 break; 208 break;
161 209
162 default: 210 default:
163 return false; // Unsupported source type. 211 return false; // Unsupported source type.
164 } 212 }
165 213
166 if (rule_.match_case()) 214 if (rule_.match_case())
167 options_ |= flat::OptionFlag_IS_MATCH_CASE; 215 options_ |= flat::OptionFlag_IS_MATCH_CASE;
168 216
169 return true; 217 return true;
170 } 218 }
171 219
172 bool InitializeElementTypes() { 220 bool InitializeElementTypes() {
173 static_assert( 221 static_assert(flat::ElementType_ANY <= std::numeric_limits<uint16_t>::max(),
174 proto::ELEMENT_TYPE_ALL <= std::numeric_limits<uint16_t>::max(), 222 "Element types can not be stored in uint16_t.");
175 "Element types can not be stored in uint16_t.");
176 element_types_ = static_cast<uint16_t>(rule_.element_types());
177 223
178 // Note: Normally we can not distinguish between the main plugin resource 224 // Ensure all proto::ElementType(s) are mapped in |kElementTypeMap|.
179 // and any other loads it makes. We treat them both as OBJECT requests. 225 #if DCHECK_IS_ON()
180 if (element_types_ & proto::ELEMENT_TYPE_OBJECT_SUBREQUEST) 226 uint16_t mask = 0;
pkalinnikov 2017/06/28 15:59:43 nit: Can you make it int? Same below for Activatio
karandeepb 2017/06/29 00:58:18 Isn't this better being a more concrete type? |ele
pkalinnikov 2017/06/29 09:19:16 I agree that *current* proto::ElementType values f
181 element_types_ |= proto::ELEMENT_TYPE_OBJECT; 227 for (const auto& pair : kElementTypeMap)
228 mask |= pair.first;
229 DCHECK_EQ(proto::ELEMENT_TYPE_ALL, mask);
230 #endif // DCHECK_IS_ON()
182 231
183 // Ignore unknown element types. 232 element_types_ = flat::ElementType_NONE;
184 element_types_ &= proto::ELEMENT_TYPE_ALL; 233
185 // Filtering popups is not supported. 234 for (const auto& pair : kElementTypeMap)
186 element_types_ &= ~proto::ELEMENT_TYPE_POPUP; 235 if (rule_.element_types() & pair.first)
236 element_types_ |= pair.second;
237
238 // Normally we cannot distinguish between the main plugin resource and any
239 // other loads it makes. We treat them both as OBJECT requests. Hence an
240 // OBJECT request would also match OBJECT_SUBREQUEST rules, but not the
241 // other way round.
242 if (element_types_ & flat::ElementType_OBJECT_SUBREQUEST)
pkalinnikov 2017/06/28 15:59:42 Should we consider merging the 2 types together? S
karandeepb 2017/06/29 00:58:18 Yeah I thought of that as well. We can add DCHECKs
pkalinnikov 2017/06/29 09:19:16 Acknowledged.
243 element_types_ |= flat::ElementType_OBJECT;
187 244
188 return true; 245 return true;
189 } 246 }
190 247
191 bool InitializeActivationTypes() { 248 bool InitializeActivationTypes() {
192 static_assert( 249 static_assert(
193 proto::ACTIVATION_TYPE_ALL <= std::numeric_limits<uint8_t>::max(), 250 flat::ActivationType_ANY <= std::numeric_limits<uint8_t>::max(),
194 "Activation types can not be stored in uint8_t."); 251 "Activation types can not be stored in uint8_t.");
195 activation_types_ = static_cast<uint8_t>(rule_.activation_types());
196 252
197 // Only the following activation types are supported, ignore the others. 253 // Ensure all proto::ActivationType(s) are mapped in |kActivationTypeMap|.
198 activation_types_ &= 254 #if DCHECK_IS_ON()
199 proto::ACTIVATION_TYPE_DOCUMENT | proto::ACTIVATION_TYPE_GENERICBLOCK; 255 uint16_t mask = 0;
256 for (const auto& pair : kActivationTypeMap)
257 mask |= pair.first;
258 DCHECK_EQ(proto::ACTIVATION_TYPE_ALL, mask);
259 #endif // DCHECK_IS_ON()
260
261 activation_types_ = flat::ActivationType_NONE;
262
263 for (const auto& pair : kActivationTypeMap)
264 if (rule_.activation_types() & pair.first)
265 activation_types_ |= pair.second;
200 266
201 return true; 267 return true;
202 } 268 }
203 269
204 bool InitializeUrlPattern() { 270 bool InitializeUrlPattern() {
205 switch (rule_.url_pattern_type()) { 271 switch (rule_.url_pattern_type()) {
206 case proto::URL_PATTERN_TYPE_SUBSTRING: 272 case proto::URL_PATTERN_TYPE_SUBSTRING:
207 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; 273 url_pattern_type_ = flat::UrlPatternType_SUBSTRING;
208 break; 274 break;
209 case proto::URL_PATTERN_TYPE_WILDCARDED: 275 case proto::URL_PATTERN_TYPE_WILDCARDED:
(...skipping 213 matching lines...) Expand 10 before | Expand all | Expand 10 after
423 } 489 }
424 if (longest_matching_included_domain_length && rule.domains_excluded()) { 490 if (longest_matching_included_domain_length && rule.domains_excluded()) {
425 return GetLongestMatchingSubdomain(origin, *rule.domains_excluded()) < 491 return GetLongestMatchingSubdomain(origin, *rule.domains_excluded()) <
426 longest_matching_included_domain_length; 492 longest_matching_included_domain_length;
427 } 493 }
428 return !!longest_matching_included_domain_length; 494 return !!longest_matching_included_domain_length;
429 } 495 }
430 496
431 // Returns whether the request matches flags of the specified URL |rule|. Takes 497 // Returns whether the request matches flags of the specified URL |rule|. Takes
432 // into account: 498 // into account:
433 // - |element_type| of the requested resource, if not *_UNSPECIFIED. 499 // - |element_type| of the requested resource, if not *_NONE.
434 // - |activation_type| for a subdocument request, if not *_UNSPECIFIED. 500 // - |activation_type| for a subdocument request, if not *_NONE.
435 // - Whether the resource |is_third_party| w.r.t. its embedding document. 501 // - Whether the resource |is_third_party| w.r.t. its embedding document.
436 bool DoesRuleFlagsMatch(const flat::UrlRule& rule, 502 bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
437 proto::ElementType element_type, 503 flat::ElementType element_type,
438 proto::ActivationType activation_type, 504 flat::ActivationType activation_type,
439 bool is_third_party) { 505 bool is_third_party) {
440 DCHECK((element_type == proto::ELEMENT_TYPE_UNSPECIFIED) != 506 DCHECK((element_type == flat::ElementType_NONE) !=
441 (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)); 507 (activation_type == flat::ActivationType_NONE));
442 508
443 if (element_type != proto::ELEMENT_TYPE_UNSPECIFIED && 509 if (element_type != flat::ElementType_NONE &&
444 !(rule.element_types() & element_type)) { 510 !(rule.element_types() & element_type)) {
445 return false; 511 return false;
446 } 512 }
447 if (activation_type != proto::ACTIVATION_TYPE_UNSPECIFIED && 513 if (activation_type != flat::ActivationType_NONE &&
448 !(rule.activation_types() & activation_type)) { 514 !(rule.activation_types() & activation_type)) {
449 return false; 515 return false;
450 } 516 }
451 517
452 if (is_third_party && 518 if (is_third_party &&
453 !(rule.options() & flat::OptionFlag_APPLIES_TO_THIRD_PARTY)) { 519 !(rule.options() & flat::OptionFlag_APPLIES_TO_THIRD_PARTY)) {
454 return false; 520 return false;
455 } 521 }
456 if (!is_third_party && 522 if (!is_third_party &&
457 !(rule.options() & flat::OptionFlag_APPLIES_TO_FIRST_PARTY)) { 523 !(rule.options() & flat::OptionFlag_APPLIES_TO_FIRST_PARTY)) {
458 return false; 524 return false;
459 } 525 }
460 526
461 return true; 527 return true;
462 } 528 }
463 529
464 const flat::UrlRule* FindMatchAmongCandidates( 530 const flat::UrlRule* FindMatchAmongCandidates(
465 const FlatUrlRuleList* candidates, 531 const FlatUrlRuleList* candidates,
466 const GURL& url, 532 const GURL& url,
467 const url::Origin& document_origin, 533 const url::Origin& document_origin,
468 proto::ElementType element_type, 534 flat::ElementType element_type,
469 proto::ActivationType activation_type, 535 flat::ActivationType activation_type,
470 bool is_third_party, 536 bool is_third_party,
471 bool disable_generic_rules) { 537 bool disable_generic_rules) {
472 if (!candidates) 538 if (!candidates)
473 return nullptr; 539 return nullptr;
474 for (const flat::UrlRule* rule : *candidates) { 540 for (const flat::UrlRule* rule : *candidates) {
475 DCHECK_NE(rule, nullptr); 541 DCHECK_NE(rule, nullptr);
476 DCHECK_NE(rule->url_pattern_type(), flat::UrlPatternType_REGEXP); 542 DCHECK_NE(rule->url_pattern_type(), flat::UrlPatternType_REGEXP);
477 if (!DoesRuleFlagsMatch(*rule, element_type, activation_type, 543 if (!DoesRuleFlagsMatch(*rule, element_type, activation_type,
478 is_third_party)) { 544 is_third_party)) {
479 continue; 545 continue;
(...skipping 10 matching lines...) Expand all
490 return nullptr; 556 return nullptr;
491 } 557 }
492 558
493 // Returns whether the network request matches a UrlPattern |index| represented 559 // Returns whether the network request matches a UrlPattern |index| represented
494 // in its FlatBuffers format. |is_third_party| should reflect the relation 560 // in its FlatBuffers format. |is_third_party| should reflect the relation
495 // between |url| and |document_origin|. 561 // between |url| and |document_origin|.
496 const flat::UrlRule* FindMatchInFlatUrlPatternIndex( 562 const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
497 const flat::UrlPatternIndex& index, 563 const flat::UrlPatternIndex& index,
498 const GURL& url, 564 const GURL& url,
499 const url::Origin& document_origin, 565 const url::Origin& document_origin,
500 proto::ElementType element_type, 566 flat::ElementType element_type,
501 proto::ActivationType activation_type, 567 flat::ActivationType activation_type,
502 bool is_third_party, 568 bool is_third_party,
503 bool disable_generic_rules) { 569 bool disable_generic_rules) {
504 const FlatNGramIndex* hash_table = index.ngram_index(); 570 const FlatNGramIndex* hash_table = index.ngram_index();
505 const flat::NGramToRules* empty_slot = index.ngram_index_empty_slot(); 571 const flat::NGramToRules* empty_slot = index.ngram_index_empty_slot();
506 DCHECK_NE(hash_table, nullptr); 572 DCHECK_NE(hash_table, nullptr);
507 573
508 NGramHashTableProber prober; 574 NGramHashTableProber prober;
509 575
510 auto ngrams = CreateNGramExtractor<kNGramSize, uint64_t>( 576 auto ngrams = CreateNGramExtractor<kNGramSize, uint64_t>(
511 url.spec(), [](char) { return false; }); 577 url.spec(), [](char) { return false; });
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
545 611
546 UrlPatternIndexMatcher::~UrlPatternIndexMatcher() = default; 612 UrlPatternIndexMatcher::~UrlPatternIndexMatcher() = default;
547 613
548 const flat::UrlRule* UrlPatternIndexMatcher::FindMatch( 614 const flat::UrlRule* UrlPatternIndexMatcher::FindMatch(
549 const GURL& url, 615 const GURL& url,
550 const url::Origin& first_party_origin, 616 const url::Origin& first_party_origin,
551 proto::ElementType element_type, 617 proto::ElementType element_type,
552 proto::ActivationType activation_type, 618 proto::ActivationType activation_type,
553 bool is_third_party, 619 bool is_third_party,
554 bool disable_generic_rules) const { 620 bool disable_generic_rules) const {
621 return FindMatch(url, first_party_origin,
622 ProtoToFlatElementType(element_type),
623 ProtoToFlatActivationType(activation_type), is_third_party,
624 disable_generic_rules);
625 }
626
627 const flat::UrlRule* UrlPatternIndexMatcher::FindMatch(
628 const GURL& url,
629 const url::Origin& first_party_origin,
630 flat::ElementType element_type,
631 flat::ActivationType activation_type,
632 bool is_third_party,
633 bool disable_generic_rules) const {
555 if (!flat_index_ || !url.is_valid()) 634 if (!flat_index_ || !url.is_valid())
556 return nullptr; 635 return nullptr;
557 if ((element_type == proto::ELEMENT_TYPE_UNSPECIFIED) == 636 if ((element_type == flat::ElementType_NONE) ==
558 (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)) { 637 (activation_type == flat::ActivationType_NONE)) {
559 return nullptr; 638 return nullptr;
560 } 639 }
561 640
562 return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin, 641 return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin,
563 element_type, activation_type, 642 element_type, activation_type,
564 is_third_party, disable_generic_rules); 643 is_third_party, disable_generic_rules);
565 } 644 }
566 645
567 } // namespace url_pattern_index 646 } // namespace url_pattern_index
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698