Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(100)

Side by Side Diff: components/subresource_filter/core/common/indexed_ruleset.cc

Issue 2793993002: [subresource_filter] Replace KMP by std::search. (Closed)
Patch Set: Fix DCHECK. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/subresource_filter/core/common/indexed_ruleset.h" 5 #include "components/subresource_filter/core/common/indexed_ruleset.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <limits> 8 #include <limits>
9 #include <string> 9 #include <string>
10 10
11 #include "base/logging.h" 11 #include "base/logging.h"
12 #include "base/numerics/safe_conversions.h" 12 #include "base/numerics/safe_conversions.h"
13 #include "components/subresource_filter/core/common/first_party_origin.h" 13 #include "components/subresource_filter/core/common/first_party_origin.h"
14 #include "components/subresource_filter/core/common/ngram_extractor.h" 14 #include "components/subresource_filter/core/common/ngram_extractor.h"
15 #include "components/subresource_filter/core/common/url_pattern.h" 15 #include "components/subresource_filter/core/common/url_pattern.h"
16 #include "components/subresource_filter/core/common/url_pattern_matching.h"
17 #include "third_party/flatbuffers/src/include/flatbuffers/flatbuffers.h" 16 #include "third_party/flatbuffers/src/include/flatbuffers/flatbuffers.h"
18 17
19 namespace subresource_filter { 18 namespace subresource_filter {
20 19
21 namespace { 20 namespace {
22 21
23 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; 22 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>;
24 23
25 // Checks whether a URL |rule| can be converted to its FlatBuffers equivalent, 24 // Checks whether a URL |rule| can be converted to its FlatBuffers equivalent,
26 // and performs the actual conversion. 25 // and performs the actual conversion.
(...skipping 30 matching lines...) Expand all
57 if (domain_list_item.exclude()) 56 if (domain_list_item.exclude())
58 domain += '~'; 57 domain += '~';
59 domain += domain_list_item.domain(); 58 domain += domain_list_item.domain();
60 domains.push_back(builder->CreateSharedString(domain)); 59 domains.push_back(builder->CreateSharedString(domain));
61 } 60 }
62 domains_offset = builder->CreateVector(domains); 61 domains_offset = builder->CreateVector(domains);
63 } 62 }
64 63
65 auto url_pattern_offset = builder->CreateString(rule_.url_pattern()); 64 auto url_pattern_offset = builder->CreateString(rule_.url_pattern());
66 65
67 std::vector<uint8_t> failure_function;
68 BuildFailureFunction(UrlPattern(rule_), &failure_function);
69 auto failure_function_offset =
70 builder->CreateVector(failure_function.data(), failure_function.size());
71
72 return flat::CreateUrlRule(*builder, options_, element_types_, 66 return flat::CreateUrlRule(*builder, options_, element_types_,
73 activation_types_, url_pattern_type_, 67 activation_types_, url_pattern_type_,
74 anchor_left_, anchor_right_, domains_offset, 68 anchor_left_, anchor_right_, domains_offset,
75 url_pattern_offset, failure_function_offset); 69 url_pattern_offset);
76 } 70 }
77 71
78 private: 72 private:
79 static bool ConvertAnchorType(proto::AnchorType anchor_type, 73 static bool ConvertAnchorType(proto::AnchorType anchor_type,
80 flat::AnchorType* result) { 74 flat::AnchorType* result) {
81 switch (anchor_type) { 75 switch (anchor_type) {
82 case proto::ANCHOR_TYPE_NONE: 76 case proto::ANCHOR_TYPE_NONE:
83 *result = flat::AnchorType_NONE; 77 *result = flat::AnchorType_NONE;
84 break; 78 break;
85 case proto::ANCHOR_TYPE_BOUNDARY: 79 case proto::ANCHOR_TYPE_BOUNDARY:
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
144 "Activation types can not be stored in uint8_t."); 138 "Activation types can not be stored in uint8_t.");
145 if ((rule_.activation_types() & proto::ACTIVATION_TYPE_ALL) != 139 if ((rule_.activation_types() & proto::ACTIVATION_TYPE_ALL) !=
146 rule_.activation_types()) { 140 rule_.activation_types()) {
147 return false; // Unsupported activation types. 141 return false; // Unsupported activation types.
148 } 142 }
149 activation_types_ = static_cast<uint8_t>(rule_.activation_types()); 143 activation_types_ = static_cast<uint8_t>(rule_.activation_types());
150 return true; 144 return true;
151 } 145 }
152 146
153 bool InitializeUrlPattern() { 147 bool InitializeUrlPattern() {
154 if (rule_.url_pattern().size() >
155 static_cast<size_t>(std::numeric_limits<uint8_t>::max())) {
156 // Failure function can not always be stored as an array of uint8_t in
157 // case the pattern's length exceeds 255.
158 return false;
159 }
160
161 switch (rule_.url_pattern_type()) { 148 switch (rule_.url_pattern_type()) {
162 case proto::URL_PATTERN_TYPE_SUBSTRING: 149 case proto::URL_PATTERN_TYPE_SUBSTRING:
163 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; 150 url_pattern_type_ = flat::UrlPatternType_SUBSTRING;
164 break; 151 break;
165 case proto::URL_PATTERN_TYPE_WILDCARDED: 152 case proto::URL_PATTERN_TYPE_WILDCARDED:
166 url_pattern_type_ = flat::UrlPatternType_WILDCARDED; 153 url_pattern_type_ = flat::UrlPatternType_WILDCARDED;
167 break; 154 break;
155
156 // TODO(pkalinnikov): Implement REGEXP rules matching.
168 case proto::URL_PATTERN_TYPE_REGEXP: 157 case proto::URL_PATTERN_TYPE_REGEXP:
169 url_pattern_type_ = flat::UrlPatternType_REGEXP;
170 break;
171
172 default: 158 default:
173 return false; // Unsupported URL pattern type. 159 return false; // Unsupported URL pattern type.
174 } 160 }
175 161
176 if (!ConvertAnchorType(rule_.anchor_left(), &anchor_left_) || 162 if (!ConvertAnchorType(rule_.anchor_left(), &anchor_left_) ||
177 !ConvertAnchorType(rule_.anchor_right(), &anchor_right_)) { 163 !ConvertAnchorType(rule_.anchor_right(), &anchor_right_)) {
178 return false; 164 return false;
179 } 165 }
180 if (anchor_right_ == flat::AnchorType_SUBDOMAIN) 166 if (anchor_right_ == flat::AnchorType_SUBDOMAIN)
181 return false; // Unsupported right anchor. 167 return false; // Unsupported right anchor.
(...skipping 11 matching lines...) Expand all
193 flat::AnchorType anchor_right_ = flat::AnchorType_NONE; 179 flat::AnchorType anchor_right_ = flat::AnchorType_NONE;
194 180
195 bool is_convertible_ = true; 181 bool is_convertible_ = true;
196 }; 182 };
197 183
198 } // namespace 184 } // namespace
199 185
200 // RulesetIndexer -------------------------------------------------------------- 186 // RulesetIndexer --------------------------------------------------------------
201 187
202 // static 188 // static
203 const int RulesetIndexer::kIndexedFormatVersion = 12; 189 const int RulesetIndexer::kIndexedFormatVersion = 13;
204 190
205 RulesetIndexer::MutableUrlPatternIndex::MutableUrlPatternIndex() = default; 191 RulesetIndexer::MutableUrlPatternIndex::MutableUrlPatternIndex() = default;
206 RulesetIndexer::MutableUrlPatternIndex::~MutableUrlPatternIndex() = default; 192 RulesetIndexer::MutableUrlPatternIndex::~MutableUrlPatternIndex() = default;
207 193
208 RulesetIndexer::RulesetIndexer() = default; 194 RulesetIndexer::RulesetIndexer() = default;
209 RulesetIndexer::~RulesetIndexer() = default; 195 RulesetIndexer::~RulesetIndexer() = default;
210 196
211 bool RulesetIndexer::AddUrlRule(const proto::UrlRule& rule) { 197 bool RulesetIndexer::AddUrlRule(const proto::UrlRule& rule) {
212 UrlRuleFlatBufferConverter converter(rule); 198 UrlRuleFlatBufferConverter converter(rule);
213 if (!converter.is_convertible()) 199 if (!converter.is_convertible())
214 return false; 200 return false;
215 auto rule_offset = converter.SerializeConvertedRule(&builder_); 201 auto rule_offset = converter.SerializeConvertedRule(&builder_);
216 202
217 MutableUrlPatternIndex* index_part = 203 MutableUrlPatternIndex* index_part =
218 (rule.semantics() == proto::RULE_SEMANTICS_BLACKLIST ? &blacklist_ 204 (rule.semantics() == proto::RULE_SEMANTICS_BLACKLIST ? &blacklist_
219 : &whitelist_); 205 : &whitelist_);
220 206
221 NGram ngram = 0; 207 DCHECK_NE(rule.url_pattern_type(), proto::URL_PATTERN_TYPE_REGEXP);
222 if (rule.url_pattern_type() != proto::URL_PATTERN_TYPE_REGEXP) { 208 NGram ngram =
223 ngram = 209 GetMostDistinctiveNGram(index_part->ngram_index, rule.url_pattern());
224 GetMostDistinctiveNGram(index_part->ngram_index, rule.url_pattern());
225 }
226 210
227 if (ngram) { 211 if (ngram) {
228 index_part->ngram_index[ngram].push_back(rule_offset); 212 index_part->ngram_index[ngram].push_back(rule_offset);
229 } else { 213 } else {
230 // TODO(pkalinnikov): Index fallback rules as well. 214 // TODO(pkalinnikov): Index fallback rules as well.
231 index_part->fallback_rules.push_back(rule_offset); 215 index_part->fallback_rules.push_back(rule_offset);
232 } 216 }
233 217
234 return true; 218 return true;
235 } 219 }
(...skipping 162 matching lines...) Expand 10 before | Expand all | Expand 10 after
398 const GURL& url, 382 const GURL& url,
399 const url::Origin& initiator, 383 const url::Origin& initiator,
400 proto::ElementType element_type, 384 proto::ElementType element_type,
401 proto::ActivationType activation_type, 385 proto::ActivationType activation_type,
402 bool is_third_party, 386 bool is_third_party,
403 bool disable_generic_rules) { 387 bool disable_generic_rules) {
404 if (!rules) 388 if (!rules)
405 return false; 389 return false;
406 for (const flat::UrlRule* rule : *rules) { 390 for (const flat::UrlRule* rule : *rules) {
407 DCHECK_NE(rule, nullptr); 391 DCHECK_NE(rule, nullptr);
408 392 DCHECK_NE(rule->url_pattern_type(), flat::UrlPatternType_REGEXP);
409 if (rule->url_pattern_type() != flat::UrlPatternType_REGEXP) { 393 if (!UrlPattern(*rule).MatchesUrl(url))
410 const uint8_t* begin = rule->failure_function()->data();
411 const uint8_t* end = begin + rule->failure_function()->size();
412 if (!IsUrlPatternMatch(url, UrlPattern(*rule), begin, end))
413 continue;
414 } else {
415 // TODO(pkalinnikov): Implement REGEXP rules matching.
416 continue; 394 continue;
417 }
418 395
419 // TODO(pkalinnikov): Match the medatada before the URL pattern, but maybe 396 // TODO(pkalinnikov): Match the medatada before the URL pattern, but maybe
420 // excluding the domain list. 397 // excluding the domain list.
421 if (DoesRuleMetadataMatch(*rule, initiator, element_type, activation_type, 398 if (DoesRuleMetadataMatch(*rule, initiator, element_type, activation_type,
422 is_third_party, disable_generic_rules)) { 399 is_third_party, disable_generic_rules)) {
423 return true; 400 return true;
424 } 401 }
425 } 402 }
426 403
427 return false; 404 return false;
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
511 const bool is_third_party = first_party.IsThirdParty(url); 488 const bool is_third_party = first_party.IsThirdParty(url);
512 return IsMatch(root_->blacklist_index(), url, first_party.origin(), 489 return IsMatch(root_->blacklist_index(), url, first_party.origin(),
513 element_type, proto::ACTIVATION_TYPE_UNSPECIFIED, 490 element_type, proto::ACTIVATION_TYPE_UNSPECIFIED,
514 is_third_party, disable_generic_rules) && 491 is_third_party, disable_generic_rules) &&
515 !IsMatch(root_->whitelist_index(), url, first_party.origin(), 492 !IsMatch(root_->whitelist_index(), url, first_party.origin(),
516 element_type, proto::ACTIVATION_TYPE_UNSPECIFIED, 493 element_type, proto::ACTIVATION_TYPE_UNSPECIFIED,
517 is_third_party, disable_generic_rules); 494 is_third_party, disable_generic_rules);
518 } 495 }
519 496
520 } // namespace subresource_filter 497 } // namespace subresource_filter
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698