| OLD | NEW |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/subresource_filter/core/common/indexed_ruleset.h" | 5 #include "components/subresource_filter/core/common/indexed_ruleset.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <limits> | 8 #include <limits> |
| 9 #include <string> | 9 #include <string> |
| 10 | 10 |
| 11 #include "base/logging.h" | 11 #include "base/logging.h" |
| 12 #include "base/numerics/safe_conversions.h" | 12 #include "base/numerics/safe_conversions.h" |
| 13 #include "components/subresource_filter/core/common/first_party_origin.h" | 13 #include "components/subresource_filter/core/common/first_party_origin.h" |
| 14 #include "components/subresource_filter/core/common/ngram_extractor.h" | 14 #include "components/subresource_filter/core/common/ngram_extractor.h" |
| 15 #include "components/subresource_filter/core/common/url_pattern.h" | 15 #include "components/subresource_filter/core/common/url_pattern.h" |
| 16 #include "components/subresource_filter/core/common/url_pattern_matching.h" | |
| 17 #include "third_party/flatbuffers/src/include/flatbuffers/flatbuffers.h" | 16 #include "third_party/flatbuffers/src/include/flatbuffers/flatbuffers.h" |
| 18 | 17 |
| 19 namespace subresource_filter { | 18 namespace subresource_filter { |
| 20 | 19 |
| 21 namespace { | 20 namespace { |
| 22 | 21 |
| 23 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; | 22 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; |
| 24 | 23 |
| 25 // Checks whether a URL |rule| can be converted to its FlatBuffers equivalent, | 24 // Checks whether a URL |rule| can be converted to its FlatBuffers equivalent, |
| 26 // and performs the actual conversion. | 25 // and performs the actual conversion. |
| (...skipping 30 matching lines...) Expand all Loading... |
| 57 if (domain_list_item.exclude()) | 56 if (domain_list_item.exclude()) |
| 58 domain += '~'; | 57 domain += '~'; |
| 59 domain += domain_list_item.domain(); | 58 domain += domain_list_item.domain(); |
| 60 domains.push_back(builder->CreateSharedString(domain)); | 59 domains.push_back(builder->CreateSharedString(domain)); |
| 61 } | 60 } |
| 62 domains_offset = builder->CreateVector(domains); | 61 domains_offset = builder->CreateVector(domains); |
| 63 } | 62 } |
| 64 | 63 |
| 65 auto url_pattern_offset = builder->CreateString(rule_.url_pattern()); | 64 auto url_pattern_offset = builder->CreateString(rule_.url_pattern()); |
| 66 | 65 |
| 67 std::vector<uint8_t> failure_function; | |
| 68 BuildFailureFunction(UrlPattern(rule_), &failure_function); | |
| 69 auto failure_function_offset = | |
| 70 builder->CreateVector(failure_function.data(), failure_function.size()); | |
| 71 | |
| 72 return flat::CreateUrlRule(*builder, options_, element_types_, | 66 return flat::CreateUrlRule(*builder, options_, element_types_, |
| 73 activation_types_, url_pattern_type_, | 67 activation_types_, url_pattern_type_, |
| 74 anchor_left_, anchor_right_, domains_offset, | 68 anchor_left_, anchor_right_, domains_offset, |
| 75 url_pattern_offset, failure_function_offset); | 69 url_pattern_offset); |
| 76 } | 70 } |
| 77 | 71 |
| 78 private: | 72 private: |
| 79 static bool ConvertAnchorType(proto::AnchorType anchor_type, | 73 static bool ConvertAnchorType(proto::AnchorType anchor_type, |
| 80 flat::AnchorType* result) { | 74 flat::AnchorType* result) { |
| 81 switch (anchor_type) { | 75 switch (anchor_type) { |
| 82 case proto::ANCHOR_TYPE_NONE: | 76 case proto::ANCHOR_TYPE_NONE: |
| 83 *result = flat::AnchorType_NONE; | 77 *result = flat::AnchorType_NONE; |
| 84 break; | 78 break; |
| 85 case proto::ANCHOR_TYPE_BOUNDARY: | 79 case proto::ANCHOR_TYPE_BOUNDARY: |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 144 "Activation types can not be stored in uint8_t."); | 138 "Activation types can not be stored in uint8_t."); |
| 145 if ((rule_.activation_types() & proto::ACTIVATION_TYPE_ALL) != | 139 if ((rule_.activation_types() & proto::ACTIVATION_TYPE_ALL) != |
| 146 rule_.activation_types()) { | 140 rule_.activation_types()) { |
| 147 return false; // Unsupported activation types. | 141 return false; // Unsupported activation types. |
| 148 } | 142 } |
| 149 activation_types_ = static_cast<uint8_t>(rule_.activation_types()); | 143 activation_types_ = static_cast<uint8_t>(rule_.activation_types()); |
| 150 return true; | 144 return true; |
| 151 } | 145 } |
| 152 | 146 |
| 153 bool InitializeUrlPattern() { | 147 bool InitializeUrlPattern() { |
| 154 if (rule_.url_pattern().size() > | |
| 155 static_cast<size_t>(std::numeric_limits<uint8_t>::max())) { | |
| 156 // Failure function can not always be stored as an array of uint8_t in | |
| 157 // case the pattern's length exceeds 255. | |
| 158 return false; | |
| 159 } | |
| 160 | |
| 161 switch (rule_.url_pattern_type()) { | 148 switch (rule_.url_pattern_type()) { |
| 162 case proto::URL_PATTERN_TYPE_SUBSTRING: | 149 case proto::URL_PATTERN_TYPE_SUBSTRING: |
| 163 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; | 150 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; |
| 164 break; | 151 break; |
| 165 case proto::URL_PATTERN_TYPE_WILDCARDED: | 152 case proto::URL_PATTERN_TYPE_WILDCARDED: |
| 166 url_pattern_type_ = flat::UrlPatternType_WILDCARDED; | 153 url_pattern_type_ = flat::UrlPatternType_WILDCARDED; |
| 167 break; | 154 break; |
| 155 |
| 156 // TODO(pkalinnikov): Implement REGEXP rules matching. |
| 168 case proto::URL_PATTERN_TYPE_REGEXP: | 157 case proto::URL_PATTERN_TYPE_REGEXP: |
| 169 url_pattern_type_ = flat::UrlPatternType_REGEXP; | |
| 170 break; | |
| 171 | |
| 172 default: | 158 default: |
| 173 return false; // Unsupported URL pattern type. | 159 return false; // Unsupported URL pattern type. |
| 174 } | 160 } |
| 175 | 161 |
| 176 if (!ConvertAnchorType(rule_.anchor_left(), &anchor_left_) || | 162 if (!ConvertAnchorType(rule_.anchor_left(), &anchor_left_) || |
| 177 !ConvertAnchorType(rule_.anchor_right(), &anchor_right_)) { | 163 !ConvertAnchorType(rule_.anchor_right(), &anchor_right_)) { |
| 178 return false; | 164 return false; |
| 179 } | 165 } |
| 180 if (anchor_right_ == flat::AnchorType_SUBDOMAIN) | 166 if (anchor_right_ == flat::AnchorType_SUBDOMAIN) |
| 181 return false; // Unsupported right anchor. | 167 return false; // Unsupported right anchor. |
| (...skipping 11 matching lines...) Expand all Loading... |
| 193 flat::AnchorType anchor_right_ = flat::AnchorType_NONE; | 179 flat::AnchorType anchor_right_ = flat::AnchorType_NONE; |
| 194 | 180 |
| 195 bool is_convertible_ = true; | 181 bool is_convertible_ = true; |
| 196 }; | 182 }; |
| 197 | 183 |
| 198 } // namespace | 184 } // namespace |
| 199 | 185 |
| 200 // RulesetIndexer -------------------------------------------------------------- | 186 // RulesetIndexer -------------------------------------------------------------- |
| 201 | 187 |
| 202 // static | 188 // static |
| 203 const int RulesetIndexer::kIndexedFormatVersion = 12; | 189 const int RulesetIndexer::kIndexedFormatVersion = 13; |
| 204 | 190 |
| 205 RulesetIndexer::MutableUrlPatternIndex::MutableUrlPatternIndex() = default; | 191 RulesetIndexer::MutableUrlPatternIndex::MutableUrlPatternIndex() = default; |
| 206 RulesetIndexer::MutableUrlPatternIndex::~MutableUrlPatternIndex() = default; | 192 RulesetIndexer::MutableUrlPatternIndex::~MutableUrlPatternIndex() = default; |
| 207 | 193 |
| 208 RulesetIndexer::RulesetIndexer() = default; | 194 RulesetIndexer::RulesetIndexer() = default; |
| 209 RulesetIndexer::~RulesetIndexer() = default; | 195 RulesetIndexer::~RulesetIndexer() = default; |
| 210 | 196 |
| 211 bool RulesetIndexer::AddUrlRule(const proto::UrlRule& rule) { | 197 bool RulesetIndexer::AddUrlRule(const proto::UrlRule& rule) { |
| 212 UrlRuleFlatBufferConverter converter(rule); | 198 UrlRuleFlatBufferConverter converter(rule); |
| 213 if (!converter.is_convertible()) | 199 if (!converter.is_convertible()) |
| 214 return false; | 200 return false; |
| 215 auto rule_offset = converter.SerializeConvertedRule(&builder_); | 201 auto rule_offset = converter.SerializeConvertedRule(&builder_); |
| 216 | 202 |
| 217 MutableUrlPatternIndex* index_part = | 203 MutableUrlPatternIndex* index_part = |
| 218 (rule.semantics() == proto::RULE_SEMANTICS_BLACKLIST ? &blacklist_ | 204 (rule.semantics() == proto::RULE_SEMANTICS_BLACKLIST ? &blacklist_ |
| 219 : &whitelist_); | 205 : &whitelist_); |
| 220 | 206 |
| 221 NGram ngram = 0; | 207 DCHECK_NE(rule.url_pattern_type(), proto::URL_PATTERN_TYPE_REGEXP); |
| 222 if (rule.url_pattern_type() != proto::URL_PATTERN_TYPE_REGEXP) { | 208 NGram ngram = |
| 223 ngram = | 209 GetMostDistinctiveNGram(index_part->ngram_index, rule.url_pattern()); |
| 224 GetMostDistinctiveNGram(index_part->ngram_index, rule.url_pattern()); | |
| 225 } | |
| 226 | 210 |
| 227 if (ngram) { | 211 if (ngram) { |
| 228 index_part->ngram_index[ngram].push_back(rule_offset); | 212 index_part->ngram_index[ngram].push_back(rule_offset); |
| 229 } else { | 213 } else { |
| 230 // TODO(pkalinnikov): Index fallback rules as well. | 214 // TODO(pkalinnikov): Index fallback rules as well. |
| 231 index_part->fallback_rules.push_back(rule_offset); | 215 index_part->fallback_rules.push_back(rule_offset); |
| 232 } | 216 } |
| 233 | 217 |
| 234 return true; | 218 return true; |
| 235 } | 219 } |
| (...skipping 162 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 398 const GURL& url, | 382 const GURL& url, |
| 399 const url::Origin& initiator, | 383 const url::Origin& initiator, |
| 400 proto::ElementType element_type, | 384 proto::ElementType element_type, |
| 401 proto::ActivationType activation_type, | 385 proto::ActivationType activation_type, |
| 402 bool is_third_party, | 386 bool is_third_party, |
| 403 bool disable_generic_rules) { | 387 bool disable_generic_rules) { |
| 404 if (!rules) | 388 if (!rules) |
| 405 return false; | 389 return false; |
| 406 for (const flat::UrlRule* rule : *rules) { | 390 for (const flat::UrlRule* rule : *rules) { |
| 407 DCHECK_NE(rule, nullptr); | 391 DCHECK_NE(rule, nullptr); |
| 408 | 392 DCHECK_NE(rule->url_pattern_type(), flat::UrlPatternType_REGEXP); |
| 409 if (rule->url_pattern_type() != flat::UrlPatternType_REGEXP) { | 393 if (!UrlPattern(*rule).MatchesUrl(url)) |
| 410 const uint8_t* begin = rule->failure_function()->data(); | |
| 411 const uint8_t* end = begin + rule->failure_function()->size(); | |
| 412 if (!IsUrlPatternMatch(url, UrlPattern(*rule), begin, end)) | |
| 413 continue; | |
| 414 } else { | |
| 415 // TODO(pkalinnikov): Implement REGEXP rules matching. | |
| 416 continue; | 394 continue; |
| 417 } | |
| 418 | 395 |
| 419 // TODO(pkalinnikov): Match the medatada before the URL pattern, but maybe | 396 // TODO(pkalinnikov): Match the medatada before the URL pattern, but maybe |
| 420 // excluding the domain list. | 397 // excluding the domain list. |
| 421 if (DoesRuleMetadataMatch(*rule, initiator, element_type, activation_type, | 398 if (DoesRuleMetadataMatch(*rule, initiator, element_type, activation_type, |
| 422 is_third_party, disable_generic_rules)) { | 399 is_third_party, disable_generic_rules)) { |
| 423 return true; | 400 return true; |
| 424 } | 401 } |
| 425 } | 402 } |
| 426 | 403 |
| 427 return false; | 404 return false; |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 511 const bool is_third_party = first_party.IsThirdParty(url); | 488 const bool is_third_party = first_party.IsThirdParty(url); |
| 512 return IsMatch(root_->blacklist_index(), url, first_party.origin(), | 489 return IsMatch(root_->blacklist_index(), url, first_party.origin(), |
| 513 element_type, proto::ACTIVATION_TYPE_UNSPECIFIED, | 490 element_type, proto::ACTIVATION_TYPE_UNSPECIFIED, |
| 514 is_third_party, disable_generic_rules) && | 491 is_third_party, disable_generic_rules) && |
| 515 !IsMatch(root_->whitelist_index(), url, first_party.origin(), | 492 !IsMatch(root_->whitelist_index(), url, first_party.origin(), |
| 516 element_type, proto::ACTIVATION_TYPE_UNSPECIFIED, | 493 element_type, proto::ACTIVATION_TYPE_UNSPECIFIED, |
| 517 is_third_party, disable_generic_rules); | 494 is_third_party, disable_generic_rules); |
| 518 } | 495 } |
| 519 | 496 |
| 520 } // namespace subresource_filter | 497 } // namespace subresource_filter |
| OLD | NEW |