OLD | NEW |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/subresource_filter/core/common/indexed_ruleset.h" | 5 #include "components/subresource_filter/core/common/indexed_ruleset.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <limits> | 8 #include <limits> |
9 #include <string> | 9 #include <string> |
10 | 10 |
11 #include "base/logging.h" | 11 #include "base/logging.h" |
12 #include "base/numerics/safe_conversions.h" | 12 #include "base/numerics/safe_conversions.h" |
13 #include "components/subresource_filter/core/common/first_party_origin.h" | 13 #include "components/subresource_filter/core/common/first_party_origin.h" |
14 #include "components/subresource_filter/core/common/ngram_extractor.h" | 14 #include "components/subresource_filter/core/common/ngram_extractor.h" |
15 #include "components/subresource_filter/core/common/url_pattern.h" | 15 #include "components/subresource_filter/core/common/url_pattern.h" |
16 #include "components/subresource_filter/core/common/url_pattern_matching.h" | |
17 #include "third_party/flatbuffers/src/include/flatbuffers/flatbuffers.h" | 16 #include "third_party/flatbuffers/src/include/flatbuffers/flatbuffers.h" |
18 | 17 |
19 namespace subresource_filter { | 18 namespace subresource_filter { |
20 | 19 |
21 namespace { | 20 namespace { |
22 | 21 |
23 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; | 22 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; |
24 | 23 |
25 // Checks whether a URL |rule| can be converted to its FlatBuffers equivalent, | 24 // Checks whether a URL |rule| can be converted to its FlatBuffers equivalent, |
26 // and performs the actual conversion. | 25 // and performs the actual conversion. |
(...skipping 30 matching lines...) Expand all Loading... |
57 if (domain_list_item.exclude()) | 56 if (domain_list_item.exclude()) |
58 domain += '~'; | 57 domain += '~'; |
59 domain += domain_list_item.domain(); | 58 domain += domain_list_item.domain(); |
60 domains.push_back(builder->CreateSharedString(domain)); | 59 domains.push_back(builder->CreateSharedString(domain)); |
61 } | 60 } |
62 domains_offset = builder->CreateVector(domains); | 61 domains_offset = builder->CreateVector(domains); |
63 } | 62 } |
64 | 63 |
65 auto url_pattern_offset = builder->CreateString(rule_.url_pattern()); | 64 auto url_pattern_offset = builder->CreateString(rule_.url_pattern()); |
66 | 65 |
67 std::vector<uint8_t> failure_function; | |
68 BuildFailureFunction(UrlPattern(rule_), &failure_function); | |
69 auto failure_function_offset = | |
70 builder->CreateVector(failure_function.data(), failure_function.size()); | |
71 | |
72 return flat::CreateUrlRule(*builder, options_, element_types_, | 66 return flat::CreateUrlRule(*builder, options_, element_types_, |
73 activation_types_, url_pattern_type_, | 67 activation_types_, url_pattern_type_, |
74 anchor_left_, anchor_right_, domains_offset, | 68 anchor_left_, anchor_right_, domains_offset, |
75 url_pattern_offset, failure_function_offset); | 69 url_pattern_offset); |
76 } | 70 } |
77 | 71 |
78 private: | 72 private: |
79 static bool ConvertAnchorType(proto::AnchorType anchor_type, | 73 static bool ConvertAnchorType(proto::AnchorType anchor_type, |
80 flat::AnchorType* result) { | 74 flat::AnchorType* result) { |
81 switch (anchor_type) { | 75 switch (anchor_type) { |
82 case proto::ANCHOR_TYPE_NONE: | 76 case proto::ANCHOR_TYPE_NONE: |
83 *result = flat::AnchorType_NONE; | 77 *result = flat::AnchorType_NONE; |
84 break; | 78 break; |
85 case proto::ANCHOR_TYPE_BOUNDARY: | 79 case proto::ANCHOR_TYPE_BOUNDARY: |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
144 "Activation types can not be stored in uint8_t."); | 138 "Activation types can not be stored in uint8_t."); |
145 if ((rule_.activation_types() & proto::ACTIVATION_TYPE_ALL) != | 139 if ((rule_.activation_types() & proto::ACTIVATION_TYPE_ALL) != |
146 rule_.activation_types()) { | 140 rule_.activation_types()) { |
147 return false; // Unsupported activation types. | 141 return false; // Unsupported activation types. |
148 } | 142 } |
149 activation_types_ = static_cast<uint8_t>(rule_.activation_types()); | 143 activation_types_ = static_cast<uint8_t>(rule_.activation_types()); |
150 return true; | 144 return true; |
151 } | 145 } |
152 | 146 |
153 bool InitializeUrlPattern() { | 147 bool InitializeUrlPattern() { |
154 if (rule_.url_pattern().size() > | |
155 static_cast<size_t>(std::numeric_limits<uint8_t>::max())) { | |
156 // Failure function can not always be stored as an array of uint8_t in | |
157 // case the pattern's length exceeds 255. | |
158 return false; | |
159 } | |
160 | |
161 switch (rule_.url_pattern_type()) { | 148 switch (rule_.url_pattern_type()) { |
162 case proto::URL_PATTERN_TYPE_SUBSTRING: | 149 case proto::URL_PATTERN_TYPE_SUBSTRING: |
163 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; | 150 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; |
164 break; | 151 break; |
165 case proto::URL_PATTERN_TYPE_WILDCARDED: | 152 case proto::URL_PATTERN_TYPE_WILDCARDED: |
166 url_pattern_type_ = flat::UrlPatternType_WILDCARDED; | 153 url_pattern_type_ = flat::UrlPatternType_WILDCARDED; |
167 break; | 154 break; |
| 155 |
| 156 // TODO(pkalinnikov): Implement REGEXP rules matching. |
168 case proto::URL_PATTERN_TYPE_REGEXP: | 157 case proto::URL_PATTERN_TYPE_REGEXP: |
169 url_pattern_type_ = flat::UrlPatternType_REGEXP; | |
170 break; | |
171 | |
172 default: | 158 default: |
173 return false; // Unsupported URL pattern type. | 159 return false; // Unsupported URL pattern type. |
174 } | 160 } |
175 | 161 |
176 if (!ConvertAnchorType(rule_.anchor_left(), &anchor_left_) || | 162 if (!ConvertAnchorType(rule_.anchor_left(), &anchor_left_) || |
177 !ConvertAnchorType(rule_.anchor_right(), &anchor_right_)) { | 163 !ConvertAnchorType(rule_.anchor_right(), &anchor_right_)) { |
178 return false; | 164 return false; |
179 } | 165 } |
180 if (anchor_right_ == flat::AnchorType_SUBDOMAIN) | 166 if (anchor_right_ == flat::AnchorType_SUBDOMAIN) |
181 return false; // Unsupported right anchor. | 167 return false; // Unsupported right anchor. |
(...skipping 11 matching lines...) Expand all Loading... |
193 flat::AnchorType anchor_right_ = flat::AnchorType_NONE; | 179 flat::AnchorType anchor_right_ = flat::AnchorType_NONE; |
194 | 180 |
195 bool is_convertible_ = true; | 181 bool is_convertible_ = true; |
196 }; | 182 }; |
197 | 183 |
198 } // namespace | 184 } // namespace |
199 | 185 |
200 // RulesetIndexer -------------------------------------------------------------- | 186 // RulesetIndexer -------------------------------------------------------------- |
201 | 187 |
202 // static | 188 // static |
203 const int RulesetIndexer::kIndexedFormatVersion = 12; | 189 const int RulesetIndexer::kIndexedFormatVersion = 13; |
204 | 190 |
205 RulesetIndexer::MutableUrlPatternIndex::MutableUrlPatternIndex() = default; | 191 RulesetIndexer::MutableUrlPatternIndex::MutableUrlPatternIndex() = default; |
206 RulesetIndexer::MutableUrlPatternIndex::~MutableUrlPatternIndex() = default; | 192 RulesetIndexer::MutableUrlPatternIndex::~MutableUrlPatternIndex() = default; |
207 | 193 |
208 RulesetIndexer::RulesetIndexer() = default; | 194 RulesetIndexer::RulesetIndexer() = default; |
209 RulesetIndexer::~RulesetIndexer() = default; | 195 RulesetIndexer::~RulesetIndexer() = default; |
210 | 196 |
211 bool RulesetIndexer::AddUrlRule(const proto::UrlRule& rule) { | 197 bool RulesetIndexer::AddUrlRule(const proto::UrlRule& rule) { |
212 UrlRuleFlatBufferConverter converter(rule); | 198 UrlRuleFlatBufferConverter converter(rule); |
213 if (!converter.is_convertible()) | 199 if (!converter.is_convertible()) |
214 return false; | 200 return false; |
215 auto rule_offset = converter.SerializeConvertedRule(&builder_); | 201 auto rule_offset = converter.SerializeConvertedRule(&builder_); |
216 | 202 |
217 MutableUrlPatternIndex* index_part = | 203 MutableUrlPatternIndex* index_part = |
218 (rule.semantics() == proto::RULE_SEMANTICS_BLACKLIST ? &blacklist_ | 204 (rule.semantics() == proto::RULE_SEMANTICS_BLACKLIST ? &blacklist_ |
219 : &whitelist_); | 205 : &whitelist_); |
220 | 206 |
221 NGram ngram = 0; | 207 DCHECK_NE(rule.url_pattern_type(), proto::URL_PATTERN_TYPE_REGEXP); |
222 if (rule.url_pattern_type() != proto::URL_PATTERN_TYPE_REGEXP) { | 208 NGram ngram = |
223 ngram = | 209 GetMostDistinctiveNGram(index_part->ngram_index, rule.url_pattern()); |
224 GetMostDistinctiveNGram(index_part->ngram_index, rule.url_pattern()); | |
225 } | |
226 | 210 |
227 if (ngram) { | 211 if (ngram) { |
228 index_part->ngram_index[ngram].push_back(rule_offset); | 212 index_part->ngram_index[ngram].push_back(rule_offset); |
229 } else { | 213 } else { |
230 // TODO(pkalinnikov): Index fallback rules as well. | 214 // TODO(pkalinnikov): Index fallback rules as well. |
231 index_part->fallback_rules.push_back(rule_offset); | 215 index_part->fallback_rules.push_back(rule_offset); |
232 } | 216 } |
233 | 217 |
234 return true; | 218 return true; |
235 } | 219 } |
(...skipping 162 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
398 const GURL& url, | 382 const GURL& url, |
399 const url::Origin& initiator, | 383 const url::Origin& initiator, |
400 proto::ElementType element_type, | 384 proto::ElementType element_type, |
401 proto::ActivationType activation_type, | 385 proto::ActivationType activation_type, |
402 bool is_third_party, | 386 bool is_third_party, |
403 bool disable_generic_rules) { | 387 bool disable_generic_rules) { |
404 if (!rules) | 388 if (!rules) |
405 return false; | 389 return false; |
406 for (const flat::UrlRule* rule : *rules) { | 390 for (const flat::UrlRule* rule : *rules) { |
407 DCHECK_NE(rule, nullptr); | 391 DCHECK_NE(rule, nullptr); |
408 | 392 DCHECK_NE(rule->url_pattern_type(), flat::UrlPatternType_REGEXP); |
409 if (rule->url_pattern_type() != flat::UrlPatternType_REGEXP) { | 393 if (!UrlPattern(*rule).MatchesUrl(url)) |
410 const uint8_t* begin = rule->failure_function()->data(); | |
411 const uint8_t* end = begin + rule->failure_function()->size(); | |
412 if (!IsUrlPatternMatch(url, UrlPattern(*rule), begin, end)) | |
413 continue; | |
414 } else { | |
415 // TODO(pkalinnikov): Implement REGEXP rules matching. | |
416 continue; | 394 continue; |
417 } | |
418 | 395 |
419 // TODO(pkalinnikov): Match the metadata before the URL pattern, but maybe | 396 // TODO(pkalinnikov): Match the metadata before the URL pattern, but maybe |
420 // excluding the domain list. | 397 // excluding the domain list. |
421 if (DoesRuleMetadataMatch(*rule, initiator, element_type, activation_type, | 398 if (DoesRuleMetadataMatch(*rule, initiator, element_type, activation_type, |
422 is_third_party, disable_generic_rules)) { | 399 is_third_party, disable_generic_rules)) { |
423 return true; | 400 return true; |
424 } | 401 } |
425 } | 402 } |
426 | 403 |
427 return false; | 404 return false; |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
511 const bool is_third_party = first_party.IsThirdParty(url); | 488 const bool is_third_party = first_party.IsThirdParty(url); |
512 return IsMatch(root_->blacklist_index(), url, first_party.origin(), | 489 return IsMatch(root_->blacklist_index(), url, first_party.origin(), |
513 element_type, proto::ACTIVATION_TYPE_UNSPECIFIED, | 490 element_type, proto::ACTIVATION_TYPE_UNSPECIFIED, |
514 is_third_party, disable_generic_rules) && | 491 is_third_party, disable_generic_rules) && |
515 !IsMatch(root_->whitelist_index(), url, first_party.origin(), | 492 !IsMatch(root_->whitelist_index(), url, first_party.origin(), |
516 element_type, proto::ACTIVATION_TYPE_UNSPECIFIED, | 493 element_type, proto::ACTIVATION_TYPE_UNSPECIFIED, |
517 is_third_party, disable_generic_rules); | 494 is_third_party, disable_generic_rules); |
518 } | 495 } |
519 | 496 |
520 } // namespace subresource_filter | 497 } // namespace subresource_filter |
OLD | NEW |