OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/common/extensions/matcher/url_matcher.h" | 5 #include "chrome/common/extensions/matcher/url_matcher.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
9 | 9 |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
11 #include "content/public/common/url_constants.h" | 11 #include "content/public/common/url_constants.h" |
12 #include "googleurl/src/gurl.h" | 12 #include "googleurl/src/gurl.h" |
13 #include "googleurl/src/url_canon.h" | 13 #include "googleurl/src/url_canon.h" |
14 | 14 |
15 namespace extensions { | 15 namespace extensions { |
16 | 16 |
17 // This set of classes implement a mapping of URL Component Patterns, such as | 17 // This set of classes implement a mapping of URL Component Patterns, such as |
18 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. | 18 // host_prefix, host_suffix, host_equals, ..., etc., to StringPatterns |
| 19 // for use in substring comparisons. |
19 // | 20 // |
20 // The idea of this mapping is to reduce the problem of comparing many | 21 // The idea of this mapping is to reduce the problem of comparing many |
21 // URL Component Patterns against one URL to the problem of searching many | 22 // URL Component Patterns against one URL to the problem of searching many |
22 // substrings in one string: | 23 // substrings in one string: |
23 // | 24 // |
24 // ---------------------- -------------------- | 25 // ---------------------- ----------------- |
25 // | URL Query operator | ----translate----> | SubstringPattern | | 26 // | URL Query operator | ----translate----> | StringPattern | |
26 // ---------------------- -------------------- | 27 // ---------------------- ----------------- |
27 // ^ | 28 // ^ |
28 // | | 29 // | |
29 // compare | 30 // compare |
30 // | | 31 // | |
31 // v | 32 // v |
32 // ---------------------- -------------------- | 33 // ---------------------- ----------------- |
33 // | URL to compare | | | | 34 // | URL to compare | | | |
34 // | to all URL Query | ----translate----> | String | | 35 // | to all URL Query | ----translate----> | String | |
35 // | operators | | | | 36 // | operators | | | |
36 // ---------------------- -------------------- | 37 // ---------------------- ----------------- |
37 // | 38 // |
38 // The reason for this problem reduction is that there are efficient algorithms | 39 // The reason for this problem reduction is that there are efficient algorithms |
39 // for searching many substrings in one string (see Aho-Corasick algorithm). | 40 // for searching many substrings in one string (see Aho-Corasick algorithm). |
40 // | 41 // |
| 42 // Additionally, some of the same pieces are reused to implement regular |
| 43 // expression comparisons. The FilteredRE2 implementation for matching many |
| 44 // regular expressions against one string uses prefiltering, in which a set |
| 45 // of substrings (derived from the regexes) are first searched for, to reduce |
| 46 // the number of regular expressions to test; the prefiltering step also |
| 47 // uses Aho-Corasick. |
| 48 // |
41 // Case 1: {host,path,query}_{prefix,suffix,equals} searches. | 49 // Case 1: {host,path,query}_{prefix,suffix,equals} searches. |
42 // ========================================================== | 50 // ========================================================== |
43 // | 51 // |
44 // For searches in this class, we normalize URLs as follows: | 52 // For searches in this class, we normalize URLs as follows: |
45 // | 53 // |
46 // Step 1: | 54 // Step 1: |
47 // Remove scheme, port and segment from URL: | 55 // Remove scheme, port and segment from URL: |
48 // -> http://www.example.com:8080/index.html?search=foo#first_match becomes | 56 // -> http://www.example.com:8080/index.html?search=foo#first_match becomes |
49 // www.example.com/index.html?search=foo | 57 // www.example.com/index.html?search=foo |
50 // | 58 // |
(...skipping 26 matching lines...) Expand all Loading... |
77 // | 85 // |
78 // host_suffix(suffix) = suffix ED | 86 // host_suffix(suffix) = suffix ED |
79 // -> host_suffix("example.com") = example.com ED | 87 // -> host_suffix("example.com") = example.com ED |
80 // -> host_suffix(".example.com") = .example.com ED | 88 // -> host_suffix(".example.com") = .example.com ED |
81 // | 89 // |
82 // host_equals(domain) = BU add_missing_dot_prefix(domain) ED | 90 // host_equals(domain) = BU add_missing_dot_prefix(domain) ED |
83 // -> host_equals("www.example.com") = BU .www.example.com ED | 91 // -> host_equals("www.example.com") = BU .www.example.com ED |
84 // | 92 // |
85 // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}). | 93 // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}). |
86 // | 94 // |
87 // With this, we can search the SubstringPatterns in the normalized URL. | 95 // With this, we can search the StringPatterns in the normalized URL. |
88 // | 96 // |
89 // | 97 // |
90 // Case 2: url_{prefix,suffix,equals,contains} searches. | 98 // Case 2: url_{prefix,suffix,equals,contains} searches. |
91 // ===================================================== | 99 // ===================================================== |
92 // | 100 // |
93 // Step 1: as above, except that | 101 // Step 1: as above, except that |
94 // - the scheme is not removed | 102 // - the scheme is not removed |
95 // - the port is not removed if it is specified and does not match the default | 103 // - the port is not removed if it is specified and does not match the default |
96 // port for the given scheme. | 104 // port for the given scheme. |
97 // | 105 // |
(...skipping 19 matching lines...) Expand all Loading... |
117 // | 125 // |
118 // These kinds of searches are not supported directly but can be derived | 126 // These kinds of searches are not supported directly but can be derived |
119 // by a combination of a url_contains() query followed by an explicit test: | 127 // by a combination of a url_contains() query followed by an explicit test: |
120 // | 128 // |
121 // host_contains(str) = url_contains(str) followed by test whether str occurs | 129 // host_contains(str) = url_contains(str) followed by test whether str occurs |
122 // in host component of original URL. | 130 // in host component of original URL. |
123 // -> host_contains("example.co") = example.co | 131 // -> host_contains("example.co") = example.co |
124 // followed by gurl.host().find("example.co"); | 132 // followed by gurl.host().find("example.co"); |
125 // | 133 // |
126 // [similarly for path_contains and query_contains]. | 134 // [similarly for path_contains and query_contains]. |
| 135 // |
| 136 // |
| 137 // Regular expression matching (url_matches searches) |
| 138 // ================================================== |
| 139 // |
| 140 // This class also supports matching regular expressions (RE2 syntax) |
| 141 // against full URLs, which are transformed as in case 2. |
127 | 142 |
| 143 namespace { |
| 144 |
| 145 bool IsRegexCriterion(URLMatcherCondition::Criterion criterion) { |
| 146 return criterion == URLMatcherCondition::URL_MATCHES; |
| 147 } |
| 148 |
| 149 } // namespace |
128 | 150 |
129 // | 151 // |
130 // URLMatcherCondition | 152 // URLMatcherCondition |
131 // | 153 // |
132 | 154 |
133 URLMatcherCondition::URLMatcherCondition() | 155 URLMatcherCondition::URLMatcherCondition() |
134 : criterion_(HOST_PREFIX), | 156 : criterion_(HOST_PREFIX), |
135 substring_pattern_(NULL) {} | 157 string_pattern_(NULL) {} |
136 | 158 |
137 URLMatcherCondition::~URLMatcherCondition() {} | 159 URLMatcherCondition::~URLMatcherCondition() {} |
138 | 160 |
139 URLMatcherCondition::URLMatcherCondition( | 161 URLMatcherCondition::URLMatcherCondition( |
140 Criterion criterion, | 162 Criterion criterion, |
141 const SubstringPattern* substring_pattern) | 163 const StringPattern* string_pattern) |
142 : criterion_(criterion), | 164 : criterion_(criterion), |
143 substring_pattern_(substring_pattern) {} | 165 string_pattern_(string_pattern) {} |
144 | 166 |
145 URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs) | 167 URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs) |
146 : criterion_(rhs.criterion_), | 168 : criterion_(rhs.criterion_), |
147 substring_pattern_(rhs.substring_pattern_) {} | 169 string_pattern_(rhs.string_pattern_) {} |
148 | 170 |
149 URLMatcherCondition& URLMatcherCondition::operator=( | 171 URLMatcherCondition& URLMatcherCondition::operator=( |
150 const URLMatcherCondition& rhs) { | 172 const URLMatcherCondition& rhs) { |
151 criterion_ = rhs.criterion_; | 173 criterion_ = rhs.criterion_; |
152 substring_pattern_ = rhs.substring_pattern_; | 174 string_pattern_ = rhs.string_pattern_; |
153 return *this; | 175 return *this; |
154 } | 176 } |
155 | 177 |
156 bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const { | 178 bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const { |
157 if (criterion_ < rhs.criterion_) return true; | 179 if (criterion_ < rhs.criterion_) return true; |
158 if (criterion_ > rhs.criterion_) return false; | 180 if (criterion_ > rhs.criterion_) return false; |
159 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) | 181 if (string_pattern_ != NULL && rhs.string_pattern_ != NULL) |
160 return *substring_pattern_ < *rhs.substring_pattern_; | 182 return *string_pattern_ < *rhs.string_pattern_; |
161 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; | 183 if (string_pattern_ == NULL && rhs.string_pattern_ != NULL) return true; |
162 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, | 184 // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL, |
163 // or both are NULL. | 185 // or both are NULL. |
164 return false; | 186 return false; |
165 } | 187 } |
166 | 188 |
167 bool URLMatcherCondition::IsFullURLCondition() const { | 189 bool URLMatcherCondition::IsFullURLCondition() const { |
168 // For these criteria the SubstringMatcher needs to be executed on the | 190 // For these criteria the SubstringMatcher needs to be executed on the |
169 // GURL that is canonicalized with | 191 // GURL that is canonicalized with |
170 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. | 192 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. |
171 switch (criterion_) { | 193 switch (criterion_) { |
172 case HOST_CONTAINS: | 194 case HOST_CONTAINS: |
173 case PATH_CONTAINS: | 195 case PATH_CONTAINS: |
174 case QUERY_CONTAINS: | 196 case QUERY_CONTAINS: |
175 case URL_PREFIX: | 197 case URL_PREFIX: |
176 case URL_SUFFIX: | 198 case URL_SUFFIX: |
177 case URL_CONTAINS: | 199 case URL_CONTAINS: |
178 case URL_EQUALS: | 200 case URL_EQUALS: |
179 return true; | 201 return true; |
180 default: | 202 default: |
181 break; | 203 break; |
182 } | 204 } |
183 return false; | 205 return false; |
184 } | 206 } |
185 | 207 |
| 208 bool URLMatcherCondition::IsRegexCondition() const { |
| 209 return IsRegexCriterion(criterion_); |
| 210 } |
| 211 |
186 bool URLMatcherCondition::IsMatch( | 212 bool URLMatcherCondition::IsMatch( |
187 const std::set<SubstringPattern::ID>& matching_substring_patterns, | 213 const std::set<StringPattern::ID>& matching_patterns, |
188 const GURL& url) const { | 214 const GURL& url) const { |
189 DCHECK(substring_pattern_); | 215 DCHECK(string_pattern_); |
190 if (matching_substring_patterns.find(substring_pattern_->id()) == | 216 if (!ContainsKey(matching_patterns, string_pattern_->id())) |
191 matching_substring_patterns.end()) | |
192 return false; | 217 return false; |
193 // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on | 218 // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on |
194 // a substring match on the raw URL. In case of a match, we need to verify | 219 // a substring match on the raw URL. In case of a match, we need to verify |
195 // that the match was found in the correct component of the URL. | 220 // that the match was found in the correct component of the URL. |
196 switch (criterion_) { | 221 switch (criterion_) { |
197 case HOST_CONTAINS: | 222 case HOST_CONTAINS: |
198 return url.host().find(substring_pattern_->pattern()) != | 223 return url.host().find(string_pattern_->pattern()) != |
199 std::string::npos; | 224 std::string::npos; |
200 case PATH_CONTAINS: | 225 case PATH_CONTAINS: |
201 return url.path().find(substring_pattern_->pattern()) != | 226 return url.path().find(string_pattern_->pattern()) != |
202 std::string::npos; | 227 std::string::npos; |
203 case QUERY_CONTAINS: | 228 case QUERY_CONTAINS: |
204 return url.query().find(substring_pattern_->pattern()) != | 229 return url.query().find(string_pattern_->pattern()) != |
205 std::string::npos; | 230 std::string::npos; |
206 default: | 231 default: |
207 break; | 232 break; |
208 } | 233 } |
209 return true; | 234 return true; |
210 } | 235 } |
211 | 236 |
212 // | 237 // |
213 // URLMatcherConditionFactory | 238 // URLMatcherConditionFactory |
214 // | 239 // |
215 | 240 |
216 namespace { | 241 namespace { |
217 // These are symbols that are not contained in 7-bit ASCII used in GURLs. | 242 // These are symbols that are not contained in 7-bit ASCII used in GURLs. |
218 const char kBeginningOfURL[] = {static_cast<char>(-1), 0}; | 243 const char kBeginningOfURL[] = {static_cast<char>(-1), 0}; |
219 const char kEndOfDomain[] = {static_cast<char>(-2), 0}; | 244 const char kEndOfDomain[] = {static_cast<char>(-2), 0}; |
220 const char kEndOfPath[] = {static_cast<char>(-3), 0}; | 245 const char kEndOfPath[] = {static_cast<char>(-3), 0}; |
221 const char kEndOfURL[] = {static_cast<char>(-4), 0}; | 246 const char kEndOfURL[] = {static_cast<char>(-4), 0}; |
222 } // namespace | 247 } // namespace |
223 | 248 |
224 URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {} | 249 URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {} |
225 | 250 |
226 URLMatcherConditionFactory::~URLMatcherConditionFactory() { | 251 URLMatcherConditionFactory::~URLMatcherConditionFactory() { |
227 STLDeleteElements(&pattern_singletons_); | 252 STLDeleteElements(&substring_pattern_singletons_); |
| 253 STLDeleteElements(®ex_pattern_singletons_); |
228 } | 254 } |
229 | 255 |
230 std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches( | 256 std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches( |
231 const GURL& url) { | 257 const GURL& url) { |
232 return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain + | 258 return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain + |
233 url.path() + kEndOfPath + (url.has_query() ? "?" + url.query() : "") + | 259 url.path() + kEndOfPath + (url.has_query() ? "?" + url.query() : "") + |
234 kEndOfURL; | 260 kEndOfURL; |
235 } | 261 } |
236 | 262 |
237 URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition( | 263 URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition( |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
329 const std::string& port = url.scheme(); | 355 const std::string& port = url.scheme(); |
330 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) == | 356 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) == |
331 url.EffectiveIntPort()) { | 357 url.EffectiveIntPort()) { |
332 replacements.ClearPort(); | 358 replacements.ClearPort(); |
333 } | 359 } |
334 } | 360 } |
335 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() + | 361 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() + |
336 kEndOfURL; | 362 kEndOfURL; |
337 } | 363 } |
338 | 364 |
| 365 std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches( |
| 366 const GURL& url) { |
| 367 GURL::Replacements replacements; |
| 368 replacements.ClearPassword(); |
| 369 replacements.ClearUsername(); |
| 370 replacements.ClearRef(); |
| 371 // Clear port if it is implicit from scheme. |
| 372 if (url.has_port()) { |
| 373 const std::string& port = url.scheme(); |
| 374 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) == |
| 375 url.EffectiveIntPort()) { |
| 376 replacements.ClearPort(); |
| 377 } |
| 378 } |
| 379 return url.ReplaceComponents(replacements).spec(); |
| 380 } |
| 381 |
339 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( | 382 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( |
340 const std::string& prefix) { | 383 const std::string& prefix) { |
341 return CreateCondition(URLMatcherCondition::URL_PREFIX, | 384 return CreateCondition(URLMatcherCondition::URL_PREFIX, |
342 kBeginningOfURL + prefix); | 385 kBeginningOfURL + prefix); |
343 } | 386 } |
344 | 387 |
345 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( | 388 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( |
346 const std::string& suffix) { | 389 const std::string& suffix) { |
347 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); | 390 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); |
348 } | 391 } |
349 | 392 |
350 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( | 393 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( |
351 const std::string& str) { | 394 const std::string& str) { |
352 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); | 395 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); |
353 } | 396 } |
354 | 397 |
355 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( | 398 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( |
356 const std::string& str) { | 399 const std::string& str) { |
357 return CreateCondition(URLMatcherCondition::URL_EQUALS, | 400 return CreateCondition(URLMatcherCondition::URL_EQUALS, |
358 kBeginningOfURL + str + kEndOfURL); | 401 kBeginningOfURL + str + kEndOfURL); |
359 } | 402 } |
360 | 403 |
| 404 URLMatcherCondition URLMatcherConditionFactory::CreateURLMatchesCondition( |
| 405 const std::string& regex) { |
| 406 return CreateCondition(URLMatcherCondition::URL_MATCHES, regex); |
| 407 } |
| 408 |
361 void URLMatcherConditionFactory::ForgetUnusedPatterns( | 409 void URLMatcherConditionFactory::ForgetUnusedPatterns( |
362 const std::set<SubstringPattern::ID>& used_patterns) { | 410 const std::set<StringPattern::ID>& used_patterns) { |
363 PatternSingletons::iterator i = pattern_singletons_.begin(); | 411 PatternSingletons::iterator i = substring_pattern_singletons_.begin(); |
364 while (i != pattern_singletons_.end()) { | 412 while (i != substring_pattern_singletons_.end()) { |
365 if (used_patterns.find((*i)->id()) != used_patterns.end()) { | 413 if (used_patterns.find((*i)->id()) != used_patterns.end()) { |
366 ++i; | 414 ++i; |
367 } else { | 415 } else { |
368 delete *i; | 416 delete *i; |
369 pattern_singletons_.erase(i++); | 417 substring_pattern_singletons_.erase(i++); |
| 418 } |
| 419 } |
| 420 i = regex_pattern_singletons_.begin(); |
| 421 while (i != regex_pattern_singletons_.end()) { |
| 422 if (used_patterns.find((*i)->id()) != used_patterns.end()) { |
| 423 ++i; |
| 424 } else { |
| 425 delete *i; |
| 426 regex_pattern_singletons_.erase(i++); |
370 } | 427 } |
371 } | 428 } |
372 } | 429 } |
373 | 430 |
374 bool URLMatcherConditionFactory::IsEmpty() const { | 431 bool URLMatcherConditionFactory::IsEmpty() const { |
375 return pattern_singletons_.empty(); | 432 return substring_pattern_singletons_.empty() && |
| 433 regex_pattern_singletons_.empty(); |
376 } | 434 } |
377 | 435 |
378 URLMatcherCondition URLMatcherConditionFactory::CreateCondition( | 436 URLMatcherCondition URLMatcherConditionFactory::CreateCondition( |
379 URLMatcherCondition::Criterion criterion, | 437 URLMatcherCondition::Criterion criterion, |
380 const std::string& pattern) { | 438 const std::string& pattern) { |
381 SubstringPattern search_pattern(pattern, 0); | 439 StringPattern search_pattern(pattern, 0); |
| 440 PatternSingletons* pattern_singletons = |
| 441 IsRegexCriterion(criterion) ? ®ex_pattern_singletons_ |
| 442 : &substring_pattern_singletons_; |
| 443 |
382 PatternSingletons::const_iterator iter = | 444 PatternSingletons::const_iterator iter = |
383 pattern_singletons_.find(&search_pattern); | 445 pattern_singletons->find(&search_pattern); |
384 if (iter != pattern_singletons_.end()) { | 446 |
| 447 if (iter != pattern_singletons->end()) { |
385 return URLMatcherCondition(criterion, *iter); | 448 return URLMatcherCondition(criterion, *iter); |
386 } else { | 449 } else { |
387 SubstringPattern* new_pattern = | 450 StringPattern* new_pattern = |
388 new SubstringPattern(pattern, id_counter_++); | 451 new StringPattern(pattern, id_counter_++); |
389 pattern_singletons_.insert(new_pattern); | 452 pattern_singletons->insert(new_pattern); |
390 return URLMatcherCondition(criterion, new_pattern); | 453 return URLMatcherCondition(criterion, new_pattern); |
391 } | 454 } |
392 } | 455 } |
393 | 456 |
394 std::string URLMatcherConditionFactory::CanonicalizeHostname( | 457 std::string URLMatcherConditionFactory::CanonicalizeHostname( |
395 const std::string& hostname) const { | 458 const std::string& hostname) const { |
396 if (!hostname.empty() && hostname[0] == '.') | 459 if (!hostname.empty() && hostname[0] == '.') |
397 return hostname; | 460 return hostname; |
398 else | 461 else |
399 return "." + hostname; | 462 return "." + hostname; |
400 } | 463 } |
401 | 464 |
402 bool URLMatcherConditionFactory::SubstringPatternPointerCompare::operator()( | 465 bool URLMatcherConditionFactory::StringPatternPointerCompare::operator()( |
403 SubstringPattern* lhs, | 466 StringPattern* lhs, |
404 SubstringPattern* rhs) const { | 467 StringPattern* rhs) const { |
405 if (lhs == NULL && rhs != NULL) return true; | 468 if (lhs == NULL && rhs != NULL) return true; |
406 if (lhs != NULL && rhs != NULL) | 469 if (lhs != NULL && rhs != NULL) |
407 return lhs->pattern() < rhs->pattern(); | 470 return lhs->pattern() < rhs->pattern(); |
408 // Either both are NULL or only rhs is NULL. | 471 // Either both are NULL or only rhs is NULL. |
409 return false; | 472 return false; |
410 } | 473 } |
411 | 474 |
412 // | 475 // |
413 // URLMatcherSchemeFilter | 476 // URLMatcherSchemeFilter |
414 // | 477 // |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
476 ID id, | 539 ID id, |
477 const Conditions& conditions, | 540 const Conditions& conditions, |
478 scoped_ptr<URLMatcherSchemeFilter> scheme_filter, | 541 scoped_ptr<URLMatcherSchemeFilter> scheme_filter, |
479 scoped_ptr<URLMatcherPortFilter> port_filter) | 542 scoped_ptr<URLMatcherPortFilter> port_filter) |
480 : id_(id), | 543 : id_(id), |
481 conditions_(conditions), | 544 conditions_(conditions), |
482 scheme_filter_(scheme_filter.Pass()), | 545 scheme_filter_(scheme_filter.Pass()), |
483 port_filter_(port_filter.Pass()) {} | 546 port_filter_(port_filter.Pass()) {} |
484 | 547 |
485 bool URLMatcherConditionSet::IsMatch( | 548 bool URLMatcherConditionSet::IsMatch( |
486 const std::set<SubstringPattern::ID>& matching_substring_patterns, | 549 const std::set<StringPattern::ID>& matching_patterns, |
487 const GURL& url) const { | 550 const GURL& url) const { |
488 for (Conditions::const_iterator i = conditions_.begin(); | 551 for (Conditions::const_iterator i = conditions_.begin(); |
489 i != conditions_.end(); ++i) { | 552 i != conditions_.end(); ++i) { |
490 if (!i->IsMatch(matching_substring_patterns, url)) | 553 if (!i->IsMatch(matching_patterns, url)) |
491 return false; | 554 return false; |
492 } | 555 } |
493 if (scheme_filter_.get() && !scheme_filter_->IsMatch(url)) | 556 if (scheme_filter_.get() && !scheme_filter_->IsMatch(url)) |
494 return false; | 557 return false; |
495 if (port_filter_.get() && !port_filter_->IsMatch(url)) | 558 if (port_filter_.get() && !port_filter_->IsMatch(url)) |
496 return false; | 559 return false; |
497 return true; | 560 return true; |
498 } | 561 } |
499 | 562 |
500 | |
501 // | 563 // |
502 // URLMatcher | 564 // URLMatcher |
503 // | 565 // |
504 | 566 |
505 URLMatcher::URLMatcher() {} | 567 URLMatcher::URLMatcher() {} |
506 | 568 |
507 URLMatcher::~URLMatcher() {} | 569 URLMatcher::~URLMatcher() {} |
508 | 570 |
509 void URLMatcher::AddConditionSets( | 571 void URLMatcher::AddConditionSets( |
510 const URLMatcherConditionSet::Vector& condition_sets) { | 572 const URLMatcherConditionSet::Vector& condition_sets) { |
(...skipping 15 matching lines...) Expand all Loading... |
526 url_matcher_condition_sets_.erase(*i); | 588 url_matcher_condition_sets_.erase(*i); |
527 } | 589 } |
528 UpdateInternalDatastructures(); | 590 UpdateInternalDatastructures(); |
529 } | 591 } |
530 | 592 |
531 void URLMatcher::ClearUnusedConditionSets() { | 593 void URLMatcher::ClearUnusedConditionSets() { |
532 UpdateConditionFactory(); | 594 UpdateConditionFactory(); |
533 } | 595 } |
534 | 596 |
535 std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(const GURL& url) { | 597 std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(const GURL& url) { |
536 // Find all IDs of SubstringPatterns that match |url|. | 598 // Find all IDs of StringPatterns that match |url|. |
537 // See URLMatcherConditionFactory for the canonicalization of URLs and the | 599 // See URLMatcherConditionFactory for the canonicalization of URLs and the |
538 // distinction between full url searches and url component searches. | 600 // distinction between full url searches and url component searches. |
539 std::set<SubstringPattern::ID> matches; | 601 std::set<StringPattern::ID> matches; |
540 full_url_matcher_.Match( | 602 full_url_matcher_.Match( |
541 condition_factory_.CanonicalizeURLForFullSearches(url), &matches); | 603 condition_factory_.CanonicalizeURLForFullSearches(url), &matches); |
542 url_component_matcher_.Match( | 604 url_component_matcher_.Match( |
543 condition_factory_.CanonicalizeURLForComponentSearches(url), &matches); | 605 condition_factory_.CanonicalizeURLForComponentSearches(url), &matches); |
| 606 regex_set_matcher_.Match( |
| 607 condition_factory_.CanonicalizeURLForRegexSearches(url), &matches); |
544 | 608 |
545 // Calculate all URLMatcherConditionSets for which all URLMatcherConditions | 609 // Calculate all URLMatcherConditionSets for which all URLMatcherConditions |
546 // were fulfilled. | 610 // were fulfilled. |
547 std::set<URLMatcherConditionSet::ID> result; | 611 std::set<URLMatcherConditionSet::ID> result; |
548 for (std::set<SubstringPattern::ID>::const_iterator i = matches.begin(); | 612 for (std::set<StringPattern::ID>::const_iterator i = matches.begin(); |
549 i != matches.end(); ++i) { | 613 i != matches.end(); ++i) { |
550 // For each URLMatcherConditionSet there is exactly one condition | 614 // For each URLMatcherConditionSet there is exactly one condition |
551 // registered in substring_match_triggers_. This means that the following | 615 // registered in substring_match_triggers_. This means that the following |
552 // logic tests each URLMatcherConditionSet exactly once if it can be | 616 // logic tests each URLMatcherConditionSet exactly once if it can be |
553 // completely fulfilled. | 617 // completely fulfilled. |
554 std::set<URLMatcherConditionSet::ID>& condition_sets = | 618 std::set<URLMatcherConditionSet::ID>& condition_sets = |
555 substring_match_triggers_[*i]; | 619 substring_match_triggers_[*i]; |
556 for (std::set<URLMatcherConditionSet::ID>::const_iterator j = | 620 for (std::set<URLMatcherConditionSet::ID>::const_iterator j = |
557 condition_sets.begin(); j != condition_sets.end(); ++j) { | 621 condition_sets.begin(); j != condition_sets.end(); ++j) { |
558 if (url_matcher_condition_sets_[*j]->IsMatch(matches, url)) | 622 if (url_matcher_condition_sets_[*j]->IsMatch(matches, url)) |
(...skipping 14 matching lines...) Expand all Loading... |
573 registered_url_component_patterns_.empty(); | 637 registered_url_component_patterns_.empty(); |
574 } | 638 } |
575 | 639 |
576 void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) { | 640 void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) { |
577 // The purpose of |full_url_conditions| is just that we need to execute | 641 // The purpose of |full_url_conditions| is just that we need to execute |
578 // the same logic once for Full URL searches and once for URL Component | 642 // the same logic once for Full URL searches and once for URL Component |
579 // searches (see URLMatcherConditionFactory). | 643 // searches (see URLMatcherConditionFactory). |
580 | 644 |
581 // Determine which patterns need to be registered when this function | 645 // Determine which patterns need to be registered when this function |
582 // terminates. | 646 // terminates. |
583 std::set<const SubstringPattern*> new_patterns; | 647 std::set<const StringPattern*> new_patterns; |
584 for (URLMatcherConditionSets::const_iterator condition_set_iter = | 648 for (URLMatcherConditionSets::const_iterator condition_set_iter = |
585 url_matcher_condition_sets_.begin(); | 649 url_matcher_condition_sets_.begin(); |
586 condition_set_iter != url_matcher_condition_sets_.end(); | 650 condition_set_iter != url_matcher_condition_sets_.end(); |
587 ++condition_set_iter) { | 651 ++condition_set_iter) { |
588 const URLMatcherConditionSet::Conditions& conditions = | 652 const URLMatcherConditionSet::Conditions& conditions = |
589 condition_set_iter->second->conditions(); | 653 condition_set_iter->second->conditions(); |
590 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 654 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = |
591 conditions.begin(); condition_iter != conditions.end(); | 655 conditions.begin(); condition_iter != conditions.end(); |
592 ++condition_iter) { | 656 ++condition_iter) { |
593 // If we are called to process Full URL searches, ignore all others, | 657 // If we are called to process Full URL searches, ignore others, and |
594 // and vice versa. | 658 // vice versa. (Regex conditions are updated in UpdateRegexSetMatcher.) |
595 if (full_url_conditions == condition_iter->IsFullURLCondition()) | 659 if (!condition_iter->IsRegexCondition() && |
596 new_patterns.insert(condition_iter->substring_pattern()); | 660 full_url_conditions == condition_iter->IsFullURLCondition()) |
| 661 new_patterns.insert(condition_iter->string_pattern()); |
597 } | 662 } |
598 } | 663 } |
599 | 664 |
600 // This is the set of patterns that were registered before this function | 665 // This is the set of patterns that were registered before this function |
601 // is called. | 666 // is called. |
602 std::set<const SubstringPattern*>& registered_patterns = | 667 std::set<const StringPattern*>& registered_patterns = |
603 full_url_conditions ? registered_full_url_patterns_ | 668 full_url_conditions ? registered_full_url_patterns_ |
604 : registered_url_component_patterns_; | 669 : registered_url_component_patterns_; |
605 | 670 |
606 // Add all patterns that are in new_patterns but not in registered_patterns. | 671 // Add all patterns that are in new_patterns but not in registered_patterns. |
607 std::vector<const SubstringPattern*> patterns_to_register; | 672 std::vector<const StringPattern*> patterns_to_register; |
608 std::set_difference( | 673 std::set_difference( |
609 new_patterns.begin(), new_patterns.end(), | 674 new_patterns.begin(), new_patterns.end(), |
610 registered_patterns.begin(), registered_patterns.end(), | 675 registered_patterns.begin(), registered_patterns.end(), |
611 std::back_inserter(patterns_to_register)); | 676 std::back_inserter(patterns_to_register)); |
612 | 677 |
613 // Remove all patterns that are in registered_patterns but not in | 678 // Remove all patterns that are in registered_patterns but not in |
614 // new_patterns. | 679 // new_patterns. |
615 std::vector<const SubstringPattern*> patterns_to_unregister; | 680 std::vector<const StringPattern*> patterns_to_unregister; |
616 std::set_difference( | 681 std::set_difference( |
617 registered_patterns.begin(), registered_patterns.end(), | 682 registered_patterns.begin(), registered_patterns.end(), |
618 new_patterns.begin(), new_patterns.end(), | 683 new_patterns.begin(), new_patterns.end(), |
619 std::back_inserter(patterns_to_unregister)); | 684 std::back_inserter(patterns_to_unregister)); |
620 | 685 |
621 // Update the SubstringSetMatcher. | 686 // Update the SubstringSetMatcher. |
622 SubstringSetMatcher& url_matcher = | 687 SubstringSetMatcher& url_matcher = |
623 full_url_conditions ? full_url_matcher_ : url_component_matcher_; | 688 full_url_conditions ? full_url_matcher_ : url_component_matcher_; |
624 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register, | 689 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register, |
625 patterns_to_unregister); | 690 patterns_to_unregister); |
626 | 691 |
627 // Update the set of registered_patterns for the next time this function | 692 // Update the set of registered_patterns for the next time this function |
628 // is being called. | 693 // is being called. |
629 registered_patterns.swap(new_patterns); | 694 registered_patterns.swap(new_patterns); |
630 } | 695 } |
631 | 696 |
| 697 void URLMatcher::UpdateRegexSetMatcher() { |
| 698 std::vector<const StringPattern*> new_patterns; |
| 699 |
| 700 for (URLMatcherConditionSets::const_iterator condition_set_iter = |
| 701 url_matcher_condition_sets_.begin(); |
| 702 condition_set_iter != url_matcher_condition_sets_.end(); |
| 703 ++condition_set_iter) { |
| 704 const URLMatcherConditionSet::Conditions& conditions = |
| 705 condition_set_iter->second->conditions(); |
| 706 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = |
| 707 conditions.begin(); condition_iter != conditions.end(); |
| 708 ++condition_iter) { |
| 709 if (condition_iter->IsRegexCondition()) |
| 710 new_patterns.push_back(condition_iter->string_pattern()); |
| 711 } |
| 712 } |
| 713 |
| 714 // Start over from scratch. We can't really do better than this, since the |
| 715 // FilteredRE2 backend doesn't support incremental updates. |
| 716 regex_set_matcher_.ClearPatterns(); |
| 717 regex_set_matcher_.AddPatterns(new_patterns); |
| 718 } |
| 719 |
632 void URLMatcher::UpdateTriggers() { | 720 void URLMatcher::UpdateTriggers() { |
633 // Count substring pattern frequencies. | 721 // Count substring pattern frequencies. |
634 std::map<SubstringPattern::ID, size_t> substring_pattern_frequencies; | 722 std::map<StringPattern::ID, size_t> substring_pattern_frequencies; |
635 for (URLMatcherConditionSets::const_iterator condition_set_iter = | 723 for (URLMatcherConditionSets::const_iterator condition_set_iter = |
636 url_matcher_condition_sets_.begin(); | 724 url_matcher_condition_sets_.begin(); |
637 condition_set_iter != url_matcher_condition_sets_.end(); | 725 condition_set_iter != url_matcher_condition_sets_.end(); |
638 ++condition_set_iter) { | 726 ++condition_set_iter) { |
639 const URLMatcherConditionSet::Conditions& conditions = | 727 const URLMatcherConditionSet::Conditions& conditions = |
640 condition_set_iter->second->conditions(); | 728 condition_set_iter->second->conditions(); |
641 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 729 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = |
642 conditions.begin(); condition_iter != conditions.end(); | 730 conditions.begin(); condition_iter != conditions.end(); |
643 ++condition_iter) { | 731 ++condition_iter) { |
644 const SubstringPattern* pattern = condition_iter->substring_pattern(); | 732 const StringPattern* pattern = condition_iter->string_pattern(); |
645 substring_pattern_frequencies[pattern->id()]++; | 733 substring_pattern_frequencies[pattern->id()]++; |
646 } | 734 } |
647 } | 735 } |
648 | 736 |
649 // Update trigger conditions: Determine for each URLMatcherConditionSet which | 737 // Update trigger conditions: Determine for each URLMatcherConditionSet which |
650 // URLMatcherCondition contains a SubstringPattern that occurs least | 738 // URLMatcherCondition contains a StringPattern that occurs least |
651 // frequently in this URLMatcher. We assume that this condition is very | 739 // frequently in this URLMatcher. We assume that this condition is very |
652 // specific and occurs rarely in URLs. If a match occurs for this | 740 // specific and occurs rarely in URLs. If a match occurs for this |
653 // URLMatcherCondition, we want to test all other URLMatcherCondition in the | 741 // URLMatcherCondition, we want to test all other URLMatcherCondition in the |
654 // respective URLMatcherConditionSet as well to see whether the entire | 742 // respective URLMatcherConditionSet as well to see whether the entire |
655 // URLMatcherConditionSet is considered matching. | 743 // URLMatcherConditionSet is considered matching. |
656 substring_match_triggers_.clear(); | 744 substring_match_triggers_.clear(); |
657 for (URLMatcherConditionSets::const_iterator condition_set_iter = | 745 for (URLMatcherConditionSets::const_iterator condition_set_iter = |
658 url_matcher_condition_sets_.begin(); | 746 url_matcher_condition_sets_.begin(); |
659 condition_set_iter != url_matcher_condition_sets_.end(); | 747 condition_set_iter != url_matcher_condition_sets_.end(); |
660 ++condition_set_iter) { | 748 ++condition_set_iter) { |
661 const URLMatcherConditionSet::Conditions& conditions = | 749 const URLMatcherConditionSet::Conditions& conditions = |
662 condition_set_iter->second->conditions(); | 750 condition_set_iter->second->conditions(); |
663 if (conditions.empty()) | 751 if (conditions.empty()) |
664 continue; | 752 continue; |
665 URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 753 URLMatcherConditionSet::Conditions::const_iterator condition_iter = |
666 conditions.begin(); | 754 conditions.begin(); |
667 SubstringPattern::ID trigger = condition_iter->substring_pattern()->id(); | 755 StringPattern::ID trigger = condition_iter->string_pattern()->id(); |
668 // We skip the first element in the following loop. | 756 // We skip the first element in the following loop. |
669 ++condition_iter; | 757 ++condition_iter; |
670 for (; condition_iter != conditions.end(); ++condition_iter) { | 758 for (; condition_iter != conditions.end(); ++condition_iter) { |
671 SubstringPattern::ID current_id = | 759 StringPattern::ID current_id = |
672 condition_iter->substring_pattern()->id(); | 760 condition_iter->string_pattern()->id(); |
673 if (substring_pattern_frequencies[trigger] > | 761 if (substring_pattern_frequencies[trigger] > |
674 substring_pattern_frequencies[current_id]) { | 762 substring_pattern_frequencies[current_id]) { |
675 trigger = current_id; | 763 trigger = current_id; |
676 } | 764 } |
677 } | 765 } |
678 substring_match_triggers_[trigger].insert(condition_set_iter->second->id()); | 766 substring_match_triggers_[trigger].insert(condition_set_iter->second->id()); |
679 } | 767 } |
680 } | 768 } |
681 | 769 |
682 void URLMatcher::UpdateConditionFactory() { | 770 void URLMatcher::UpdateConditionFactory() { |
683 std::set<SubstringPattern::ID> used_patterns; | 771 std::set<StringPattern::ID> used_patterns; |
684 for (URLMatcherConditionSets::const_iterator condition_set_iter = | 772 for (URLMatcherConditionSets::const_iterator condition_set_iter = |
685 url_matcher_condition_sets_.begin(); | 773 url_matcher_condition_sets_.begin(); |
686 condition_set_iter != url_matcher_condition_sets_.end(); | 774 condition_set_iter != url_matcher_condition_sets_.end(); |
687 ++condition_set_iter) { | 775 ++condition_set_iter) { |
688 const URLMatcherConditionSet::Conditions& conditions = | 776 const URLMatcherConditionSet::Conditions& conditions = |
689 condition_set_iter->second->conditions(); | 777 condition_set_iter->second->conditions(); |
690 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 778 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = |
691 conditions.begin(); condition_iter != conditions.end(); | 779 conditions.begin(); condition_iter != conditions.end(); |
692 ++condition_iter) { | 780 ++condition_iter) { |
693 used_patterns.insert(condition_iter->substring_pattern()->id()); | 781 used_patterns.insert(condition_iter->string_pattern()->id()); |
694 } | 782 } |
695 } | 783 } |
696 condition_factory_.ForgetUnusedPatterns(used_patterns); | 784 condition_factory_.ForgetUnusedPatterns(used_patterns); |
697 } | 785 } |
698 | 786 |
699 void URLMatcher::UpdateInternalDatastructures() { | 787 void URLMatcher::UpdateInternalDatastructures() { |
700 UpdateSubstringSetMatcher(false); | 788 UpdateSubstringSetMatcher(false); |
701 UpdateSubstringSetMatcher(true); | 789 UpdateSubstringSetMatcher(true); |
| 790 UpdateRegexSetMatcher(); |
702 UpdateTriggers(); | 791 UpdateTriggers(); |
703 UpdateConditionFactory(); | 792 UpdateConditionFactory(); |
704 } | 793 } |
705 | 794 |
706 } // namespace extensions | 795 } // namespace extensions |
OLD | NEW |