Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(770)

Side by Side Diff: chrome/common/extensions/matcher/url_matcher.cc

Issue 10910179: Event matching by regular expression matching on URLs. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: ascii artiste Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/common/extensions/matcher/url_matcher.h" 5 #include "chrome/common/extensions/matcher/url_matcher.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <iterator> 8 #include <iterator>
9 9
10 #include "base/logging.h" 10 #include "base/logging.h"
11 #include "content/public/common/url_constants.h" 11 #include "content/public/common/url_constants.h"
12 #include "googleurl/src/gurl.h" 12 #include "googleurl/src/gurl.h"
13 #include "googleurl/src/url_canon.h" 13 #include "googleurl/src/url_canon.h"
14 14
15 namespace extensions { 15 namespace extensions {
16 16
17 // This set of classes implement a mapping of URL Component Patterns, such as 17 // This set of classes implement a mapping of URL Component Patterns, such as
18 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. 18 // host_prefix, host_suffix, host_equals, ..., etc., to StringPatterns
19 // for use in substring comparisons.
19 // 20 //
20 // The idea of this mapping is to reduce the problem of comparing many 21 // The idea of this mapping is to reduce the problem of comparing many
21 // URL Component Patterns against one URL to the problem of searching many 22 // URL Component Patterns against one URL to the problem of searching many
22 // substrings in one string: 23 // substrings in one string:
23 // 24 //
24 // ---------------------- -------------------- 25 // ---------------------- -----------------
25 // | URL Query operator | ----translate----> | SubstringPattern | 26 // | URL Query operator | ----translate----> | StringPattern |
26 // ---------------------- -------------------- 27 // ---------------------- -----------------
27 // ^ 28 // ^
28 // | 29 // |
29 // compare 30 // compare
30 // | 31 // |
31 // v 32 // v
32 // ---------------------- -------------------- 33 // ---------------------- -----------------
33 // | URL to compare | | | 34 // | URL to compare | | |
34 // | to all URL Query | ----translate----> | String | 35 // | to all URL Query | ----translate----> | String |
35 // | operators | | | 36 // | operators | | |
36 // ---------------------- -------------------- 37 // ---------------------- -----------------
37 // 38 //
38 // The reason for this problem reduction is that there are efficient algorithms 39 // The reason for this problem reduction is that there are efficient algorithms
39 // for searching many substrings in one string (see Aho-Corasick algorithm). 40 // for searching many substrings in one string (see Aho-Corasick algorithm).
40 // 41 //
42 // Additionally, some of the same pieces are reused to implement regular
43 // expression comparisons. The FilteredRE2 implementation for matching many
44 // regular expressions against one string uses prefiltering, in which a set
45 // of substrings (derived from the regexes) are first searched for, to reduce
46 // the number of regular expressions to test; the prefiltering step also
47 // uses Aho-Corasick.
48 //
41 // Case 1: {host,path,query}_{prefix,suffix,equals} searches. 49 // Case 1: {host,path,query}_{prefix,suffix,equals} searches.
42 // ========================================================== 50 // ==========================================================
43 // 51 //
44 // For searches in this class, we normalize URLs as follows: 52 // For searches in this class, we normalize URLs as follows:
45 // 53 //
46 // Step 1: 54 // Step 1:
47 // Remove scheme, port and segment from URL: 55 // Remove scheme, port and segment from URL:
48 // -> http://www.example.com:8080/index.html?search=foo#first_match becomes 56 // -> http://www.example.com:8080/index.html?search=foo#first_match becomes
49 // www.example.com/index.html?search=foo 57 // www.example.com/index.html?search=foo
50 // 58 //
(...skipping 26 matching lines...) Expand all
77 // 85 //
78 // host_suffix(suffix) = suffix ED 86 // host_suffix(suffix) = suffix ED
79 // -> host_suffix("example.com") = example.com ED 87 // -> host_suffix("example.com") = example.com ED
80 // -> host_suffix(".example.com") = .example.com ED 88 // -> host_suffix(".example.com") = .example.com ED
81 // 89 //
82 // host_equals(domain) = BU add_missing_dot_prefix(domain) ED 90 // host_equals(domain) = BU add_missing_dot_prefix(domain) ED
83 // -> host_equals("www.example.com") = BU .www.example.com ED 91 // -> host_equals("www.example.com") = BU .www.example.com ED
84 // 92 //
85 // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}). 93 // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}).
86 // 94 //
87 // With this, we can search the SubstringPatterns in the normalized URL. 95 // With this, we can search the StringPatterns in the normalized URL.
88 // 96 //
89 // 97 //
90 // Case 2: url_{prefix,suffix,equals,contains} searches. 98 // Case 2: url_{prefix,suffix,equals,contains} searches.
91 // ===================================================== 99 // =====================================================
92 // 100 //
93 // Step 1: as above, except that 101 // Step 1: as above, except that
94 // - the scheme is not removed 102 // - the scheme is not removed
95 // - the port is not removed if it is specified and does not match the default 103 // - the port is not removed if it is specified and does not match the default
96 // port for the given scheme. 104 // port for the given scheme.
97 // 105 //
(...skipping 19 matching lines...) Expand all
117 // 125 //
118 // These kinds of searches are not supported directly but can be derived 126 // These kinds of searches are not supported directly but can be derived
119 // by a combination of a url_contains() query followed by an explicit test: 127 // by a combination of a url_contains() query followed by an explicit test:
120 // 128 //
121 // host_contains(str) = url_contains(str) followed by test whether str occurs 129 // host_contains(str) = url_contains(str) followed by test whether str occurs
122 // in host component of original URL. 130 // in host component of original URL.
123 // -> host_contains("example.co") = example.co 131 // -> host_contains("example.co") = example.co
124 // followed by gurl.host().find("example.co"); 132 // followed by gurl.host().find("example.co");
125 // 133 //
126 // [similarly for path_contains and query_contains]. 134 // [similarly for path_contains and query_contains].
135 //
136 //
137 // Regular expression matching (url_matches searches)
138 // ==================================================
139 //
140 // This class also supports matching regular expressions (RE2 syntax)
141 // against full URLs, which are transformed as in case 2.
127 142
143 namespace {
144
145 bool IsRegexCriterion(URLMatcherCondition::Criterion criterion) {
146 return criterion == URLMatcherCondition::URL_MATCHES;
147 }
148
149 } // namespace
128 150
129 // 151 //
130 // URLMatcherCondition 152 // URLMatcherCondition
131 // 153 //
132 154
133 URLMatcherCondition::URLMatcherCondition() 155 URLMatcherCondition::URLMatcherCondition()
134 : criterion_(HOST_PREFIX), 156 : criterion_(HOST_PREFIX),
135 substring_pattern_(NULL) {} 157 string_pattern_(NULL) {}
136 158
137 URLMatcherCondition::~URLMatcherCondition() {} 159 URLMatcherCondition::~URLMatcherCondition() {}
138 160
139 URLMatcherCondition::URLMatcherCondition( 161 URLMatcherCondition::URLMatcherCondition(
140 Criterion criterion, 162 Criterion criterion,
141 const SubstringPattern* substring_pattern) 163 const StringPattern* string_pattern)
142 : criterion_(criterion), 164 : criterion_(criterion),
143 substring_pattern_(substring_pattern) {} 165 string_pattern_(string_pattern) {}
144 166
145 URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs) 167 URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs)
146 : criterion_(rhs.criterion_), 168 : criterion_(rhs.criterion_),
147 substring_pattern_(rhs.substring_pattern_) {} 169 string_pattern_(rhs.string_pattern_) {}
148 170
149 URLMatcherCondition& URLMatcherCondition::operator=( 171 URLMatcherCondition& URLMatcherCondition::operator=(
150 const URLMatcherCondition& rhs) { 172 const URLMatcherCondition& rhs) {
151 criterion_ = rhs.criterion_; 173 criterion_ = rhs.criterion_;
152 substring_pattern_ = rhs.substring_pattern_; 174 string_pattern_ = rhs.string_pattern_;
153 return *this; 175 return *this;
154 } 176 }
155 177
156 bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const { 178 bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const {
157 if (criterion_ < rhs.criterion_) return true; 179 if (criterion_ < rhs.criterion_) return true;
158 if (criterion_ > rhs.criterion_) return false; 180 if (criterion_ > rhs.criterion_) return false;
159 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) 181 if (string_pattern_ != NULL && rhs.string_pattern_ != NULL)
160 return *substring_pattern_ < *rhs.substring_pattern_; 182 return *string_pattern_ < *rhs.string_pattern_;
161 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; 183 if (string_pattern_ == NULL && rhs.string_pattern_ != NULL) return true;
162 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, 184 // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL,
163 // or both are NULL. 185 // or both are NULL.
164 return false; 186 return false;
165 } 187 }
166 188
167 bool URLMatcherCondition::IsFullURLCondition() const { 189 bool URLMatcherCondition::IsFullURLCondition() const {
168 // For these criteria the SubstringMatcher needs to be executed on the 190 // For these criteria the SubstringMatcher needs to be executed on the
169 // GURL that is canonicalized with 191 // GURL that is canonicalized with
170 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. 192 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches.
171 switch (criterion_) { 193 switch (criterion_) {
172 case HOST_CONTAINS: 194 case HOST_CONTAINS:
173 case PATH_CONTAINS: 195 case PATH_CONTAINS:
174 case QUERY_CONTAINS: 196 case QUERY_CONTAINS:
175 case URL_PREFIX: 197 case URL_PREFIX:
176 case URL_SUFFIX: 198 case URL_SUFFIX:
177 case URL_CONTAINS: 199 case URL_CONTAINS:
178 case URL_EQUALS: 200 case URL_EQUALS:
179 return true; 201 return true;
180 default: 202 default:
181 break; 203 break;
182 } 204 }
183 return false; 205 return false;
184 } 206 }
185 207
208 bool URLMatcherCondition::IsRegexCondition() const {
209 return IsRegexCriterion(criterion_);
210 }
211
186 bool URLMatcherCondition::IsMatch( 212 bool URLMatcherCondition::IsMatch(
187 const std::set<SubstringPattern::ID>& matching_substring_patterns, 213 const std::set<StringPattern::ID>& matching_patterns,
188 const GURL& url) const { 214 const GURL& url) const {
189 DCHECK(substring_pattern_); 215 DCHECK(string_pattern_);
190 if (matching_substring_patterns.find(substring_pattern_->id()) == 216 if (!ContainsKey(matching_patterns, string_pattern_->id()))
191 matching_substring_patterns.end())
192 return false; 217 return false;
193 // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on 218 // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on
194 // a substring match on the raw URL. In case of a match, we need to verify 219 // a substring match on the raw URL. In case of a match, we need to verify
195 // that the match was found in the correct component of the URL. 220 // that the match was found in the correct component of the URL.
196 switch (criterion_) { 221 switch (criterion_) {
197 case HOST_CONTAINS: 222 case HOST_CONTAINS:
198 return url.host().find(substring_pattern_->pattern()) != 223 return url.host().find(string_pattern_->pattern()) !=
199 std::string::npos; 224 std::string::npos;
200 case PATH_CONTAINS: 225 case PATH_CONTAINS:
201 return url.path().find(substring_pattern_->pattern()) != 226 return url.path().find(string_pattern_->pattern()) !=
202 std::string::npos; 227 std::string::npos;
203 case QUERY_CONTAINS: 228 case QUERY_CONTAINS:
204 return url.query().find(substring_pattern_->pattern()) != 229 return url.query().find(string_pattern_->pattern()) !=
205 std::string::npos; 230 std::string::npos;
206 default: 231 default:
207 break; 232 break;
208 } 233 }
209 return true; 234 return true;
210 } 235 }
211 236
212 // 237 //
213 // URLMatcherConditionFactory 238 // URLMatcherConditionFactory
214 // 239 //
215 240
216 namespace { 241 namespace {
217 // These are symbols that are not contained in 7-bit ASCII used in GURLs. 242 // These are symbols that are not contained in 7-bit ASCII used in GURLs.
218 const char kBeginningOfURL[] = {static_cast<char>(-1), 0}; 243 const char kBeginningOfURL[] = {static_cast<char>(-1), 0};
219 const char kEndOfDomain[] = {static_cast<char>(-2), 0}; 244 const char kEndOfDomain[] = {static_cast<char>(-2), 0};
220 const char kEndOfPath[] = {static_cast<char>(-3), 0}; 245 const char kEndOfPath[] = {static_cast<char>(-3), 0};
221 const char kEndOfURL[] = {static_cast<char>(-4), 0}; 246 const char kEndOfURL[] = {static_cast<char>(-4), 0};
222 } // namespace 247 } // namespace
223 248
224 URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {} 249 URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {}
225 250
226 URLMatcherConditionFactory::~URLMatcherConditionFactory() { 251 URLMatcherConditionFactory::~URLMatcherConditionFactory() {
227 STLDeleteElements(&pattern_singletons_); 252 STLDeleteElements(&substring_pattern_singletons_);
253 STLDeleteElements(&regex_pattern_singletons_);
228 } 254 }
229 255
230 std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches( 256 std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches(
231 const GURL& url) { 257 const GURL& url) {
232 return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain + 258 return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain +
233 url.path() + kEndOfPath + (url.has_query() ? "?" + url.query() : "") + 259 url.path() + kEndOfPath + (url.has_query() ? "?" + url.query() : "") +
234 kEndOfURL; 260 kEndOfURL;
235 } 261 }
236 262
237 URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition( 263 URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition(
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
329 const std::string& port = url.scheme(); 355 const std::string& port = url.scheme();
330 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) == 356 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) ==
331 url.EffectiveIntPort()) { 357 url.EffectiveIntPort()) {
332 replacements.ClearPort(); 358 replacements.ClearPort();
333 } 359 }
334 } 360 }
335 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() + 361 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() +
336 kEndOfURL; 362 kEndOfURL;
337 } 363 }
338 364
365 std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches(
366 const GURL& url) {
367 GURL::Replacements replacements;
368 replacements.ClearPassword();
369 replacements.ClearUsername();
370 replacements.ClearRef();
371 // Clear port if it is implicit from scheme.
372 if (url.has_port()) {
373 const std::string& port = url.scheme();
374 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) ==
375 url.EffectiveIntPort()) {
376 replacements.ClearPort();
377 }
378 }
379 return url.ReplaceComponents(replacements).spec();
380 }
381
339 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( 382 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition(
340 const std::string& prefix) { 383 const std::string& prefix) {
341 return CreateCondition(URLMatcherCondition::URL_PREFIX, 384 return CreateCondition(URLMatcherCondition::URL_PREFIX,
342 kBeginningOfURL + prefix); 385 kBeginningOfURL + prefix);
343 } 386 }
344 387
345 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( 388 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition(
346 const std::string& suffix) { 389 const std::string& suffix) {
347 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); 390 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL);
348 } 391 }
349 392
350 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( 393 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition(
351 const std::string& str) { 394 const std::string& str) {
352 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); 395 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str);
353 } 396 }
354 397
355 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( 398 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition(
356 const std::string& str) { 399 const std::string& str) {
357 return CreateCondition(URLMatcherCondition::URL_EQUALS, 400 return CreateCondition(URLMatcherCondition::URL_EQUALS,
358 kBeginningOfURL + str + kEndOfURL); 401 kBeginningOfURL + str + kEndOfURL);
359 } 402 }
360 403
404 URLMatcherCondition URLMatcherConditionFactory::CreateURLMatchesCondition(
405 const std::string& regex) {
406 return CreateCondition(URLMatcherCondition::URL_MATCHES, regex);
407 }
408
361 void URLMatcherConditionFactory::ForgetUnusedPatterns( 409 void URLMatcherConditionFactory::ForgetUnusedPatterns(
362 const std::set<SubstringPattern::ID>& used_patterns) { 410 const std::set<StringPattern::ID>& used_patterns) {
363 PatternSingletons::iterator i = pattern_singletons_.begin(); 411 PatternSingletons::iterator i = substring_pattern_singletons_.begin();
364 while (i != pattern_singletons_.end()) { 412 while (i != substring_pattern_singletons_.end()) {
365 if (used_patterns.find((*i)->id()) != used_patterns.end()) { 413 if (used_patterns.find((*i)->id()) != used_patterns.end()) {
366 ++i; 414 ++i;
367 } else { 415 } else {
368 delete *i; 416 delete *i;
369 pattern_singletons_.erase(i++); 417 substring_pattern_singletons_.erase(i++);
418 }
419 }
420 i = regex_pattern_singletons_.begin();
421 while (i != regex_pattern_singletons_.end()) {
422 if (used_patterns.find((*i)->id()) != used_patterns.end()) {
423 ++i;
424 } else {
425 delete *i;
426 regex_pattern_singletons_.erase(i++);
370 } 427 }
371 } 428 }
372 } 429 }
373 430
374 bool URLMatcherConditionFactory::IsEmpty() const { 431 bool URLMatcherConditionFactory::IsEmpty() const {
375 return pattern_singletons_.empty(); 432 return substring_pattern_singletons_.empty() &&
433 regex_pattern_singletons_.empty();
376 } 434 }
377 435
378 URLMatcherCondition URLMatcherConditionFactory::CreateCondition( 436 URLMatcherCondition URLMatcherConditionFactory::CreateCondition(
379 URLMatcherCondition::Criterion criterion, 437 URLMatcherCondition::Criterion criterion,
380 const std::string& pattern) { 438 const std::string& pattern) {
381 SubstringPattern search_pattern(pattern, 0); 439 StringPattern search_pattern(pattern, 0);
440 PatternSingletons* pattern_singletons =
441 IsRegexCriterion(criterion) ? &regex_pattern_singletons_
442 : &substring_pattern_singletons_;
443
382 PatternSingletons::const_iterator iter = 444 PatternSingletons::const_iterator iter =
383 pattern_singletons_.find(&search_pattern); 445 pattern_singletons->find(&search_pattern);
384 if (iter != pattern_singletons_.end()) { 446
447 if (iter != pattern_singletons->end()) {
385 return URLMatcherCondition(criterion, *iter); 448 return URLMatcherCondition(criterion, *iter);
386 } else { 449 } else {
387 SubstringPattern* new_pattern = 450 StringPattern* new_pattern =
388 new SubstringPattern(pattern, id_counter_++); 451 new StringPattern(pattern, id_counter_++);
389 pattern_singletons_.insert(new_pattern); 452 pattern_singletons->insert(new_pattern);
390 return URLMatcherCondition(criterion, new_pattern); 453 return URLMatcherCondition(criterion, new_pattern);
391 } 454 }
392 } 455 }
393 456
394 std::string URLMatcherConditionFactory::CanonicalizeHostname( 457 std::string URLMatcherConditionFactory::CanonicalizeHostname(
395 const std::string& hostname) const { 458 const std::string& hostname) const {
396 if (!hostname.empty() && hostname[0] == '.') 459 if (!hostname.empty() && hostname[0] == '.')
397 return hostname; 460 return hostname;
398 else 461 else
399 return "." + hostname; 462 return "." + hostname;
400 } 463 }
401 464
402 bool URLMatcherConditionFactory::SubstringPatternPointerCompare::operator()( 465 bool URLMatcherConditionFactory::StringPatternPointerCompare::operator()(
403 SubstringPattern* lhs, 466 StringPattern* lhs,
404 SubstringPattern* rhs) const { 467 StringPattern* rhs) const {
405 if (lhs == NULL && rhs != NULL) return true; 468 if (lhs == NULL && rhs != NULL) return true;
406 if (lhs != NULL && rhs != NULL) 469 if (lhs != NULL && rhs != NULL)
407 return lhs->pattern() < rhs->pattern(); 470 return lhs->pattern() < rhs->pattern();
408 // Either both are NULL or only rhs is NULL. 471 // Either both are NULL or only rhs is NULL.
409 return false; 472 return false;
410 } 473 }
411 474
412 // 475 //
413 // URLMatcherSchemeFilter 476 // URLMatcherSchemeFilter
414 // 477 //
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
476 ID id, 539 ID id,
477 const Conditions& conditions, 540 const Conditions& conditions,
478 scoped_ptr<URLMatcherSchemeFilter> scheme_filter, 541 scoped_ptr<URLMatcherSchemeFilter> scheme_filter,
479 scoped_ptr<URLMatcherPortFilter> port_filter) 542 scoped_ptr<URLMatcherPortFilter> port_filter)
480 : id_(id), 543 : id_(id),
481 conditions_(conditions), 544 conditions_(conditions),
482 scheme_filter_(scheme_filter.Pass()), 545 scheme_filter_(scheme_filter.Pass()),
483 port_filter_(port_filter.Pass()) {} 546 port_filter_(port_filter.Pass()) {}
484 547
485 bool URLMatcherConditionSet::IsMatch( 548 bool URLMatcherConditionSet::IsMatch(
486 const std::set<SubstringPattern::ID>& matching_substring_patterns, 549 const std::set<StringPattern::ID>& matching_patterns,
487 const GURL& url) const { 550 const GURL& url) const {
488 for (Conditions::const_iterator i = conditions_.begin(); 551 for (Conditions::const_iterator i = conditions_.begin();
489 i != conditions_.end(); ++i) { 552 i != conditions_.end(); ++i) {
490 if (!i->IsMatch(matching_substring_patterns, url)) 553 if (!i->IsMatch(matching_patterns, url))
491 return false; 554 return false;
492 } 555 }
493 if (scheme_filter_.get() && !scheme_filter_->IsMatch(url)) 556 if (scheme_filter_.get() && !scheme_filter_->IsMatch(url))
494 return false; 557 return false;
495 if (port_filter_.get() && !port_filter_->IsMatch(url)) 558 if (port_filter_.get() && !port_filter_->IsMatch(url))
496 return false; 559 return false;
497 return true; 560 return true;
498 } 561 }
499 562
500
501 // 563 //
502 // URLMatcher 564 // URLMatcher
503 // 565 //
504 566
505 URLMatcher::URLMatcher() {} 567 URLMatcher::URLMatcher() {}
506 568
507 URLMatcher::~URLMatcher() {} 569 URLMatcher::~URLMatcher() {}
508 570
509 void URLMatcher::AddConditionSets( 571 void URLMatcher::AddConditionSets(
510 const URLMatcherConditionSet::Vector& condition_sets) { 572 const URLMatcherConditionSet::Vector& condition_sets) {
(...skipping 15 matching lines...) Expand all
526 url_matcher_condition_sets_.erase(*i); 588 url_matcher_condition_sets_.erase(*i);
527 } 589 }
528 UpdateInternalDatastructures(); 590 UpdateInternalDatastructures();
529 } 591 }
530 592
531 void URLMatcher::ClearUnusedConditionSets() { 593 void URLMatcher::ClearUnusedConditionSets() {
532 UpdateConditionFactory(); 594 UpdateConditionFactory();
533 } 595 }
534 596
535 std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(const GURL& url) { 597 std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(const GURL& url) {
536 // Find all IDs of SubstringPatterns that match |url|. 598 // Find all IDs of StringPatterns that match |url|.
537 // See URLMatcherConditionFactory for the canonicalization of URLs and the 599 // See URLMatcherConditionFactory for the canonicalization of URLs and the
538 // distinction between full url searches and url component searches. 600 // distinction between full url searches and url component searches.
539 std::set<SubstringPattern::ID> matches; 601 std::set<StringPattern::ID> matches;
540 full_url_matcher_.Match( 602 full_url_matcher_.Match(
541 condition_factory_.CanonicalizeURLForFullSearches(url), &matches); 603 condition_factory_.CanonicalizeURLForFullSearches(url), &matches);
542 url_component_matcher_.Match( 604 url_component_matcher_.Match(
543 condition_factory_.CanonicalizeURLForComponentSearches(url), &matches); 605 condition_factory_.CanonicalizeURLForComponentSearches(url), &matches);
606 regex_set_matcher_.Match(
607 condition_factory_.CanonicalizeURLForRegexSearches(url), &matches);
544 608
545 // Calculate all URLMatcherConditionSets for which all URLMatcherConditions 609 // Calculate all URLMatcherConditionSets for which all URLMatcherConditions
546 // were fulfilled. 610 // were fulfilled.
547 std::set<URLMatcherConditionSet::ID> result; 611 std::set<URLMatcherConditionSet::ID> result;
548 for (std::set<SubstringPattern::ID>::const_iterator i = matches.begin(); 612 for (std::set<StringPattern::ID>::const_iterator i = matches.begin();
549 i != matches.end(); ++i) { 613 i != matches.end(); ++i) {
550 // For each URLMatcherConditionSet there is exactly one condition 614 // For each URLMatcherConditionSet there is exactly one condition
551 // registered in substring_match_triggers_. This means that the following 615 // registered in substring_match_triggers_. This means that the following
552 // logic tests each URLMatcherConditionSet exactly once if it can be 616 // logic tests each URLMatcherConditionSet exactly once if it can be
553 // completely fulfilled. 617 // completely fulfilled.
554 std::set<URLMatcherConditionSet::ID>& condition_sets = 618 std::set<URLMatcherConditionSet::ID>& condition_sets =
555 substring_match_triggers_[*i]; 619 substring_match_triggers_[*i];
556 for (std::set<URLMatcherConditionSet::ID>::const_iterator j = 620 for (std::set<URLMatcherConditionSet::ID>::const_iterator j =
557 condition_sets.begin(); j != condition_sets.end(); ++j) { 621 condition_sets.begin(); j != condition_sets.end(); ++j) {
558 if (url_matcher_condition_sets_[*j]->IsMatch(matches, url)) 622 if (url_matcher_condition_sets_[*j]->IsMatch(matches, url))
(...skipping 14 matching lines...) Expand all
573 registered_url_component_patterns_.empty(); 637 registered_url_component_patterns_.empty();
574 } 638 }
575 639
576 void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) { 640 void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) {
577 // The purpose of |full_url_conditions| is just that we need to execute 641 // The purpose of |full_url_conditions| is just that we need to execute
578 // the same logic once for Full URL searches and once for URL Component 642 // the same logic once for Full URL searches and once for URL Component
579 // searches (see URLMatcherConditionFactory). 643 // searches (see URLMatcherConditionFactory).
580 644
581 // Determine which patterns need to be registered when this function 645 // Determine which patterns need to be registered when this function
582 // terminates. 646 // terminates.
583 std::set<const SubstringPattern*> new_patterns; 647 std::set<const StringPattern*> new_patterns;
584 for (URLMatcherConditionSets::const_iterator condition_set_iter = 648 for (URLMatcherConditionSets::const_iterator condition_set_iter =
585 url_matcher_condition_sets_.begin(); 649 url_matcher_condition_sets_.begin();
586 condition_set_iter != url_matcher_condition_sets_.end(); 650 condition_set_iter != url_matcher_condition_sets_.end();
587 ++condition_set_iter) { 651 ++condition_set_iter) {
588 const URLMatcherConditionSet::Conditions& conditions = 652 const URLMatcherConditionSet::Conditions& conditions =
589 condition_set_iter->second->conditions(); 653 condition_set_iter->second->conditions();
590 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = 654 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
591 conditions.begin(); condition_iter != conditions.end(); 655 conditions.begin(); condition_iter != conditions.end();
592 ++condition_iter) { 656 ++condition_iter) {
593 // If we are called to process Full URL searches, ignore all others, 657 // If we are called to process Full URL searches, ignore others, and
594 // and vice versa. 658 // vice versa. (Regex conditions are updated in UpdateRegexSetMatcher.)
595 if (full_url_conditions == condition_iter->IsFullURLCondition()) 659 if (!condition_iter->IsRegexCondition() &&
596 new_patterns.insert(condition_iter->substring_pattern()); 660 full_url_conditions == condition_iter->IsFullURLCondition())
661 new_patterns.insert(condition_iter->string_pattern());
597 } 662 }
598 } 663 }
599 664
600 // This is the set of patterns that were registered before this function 665 // This is the set of patterns that were registered before this function
601 // is called. 666 // is called.
602 std::set<const SubstringPattern*>& registered_patterns = 667 std::set<const StringPattern*>& registered_patterns =
603 full_url_conditions ? registered_full_url_patterns_ 668 full_url_conditions ? registered_full_url_patterns_
604 : registered_url_component_patterns_; 669 : registered_url_component_patterns_;
605 670
606 // Add all patterns that are in new_patterns but not in registered_patterns. 671 // Add all patterns that are in new_patterns but not in registered_patterns.
607 std::vector<const SubstringPattern*> patterns_to_register; 672 std::vector<const StringPattern*> patterns_to_register;
608 std::set_difference( 673 std::set_difference(
609 new_patterns.begin(), new_patterns.end(), 674 new_patterns.begin(), new_patterns.end(),
610 registered_patterns.begin(), registered_patterns.end(), 675 registered_patterns.begin(), registered_patterns.end(),
611 std::back_inserter(patterns_to_register)); 676 std::back_inserter(patterns_to_register));
612 677
613 // Remove all patterns that are in registered_patterns but not in 678 // Remove all patterns that are in registered_patterns but not in
614 // new_patterns. 679 // new_patterns.
615 std::vector<const SubstringPattern*> patterns_to_unregister; 680 std::vector<const StringPattern*> patterns_to_unregister;
616 std::set_difference( 681 std::set_difference(
617 registered_patterns.begin(), registered_patterns.end(), 682 registered_patterns.begin(), registered_patterns.end(),
618 new_patterns.begin(), new_patterns.end(), 683 new_patterns.begin(), new_patterns.end(),
619 std::back_inserter(patterns_to_unregister)); 684 std::back_inserter(patterns_to_unregister));
620 685
621 // Update the SubstringSetMatcher. 686 // Update the SubstringSetMatcher.
622 SubstringSetMatcher& url_matcher = 687 SubstringSetMatcher& url_matcher =
623 full_url_conditions ? full_url_matcher_ : url_component_matcher_; 688 full_url_conditions ? full_url_matcher_ : url_component_matcher_;
624 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register, 689 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register,
625 patterns_to_unregister); 690 patterns_to_unregister);
626 691
627 // Update the set of registered_patterns for the next time this function 692 // Update the set of registered_patterns for the next time this function
628 // is being called. 693 // is being called.
629 registered_patterns.swap(new_patterns); 694 registered_patterns.swap(new_patterns);
630 } 695 }
631 696
697 void URLMatcher::UpdateRegexSetMatcher() {
698 std::vector<const StringPattern*> new_patterns;
699
700 for (URLMatcherConditionSets::const_iterator condition_set_iter =
701 url_matcher_condition_sets_.begin();
702 condition_set_iter != url_matcher_condition_sets_.end();
703 ++condition_set_iter) {
704 const URLMatcherConditionSet::Conditions& conditions =
705 condition_set_iter->second->conditions();
706 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
707 conditions.begin(); condition_iter != conditions.end();
708 ++condition_iter) {
709 if (condition_iter->IsRegexCondition())
710 new_patterns.push_back(condition_iter->string_pattern());
711 }
712 }
713
714 // Start over from scratch. We can't really do better than this, since the
715 // FilteredRE2 backend doesn't support incremental updates.
716 regex_set_matcher_.ClearPatterns();
717 regex_set_matcher_.AddPatterns(new_patterns);
718 }
719
632 void URLMatcher::UpdateTriggers() { 720 void URLMatcher::UpdateTriggers() {
633 // Count substring pattern frequencies. 721 // Count substring pattern frequencies.
634 std::map<SubstringPattern::ID, size_t> substring_pattern_frequencies; 722 std::map<StringPattern::ID, size_t> substring_pattern_frequencies;
635 for (URLMatcherConditionSets::const_iterator condition_set_iter = 723 for (URLMatcherConditionSets::const_iterator condition_set_iter =
636 url_matcher_condition_sets_.begin(); 724 url_matcher_condition_sets_.begin();
637 condition_set_iter != url_matcher_condition_sets_.end(); 725 condition_set_iter != url_matcher_condition_sets_.end();
638 ++condition_set_iter) { 726 ++condition_set_iter) {
639 const URLMatcherConditionSet::Conditions& conditions = 727 const URLMatcherConditionSet::Conditions& conditions =
640 condition_set_iter->second->conditions(); 728 condition_set_iter->second->conditions();
641 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = 729 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
642 conditions.begin(); condition_iter != conditions.end(); 730 conditions.begin(); condition_iter != conditions.end();
643 ++condition_iter) { 731 ++condition_iter) {
644 const SubstringPattern* pattern = condition_iter->substring_pattern(); 732 const StringPattern* pattern = condition_iter->string_pattern();
645 substring_pattern_frequencies[pattern->id()]++; 733 substring_pattern_frequencies[pattern->id()]++;
646 } 734 }
647 } 735 }
648 736
649 // Update trigger conditions: Determine for each URLMatcherConditionSet which 737 // Update trigger conditions: Determine for each URLMatcherConditionSet which
650 // URLMatcherCondition contains a SubstringPattern that occurs least 738 // URLMatcherCondition contains a StringPattern that occurs least
651 // frequently in this URLMatcher. We assume that this condition is very 739 // frequently in this URLMatcher. We assume that this condition is very
652 // specific and occurs rarely in URLs. If a match occurs for this 740 // specific and occurs rarely in URLs. If a match occurs for this
653 // URLMatcherCondition, we want to test all other URLMatcherCondition in the 741 // URLMatcherCondition, we want to test all other URLMatcherCondition in the
654 // respective URLMatcherConditionSet as well to see whether the entire 742 // respective URLMatcherConditionSet as well to see whether the entire
655 // URLMatcherConditionSet is considered matching. 743 // URLMatcherConditionSet is considered matching.
656 substring_match_triggers_.clear(); 744 substring_match_triggers_.clear();
657 for (URLMatcherConditionSets::const_iterator condition_set_iter = 745 for (URLMatcherConditionSets::const_iterator condition_set_iter =
658 url_matcher_condition_sets_.begin(); 746 url_matcher_condition_sets_.begin();
659 condition_set_iter != url_matcher_condition_sets_.end(); 747 condition_set_iter != url_matcher_condition_sets_.end();
660 ++condition_set_iter) { 748 ++condition_set_iter) {
661 const URLMatcherConditionSet::Conditions& conditions = 749 const URLMatcherConditionSet::Conditions& conditions =
662 condition_set_iter->second->conditions(); 750 condition_set_iter->second->conditions();
663 if (conditions.empty()) 751 if (conditions.empty())
664 continue; 752 continue;
665 URLMatcherConditionSet::Conditions::const_iterator condition_iter = 753 URLMatcherConditionSet::Conditions::const_iterator condition_iter =
666 conditions.begin(); 754 conditions.begin();
667 SubstringPattern::ID trigger = condition_iter->substring_pattern()->id(); 755 StringPattern::ID trigger = condition_iter->string_pattern()->id();
668 // We skip the first element in the following loop. 756 // We skip the first element in the following loop.
669 ++condition_iter; 757 ++condition_iter;
670 for (; condition_iter != conditions.end(); ++condition_iter) { 758 for (; condition_iter != conditions.end(); ++condition_iter) {
671 SubstringPattern::ID current_id = 759 StringPattern::ID current_id =
672 condition_iter->substring_pattern()->id(); 760 condition_iter->string_pattern()->id();
673 if (substring_pattern_frequencies[trigger] > 761 if (substring_pattern_frequencies[trigger] >
674 substring_pattern_frequencies[current_id]) { 762 substring_pattern_frequencies[current_id]) {
675 trigger = current_id; 763 trigger = current_id;
676 } 764 }
677 } 765 }
678 substring_match_triggers_[trigger].insert(condition_set_iter->second->id()); 766 substring_match_triggers_[trigger].insert(condition_set_iter->second->id());
679 } 767 }
680 } 768 }
681 769
682 void URLMatcher::UpdateConditionFactory() { 770 void URLMatcher::UpdateConditionFactory() {
683 std::set<SubstringPattern::ID> used_patterns; 771 std::set<StringPattern::ID> used_patterns;
684 for (URLMatcherConditionSets::const_iterator condition_set_iter = 772 for (URLMatcherConditionSets::const_iterator condition_set_iter =
685 url_matcher_condition_sets_.begin(); 773 url_matcher_condition_sets_.begin();
686 condition_set_iter != url_matcher_condition_sets_.end(); 774 condition_set_iter != url_matcher_condition_sets_.end();
687 ++condition_set_iter) { 775 ++condition_set_iter) {
688 const URLMatcherConditionSet::Conditions& conditions = 776 const URLMatcherConditionSet::Conditions& conditions =
689 condition_set_iter->second->conditions(); 777 condition_set_iter->second->conditions();
690 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = 778 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
691 conditions.begin(); condition_iter != conditions.end(); 779 conditions.begin(); condition_iter != conditions.end();
692 ++condition_iter) { 780 ++condition_iter) {
693 used_patterns.insert(condition_iter->substring_pattern()->id()); 781 used_patterns.insert(condition_iter->string_pattern()->id());
694 } 782 }
695 } 783 }
696 condition_factory_.ForgetUnusedPatterns(used_patterns); 784 condition_factory_.ForgetUnusedPatterns(used_patterns);
697 } 785 }
698 786
699 void URLMatcher::UpdateInternalDatastructures() { 787 void URLMatcher::UpdateInternalDatastructures() {
700 UpdateSubstringSetMatcher(false); 788 UpdateSubstringSetMatcher(false);
701 UpdateSubstringSetMatcher(true); 789 UpdateSubstringSetMatcher(true);
790 UpdateRegexSetMatcher();
702 UpdateTriggers(); 791 UpdateTriggers();
703 UpdateConditionFactory(); 792 UpdateConditionFactory();
704 } 793 }
705 794
706 } // namespace extensions 795 } // namespace extensions
OLDNEW
« no previous file with comments | « chrome/common/extensions/matcher/url_matcher.h ('k') | chrome/common/extensions/matcher/url_matcher_constants.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698