Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(226)

Side by Side Diff: chrome/common/extensions/matcher/url_matcher.cc

Issue 10823313: Let url filter test the scheme in urlContains/Equals/Prefix/Suffix criteria (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Addressed comments Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/common/extensions/matcher/url_matcher.h" 5 #include "chrome/common/extensions/matcher/url_matcher.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <iterator> 8 #include <iterator>
9 9
10 #include "base/logging.h" 10 #include "base/logging.h"
11 #include "content/public/common/url_constants.h"
11 #include "googleurl/src/gurl.h" 12 #include "googleurl/src/gurl.h"
13 #include "googleurl/src/url_canon.h"
12 14
13 namespace extensions { 15 namespace extensions {
14 16
15 // This set of classes implements a mapping of URL Component Patterns, such as 17 // This set of classes implements a mapping of URL Component Patterns, such as
16 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. 18 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns.
17 // 19 //
18 // The idea of this mapping is to reduce the problem of comparing many 20 // The idea of this mapping is to reduce the problem of comparing many
19 // URL Component Patterns against one URL to the problem of searching many 21 // URL Component Patterns against one URL to the problem of searching many
20 // substrings in one string: 22 // substrings in one string:
21 // 23 //
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
81 // -> host_equals("www.example.com") = BU .www.example.com ED 83 // -> host_equals("www.example.com") = BU .www.example.com ED
82 // 84 //
83 // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}). 85 // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}).
84 // 86 //
85 // With this, we can search the SubstringPatterns in the normalized URL. 87 // With this, we can search the SubstringPatterns in the normalized URL.
86 // 88 //
87 // 89 //
88 // Case 2: url_{prefix,suffix,equals,contains} searches. 90 // Case 2: url_{prefix,suffix,equals,contains} searches.
89 // ===================================================== 91 // =====================================================
90 // 92 //
91 // Step 1: as above 93 // Step 1: as above, except that
94 // - the scheme is not removed
95 // - the port is not removed if it is specified and does not match the default
96 // port for the given scheme.
92 // 97 //
93 // Step 2: 98 // Step 2:
94 // Translate URL to String and add the following position markers: 99 // Translate URL to String and add the following position markers:
95 // - BU = Beginning of URL 100 // - BU = Beginning of URL
96 // - EU = End of URL 101 // - EU = End of URL
97 // Furthermore, the hostname is canonicalized to start with a ".".
98 // 102 //
99 // -> www.example.com/index.html?search=foo becomes 103 // -> http://www.example.com:8080/index.html?search=foo#first_match becomes
100 // BU .www.example.com/index.html?search=foo EU 104 // BU http://www.example.com:8080/index.html?search=foo EU
105 // -> http://www.example.com:80/index.html?search=foo#first_match becomes
106 // BU http://www.example.com/index.html?search=foo EU
101 // 107 //
102 // url_prefix(prefix) = BU add_missing_dot_prefix(prefix) 108 // url_prefix(prefix) = BU prefix
103 // -> url_prefix("www.example") = BU .www.example 109 // -> url_prefix("http://www.example") = BU http://www.example
104 // 110 //
105 // url_contains(substring) = substring 111 // url_contains(substring) = substring
106 // -> url_contains("index") = index 112 // -> url_contains("index") = index
107 // 113 //
108 // 114 //
109 // Case 3: {host,path,query}_contains searches. 115 // Case 3: {host,path,query}_contains searches.
110 // ============================================ 116 // ============================================
111 // 117 //
112 // These kinds of searches are not supported directly but can be derived 118 // These kinds of searches are not supported directly but can be derived
113 // by a combination of a url_contains() query followed by an explicit test: 119 // by a combination of a url_contains() query followed by an explicit test:
114 // 120 //
115 // host_contains(str) = url_contains(str) followed by test whether str occurs 121 // host_contains(str) = url_contains(str) followed by test whether str occurs
116 // in host comonent of original URL. 122 // in host component of original URL.
117 // -> host_contains("example.co") = example.co 123 // -> host_contains("example.co") = example.co
118 // followed by gurl.host().find("example.co"); 124 // followed by gurl.host().find("example.co");
119 // 125 //
120 // [similarly for path_contains and query_contains]. 126 // [similarly for path_contains and query_contains].
121 127
122 128
123 // 129 //
124 // URLMatcherCondition 130 // URLMatcherCondition
125 // 131 //
126 132
(...skipping 26 matching lines...) Expand all
153 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) 159 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL)
154 return *substring_pattern_ < *rhs.substring_pattern_; 160 return *substring_pattern_ < *rhs.substring_pattern_;
155 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; 161 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true;
156 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, 162 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL,
157 // or both are NULL. 163 // or both are NULL.
158 return false; 164 return false;
159 } 165 }
160 166
161 bool URLMatcherCondition::IsFullURLCondition() const { 167 bool URLMatcherCondition::IsFullURLCondition() const {
162 // For these criteria the SubstringMatcher needs to be executed on the 168 // For these criteria the SubstringMatcher needs to be executed on the
163 // GURL that is canonlizaliced with 169 // GURL that is canonicalized with
164 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. 170 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches.
165 switch (criterion_) { 171 switch (criterion_) {
166 case HOST_CONTAINS: 172 case HOST_CONTAINS:
167 case PATH_CONTAINS: 173 case PATH_CONTAINS:
168 case QUERY_CONTAINS: 174 case QUERY_CONTAINS:
169 case URL_PREFIX: 175 case URL_PREFIX:
170 case URL_SUFFIX: 176 case URL_SUFFIX:
171 case URL_CONTAINS: 177 case URL_CONTAINS:
172 case URL_EQUALS: 178 case URL_EQUALS:
173 return true; 179 return true;
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after
307 URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition( 313 URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition(
308 const std::string& host, 314 const std::string& host,
309 const std::string& path_prefix) { 315 const std::string& path_prefix) {
310 return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX, 316 return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX,
311 kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain + 317 kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain +
312 path_prefix); 318 path_prefix);
313 } 319 }
314 320
315 std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches( 321 std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches(
316 const GURL& url) { 322 const GURL& url) {
317 return kBeginningOfURL + CanonicalizeHostname(url.host()) + url.path() + 323 GURL::Replacements replacements;
318 (url.has_query() ? "?" + url.query() : "") + kEndOfURL; 324 replacements.ClearPassword();
325 replacements.ClearUsername();
326 replacements.ClearRef();
327 // Clear port if it is implicit from scheme.
328 if (url.has_port()) {
329 const std::string& port = url.scheme();
330 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) ==
331 url.EffectiveIntPort()) {
332 replacements.ClearPort();
333 }
334 }
335 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() +
336 kEndOfURL;
319 } 337 }
320 338
321 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( 339 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition(
322 const std::string& prefix) { 340 const std::string& prefix) {
323 return CreateCondition(URLMatcherCondition::URL_PREFIX, 341 return CreateCondition(URLMatcherCondition::URL_PREFIX,
324 kBeginningOfURL + CanonicalizeHostname(prefix)); 342 kBeginningOfURL + prefix);
325 } 343 }
326 344
327 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( 345 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition(
328 const std::string& suffix) { 346 const std::string& suffix) {
329 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); 347 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL);
330 } 348 }
331 349
332 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( 350 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition(
333 const std::string& str) { 351 const std::string& str) {
334 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); 352 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str);
335 } 353 }
336 354
337 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( 355 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition(
338 const std::string& str) { 356 const std::string& str) {
339 return CreateCondition(URLMatcherCondition::URL_EQUALS, 357 return CreateCondition(URLMatcherCondition::URL_EQUALS,
340 kBeginningOfURL + CanonicalizeHostname(str) + kEndOfURL); 358 kBeginningOfURL + str + kEndOfURL);
341 } 359 }
342 360
343 void URLMatcherConditionFactory::ForgetUnusedPatterns( 361 void URLMatcherConditionFactory::ForgetUnusedPatterns(
344 const std::set<SubstringPattern::ID>& used_patterns) { 362 const std::set<SubstringPattern::ID>& used_patterns) {
345 PatternSingletons::iterator i = pattern_singletons_.begin(); 363 PatternSingletons::iterator i = pattern_singletons_.begin();
346 while (i != pattern_singletons_.end()) { 364 while (i != pattern_singletons_.end()) {
347 if (used_patterns.find((*i)->id()) != used_patterns.end()) { 365 if (used_patterns.find((*i)->id()) != used_patterns.end()) {
348 ++i; 366 ++i;
349 } else { 367 } else {
350 delete *i; 368 delete *i;
(...skipping 328 matching lines...) Expand 10 before | Expand all | Expand 10 after
679 } 697 }
680 698
681 void URLMatcher::UpdateInternalDatastructures() { 699 void URLMatcher::UpdateInternalDatastructures() {
682 UpdateSubstringSetMatcher(false); 700 UpdateSubstringSetMatcher(false);
683 UpdateSubstringSetMatcher(true); 701 UpdateSubstringSetMatcher(true);
684 UpdateTriggers(); 702 UpdateTriggers();
685 UpdateConditionFactory(); 703 UpdateConditionFactory();
686 } 704 }
687 705
688 } // namespace extensions 706 } // namespace extensions
OLDNEW
« no previous file with comments | « chrome/common/extensions/docs/samples.json ('k') | chrome/common/extensions/matcher/url_matcher_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698