Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(476)

Side by Side Diff: chrome/common/extensions/matcher/url_matcher.cc

Issue 10823313: Let url filter test the scheme in urlContains/Equals/Prefix/Suffix criteria (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Merged with ToT Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/common/extensions/matcher/url_matcher.h" 5 #include "chrome/common/extensions/matcher/url_matcher.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <iterator> 8 #include <iterator>
9 9
10 #include "base/logging.h" 10 #include "base/logging.h"
11 #include "content/public/common/url_constants.h"
11 #include "googleurl/src/gurl.h" 12 #include "googleurl/src/gurl.h"
12 13
13 namespace extensions { 14 namespace extensions {
14 15
15 // This set of classes implements a mapping of URL Component Patterns, such as 16 // This set of classes implements a mapping of URL Component Patterns, such as
16 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. 17 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns.
17 // 18 //
18 // The idea of this mapping is to reduce the problem of comparing many 19 // The idea of this mapping is to reduce the problem of comparing many
19 // URL Component Patterns against one URL to the problem of searching many 20 // URL Component Patterns against one URL to the problem of searching many
20 // substrings in one string: 21 // substrings in one string:
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
81 // -> host_equals("www.example.com") = BU .www.example.com ED 82 // -> host_equals("www.example.com") = BU .www.example.com ED
82 // 83 //
83 // Similarly for path and query parameters ({path, query}_{prefix, suffix, equals}). 84 // Similarly for path and query parameters ({path, query}_{prefix, suffix, equals}).
84 // 85 //
85 // With this, we can search the SubstringPatterns in the normalized URL. 86 // With this, we can search the SubstringPatterns in the normalized URL.
86 // 87 //
87 // 88 //
88 // Case 2: url_{prefix,suffix,equals,contains} searches. 89 // Case 2: url_{prefix,suffix,equals,contains} searches.
89 // ===================================================== 90 // =====================================================
90 // 91 //
91 // Step 1: as above 92 // Step 1: as above, except that the scheme is not removed.
92 // 93 //
93 // Step 2: 94 // Step 2:
94 // Translate URL to String and add the following position markers: 95 // Translate URL to String and add the following position markers:
95 // - BU = Beginning of URL 96 // - BU = Beginning of URL
96 // - EU = End of URL 97 // - EU = End of URL
97 // Furthermore, the hostname is canonicalized to start with a ".".
98 // 98 //
99 // -> www.example.com/index.html?search=foo becomes 99 // -> http://www.example.com:8080/index.html?search=foo#first_match becomes
100 // BU .www.example.com/index.html?search=foo EU 100 // BU http://www.example.com/index.html?search=foo EU
101 // 101 //
102 // url_prefix(prefix) = BU add_missing_dot_prefix(prefix) 102 // url_prefix(prefix) = BU prefix
103 // -> url_prefix("www.example") = BU .www.example 103 // -> url_prefix("http://www.example") = BU http://www.example
104 // 104 //
105 // url_contains(substring) = substring 105 // url_contains(substring) = substring
106 // -> url_contains("index") = index 106 // -> url_contains("index") = index
107 // 107 //
108 // 108 //
109 // Case 3: {host,path,query}_contains searches. 109 // Case 3: {host,path,query}_contains searches.
110 // ============================================ 110 // ============================================
111 // 111 //
112 // These kinds of searches are not supported directly but can be derived 112 // These kinds of searches are not supported directly but can be derived
113 // by a combination of a url_contains() query followed by an explicit test: 113 // by a combination of a url_contains() query followed by an explicit test:
114 // 114 //
115 // host_contains(str) = url_contains(str) followed by test whether str occurs 115 // host_contains(str) = url_contains(str) followed by test whether str occurs
116 // in host comonent of original URL. 116 // in host component of original URL.
117 // -> host_contains("example.co") = example.co 117 // -> host_contains("example.co") = example.co
118 // followed by gurl.host().find("example.co"); 118 // followed by gurl.host().find("example.co");
119 // 119 //
120 // [similarly for path_contains and query_contains]. 120 // [similarly for path_contains and query_contains].
121 121
122 122
123 // 123 //
124 // URLMatcherCondition 124 // URLMatcherCondition
125 // 125 //
126 126
(...skipping 26 matching lines...) Expand all
153 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) 153 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL)
154 return *substring_pattern_ < *rhs.substring_pattern_; 154 return *substring_pattern_ < *rhs.substring_pattern_;
155 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; 155 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true;
156 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, 156 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL,
157 // or both are NULL. 157 // or both are NULL.
158 return false; 158 return false;
159 } 159 }
160 160
161 bool URLMatcherCondition::IsFullURLCondition() const { 161 bool URLMatcherCondition::IsFullURLCondition() const {
162 // For these criteria the SubstringMatcher needs to be executed on the 162 // For these criteria the SubstringMatcher needs to be executed on the
163 // GURL that is canonlizaliced with 163 // GURL that is canonicalized with
164 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. 164 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches.
165 switch (criterion_) { 165 switch (criterion_) {
166 case HOST_CONTAINS: 166 case HOST_CONTAINS:
167 case PATH_CONTAINS: 167 case PATH_CONTAINS:
168 case QUERY_CONTAINS: 168 case QUERY_CONTAINS:
169 case URL_PREFIX: 169 case URL_PREFIX:
170 case URL_SUFFIX: 170 case URL_SUFFIX:
171 case URL_CONTAINS: 171 case URL_CONTAINS:
172 case URL_EQUALS: 172 case URL_EQUALS:
173 return true; 173 return true;
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after
307 URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition( 307 URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition(
308 const std::string& host, 308 const std::string& host,
309 const std::string& path_prefix) { 309 const std::string& path_prefix) {
310 return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX, 310 return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX,
311 kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain + 311 kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain +
312 path_prefix); 312 path_prefix);
313 } 313 }
314 314
315 std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches( 315 std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches(
316 const GURL& url) { 316 const GURL& url) {
317 return kBeginningOfURL + CanonicalizeHostname(url.host()) + url.path() + 317 return kBeginningOfURL + url.scheme() + content::kStandardSchemeSeparator +
Yoyo Zhou 2012/08/15 17:53:12 By the way, I just learned about GURL::Replacement
battre 2012/08/16 13:16:25 I was aware of this. I cannot use the GURL::Replac
318 (url.has_query() ? "?" + url.query() : "") + kEndOfURL; 318 url.host() + url.path() + (url.has_query() ? "?" + url.query() : "") +
319 kEndOfURL;
319 } 320 }
320 321
321 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( 322 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition(
322 const std::string& prefix) { 323 const std::string& prefix) {
323 return CreateCondition(URLMatcherCondition::URL_PREFIX, 324 return CreateCondition(URLMatcherCondition::URL_PREFIX,
324 kBeginningOfURL + CanonicalizeHostname(prefix)); 325 kBeginningOfURL + prefix);
325 } 326 }
326 327
327 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( 328 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition(
328 const std::string& suffix) { 329 const std::string& suffix) {
329 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); 330 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL);
330 } 331 }
331 332
332 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( 333 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition(
333 const std::string& str) { 334 const std::string& str) {
334 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); 335 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str);
335 } 336 }
336 337
337 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( 338 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition(
338 const std::string& str) { 339 const std::string& str) {
339 return CreateCondition(URLMatcherCondition::URL_EQUALS, 340 return CreateCondition(URLMatcherCondition::URL_EQUALS,
340 kBeginningOfURL + CanonicalizeHostname(str) + kEndOfURL); 341 kBeginningOfURL + str + kEndOfURL);
341 } 342 }
342 343
343 void URLMatcherConditionFactory::ForgetUnusedPatterns( 344 void URLMatcherConditionFactory::ForgetUnusedPatterns(
344 const std::set<SubstringPattern::ID>& used_patterns) { 345 const std::set<SubstringPattern::ID>& used_patterns) {
345 PatternSingletons::iterator i = pattern_singletons_.begin(); 346 PatternSingletons::iterator i = pattern_singletons_.begin();
346 while (i != pattern_singletons_.end()) { 347 while (i != pattern_singletons_.end()) {
347 if (used_patterns.find((*i)->id()) != used_patterns.end()) { 348 if (used_patterns.find((*i)->id()) != used_patterns.end()) {
348 ++i; 349 ++i;
349 } else { 350 } else {
350 delete *i; 351 delete *i;
(...skipping 328 matching lines...) Expand 10 before | Expand all | Expand 10 after
679 } 680 }
680 681
681 void URLMatcher::UpdateInternalDatastructures() { 682 void URLMatcher::UpdateInternalDatastructures() {
682 UpdateSubstringSetMatcher(false); 683 UpdateSubstringSetMatcher(false);
683 UpdateSubstringSetMatcher(true); 684 UpdateSubstringSetMatcher(true);
684 UpdateTriggers(); 685 UpdateTriggers();
685 UpdateConditionFactory(); 686 UpdateConditionFactory();
686 } 687 }
687 688
688 } // namespace extensions 689 } // namespace extensions
OLD NEW

Powered by Google App Engine
This is Rietveld 408576698