OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/common/extensions/matcher/url_matcher.h" | 5 #include "chrome/common/extensions/matcher/url_matcher.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
9 | 9 |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
| 11 #include "content/public/common/url_constants.h" |
11 #include "googleurl/src/gurl.h" | 12 #include "googleurl/src/gurl.h" |
| 13 #include "googleurl/src/url_canon.h" |
12 | 14 |
13 namespace extensions { | 15 namespace extensions { |
14 | 16 |
15 // This set of classes implements a mapping of URL Component Patterns, such as | 17 // This set of classes implements a mapping of URL Component Patterns, such as |
16 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. | 18 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. |
17 // | 19 // |
18 // The idea of this mapping is to reduce the problem of comparing many | 20 // The idea of this mapping is to reduce the problem of comparing many |
19 // URL Component Patterns against one URL to the problem of searching many | 21 // URL Component Patterns against one URL to the problem of searching many |
20 // substrings in one string: | 22 // substrings in one string: |
21 // | 23 // |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
81 // -> host_equals("www.example.com") = BU .www.example.com ED | 83 // -> host_equals("www.example.com") = BU .www.example.com ED |
82 // | 84 // |
83 // Similarly for path and query parameters ({path, query}_{prefix, suffix, equals}). | 85 // Similarly for path and query parameters ({path, query}_{prefix, suffix, equals}). |
84 // | 86 // |
85 // With this, we can search the SubstringPatterns in the normalized URL. | 87 // With this, we can search the SubstringPatterns in the normalized URL. |
86 // | 88 // |
87 // | 89 // |
88 // Case 2: url_{prefix,suffix,equals,contains} searches. | 90 // Case 2: url_{prefix,suffix,equals,contains} searches. |
89 // ===================================================== | 91 // ===================================================== |
90 // | 92 // |
91 // Step 1: as above | 93 // Step 1: as above, except that |
| 94 // - the scheme is not removed |
| 95 // - the port is not removed if it is specified and does not match the default |
| 96 // port for the given scheme. |
92 // | 97 // |
93 // Step 2: | 98 // Step 2: |
94 // Translate URL to String and add the following position markers: | 99 // Translate URL to String and add the following position markers: |
95 // - BU = Beginning of URL | 100 // - BU = Beginning of URL |
96 // - EU = End of URL | 101 // - EU = End of URL |
97 // Furthermore, the hostname is canonicalized to start with a ".". | |
98 // | 102 // |
99 // -> www.example.com/index.html?search=foo becomes | 103 // -> http://www.example.com:8080/index.html?search=foo#first_match becomes |
100 // BU .www.example.com/index.html?search=foo EU | 104 // BU http://www.example.com:8080/index.html?search=foo EU |
| 105 // -> http://www.example.com:80/index.html?search=foo#first_match becomes |
| 106 // BU http://www.example.com/index.html?search=foo EU |
101 // | 107 // |
102 // url_prefix(prefix) = BU add_missing_dot_prefix(prefix) | 108 // url_prefix(prefix) = BU prefix |
103 // -> url_prefix("www.example") = BU .www.example | 109 // -> url_prefix("http://www.example") = BU http://www.example |
104 // | 110 // |
105 // url_contains(substring) = substring | 111 // url_contains(substring) = substring |
106 // -> url_contains("index") = index | 112 // -> url_contains("index") = index |
107 // | 113 // |
108 // | 114 // |
109 // Case 3: {host,path,query}_contains searches. | 115 // Case 3: {host,path,query}_contains searches. |
110 // ============================================ | 116 // ============================================ |
111 // | 117 // |
112 // These kinds of searches are not supported directly but can be derived | 118 // These kinds of searches are not supported directly but can be derived |
113 // by a combination of a url_contains() query followed by an explicit test: | 119 // by a combination of a url_contains() query followed by an explicit test: |
114 // | 120 // |
115 // host_contains(str) = url_contains(str) followed by test whether str occurs | 121 // host_contains(str) = url_contains(str) followed by test whether str occurs |
116 // in host comonent of original URL. | 122 // in host component of original URL. |
117 // -> host_contains("example.co") = example.co | 123 // -> host_contains("example.co") = example.co |
118 // followed by gurl.host().find("example.co"); | 124 // followed by gurl.host().find("example.co"); |
119 // | 125 // |
120 // [similarly for path_contains and query_contains]. | 126 // [similarly for path_contains and query_contains]. |
121 | 127 |
122 | 128 |
123 // | 129 // |
124 // URLMatcherCondition | 130 // URLMatcherCondition |
125 // | 131 // |
126 | 132 |
(...skipping 26 matching lines...) Expand all Loading... |
153 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) | 159 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) |
154 return *substring_pattern_ < *rhs.substring_pattern_; | 160 return *substring_pattern_ < *rhs.substring_pattern_; |
155 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; | 161 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; |
156 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, | 162 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, |
157 // or both are NULL. | 163 // or both are NULL. |
158 return false; | 164 return false; |
159 } | 165 } |
160 | 166 |
161 bool URLMatcherCondition::IsFullURLCondition() const { | 167 bool URLMatcherCondition::IsFullURLCondition() const { |
162 // For these criteria the SubstringMatcher needs to be executed on the | 168 // For these criteria the SubstringMatcher needs to be executed on the |
163 // GURL that is canonlizaliced with | 169 // GURL that is canonicalized with |
164 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. | 170 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. |
165 switch (criterion_) { | 171 switch (criterion_) { |
166 case HOST_CONTAINS: | 172 case HOST_CONTAINS: |
167 case PATH_CONTAINS: | 173 case PATH_CONTAINS: |
168 case QUERY_CONTAINS: | 174 case QUERY_CONTAINS: |
169 case URL_PREFIX: | 175 case URL_PREFIX: |
170 case URL_SUFFIX: | 176 case URL_SUFFIX: |
171 case URL_CONTAINS: | 177 case URL_CONTAINS: |
172 case URL_EQUALS: | 178 case URL_EQUALS: |
173 return true; | 179 return true; |
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
307 URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition( | 313 URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition( |
308 const std::string& host, | 314 const std::string& host, |
309 const std::string& path_prefix) { | 315 const std::string& path_prefix) { |
310 return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX, | 316 return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX, |
311 kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain + | 317 kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain + |
312 path_prefix); | 318 path_prefix); |
313 } | 319 } |
314 | 320 |
315 std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches( | 321 std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches( |
316 const GURL& url) { | 322 const GURL& url) { |
317 return kBeginningOfURL + CanonicalizeHostname(url.host()) + url.path() + | 323 GURL::Replacements replacements; |
318 (url.has_query() ? "?" + url.query() : "") + kEndOfURL; | 324 replacements.ClearPassword(); |
| 325 replacements.ClearUsername(); |
| 326 replacements.ClearRef(); |
| 327 // Clear port if it is implicit from scheme. |
| 328 if (url.has_port()) { |
| 329 const std::string& port = url.scheme(); |
| 330 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) == |
| 331 url.EffectiveIntPort()) { |
| 332 replacements.ClearPort(); |
| 333 } |
| 334 } |
| 335 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() + |
| 336 kEndOfURL; |
319 } | 337 } |
320 | 338 |
321 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( | 339 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( |
322 const std::string& prefix) { | 340 const std::string& prefix) { |
323 return CreateCondition(URLMatcherCondition::URL_PREFIX, | 341 return CreateCondition(URLMatcherCondition::URL_PREFIX, |
324 kBeginningOfURL + CanonicalizeHostname(prefix)); | 342 kBeginningOfURL + prefix); |
325 } | 343 } |
326 | 344 |
327 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( | 345 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( |
328 const std::string& suffix) { | 346 const std::string& suffix) { |
329 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); | 347 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); |
330 } | 348 } |
331 | 349 |
332 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( | 350 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( |
333 const std::string& str) { | 351 const std::string& str) { |
334 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); | 352 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); |
335 } | 353 } |
336 | 354 |
337 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( | 355 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( |
338 const std::string& str) { | 356 const std::string& str) { |
339 return CreateCondition(URLMatcherCondition::URL_EQUALS, | 357 return CreateCondition(URLMatcherCondition::URL_EQUALS, |
340 kBeginningOfURL + CanonicalizeHostname(str) + kEndOfURL); | 358 kBeginningOfURL + str + kEndOfURL); |
341 } | 359 } |
342 | 360 |
343 void URLMatcherConditionFactory::ForgetUnusedPatterns( | 361 void URLMatcherConditionFactory::ForgetUnusedPatterns( |
344 const std::set<SubstringPattern::ID>& used_patterns) { | 362 const std::set<SubstringPattern::ID>& used_patterns) { |
345 PatternSingletons::iterator i = pattern_singletons_.begin(); | 363 PatternSingletons::iterator i = pattern_singletons_.begin(); |
346 while (i != pattern_singletons_.end()) { | 364 while (i != pattern_singletons_.end()) { |
347 if (used_patterns.find((*i)->id()) != used_patterns.end()) { | 365 if (used_patterns.find((*i)->id()) != used_patterns.end()) { |
348 ++i; | 366 ++i; |
349 } else { | 367 } else { |
350 delete *i; | 368 delete *i; |
(...skipping 328 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
679 } | 697 } |
680 | 698 |
681 void URLMatcher::UpdateInternalDatastructures() { | 699 void URLMatcher::UpdateInternalDatastructures() { |
682 UpdateSubstringSetMatcher(false); | 700 UpdateSubstringSetMatcher(false); |
683 UpdateSubstringSetMatcher(true); | 701 UpdateSubstringSetMatcher(true); |
684 UpdateTriggers(); | 702 UpdateTriggers(); |
685 UpdateConditionFactory(); | 703 UpdateConditionFactory(); |
686 } | 704 } |
687 | 705 |
688 } // namespace extensions | 706 } // namespace extensions |
OLD | NEW |