Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/common/extensions/matcher/url_matcher.h" | 5 #include "chrome/common/extensions/matcher/url_matcher.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <iterator> | 8 #include <iterator> |
| 9 | 9 |
| 10 #include "base/logging.h" | 10 #include "base/logging.h" |
| 11 #include "content/public/common/url_constants.h" | |
| 11 #include "googleurl/src/gurl.h" | 12 #include "googleurl/src/gurl.h" |
| 12 | 13 |
| 13 namespace extensions { | 14 namespace extensions { |
| 14 | 15 |
| 15 // This set of classes implements a mapping of URL Component Patterns, such as | 16 // This set of classes implements a mapping of URL Component Patterns, such as |
| 16 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. | 17 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. |
| 17 // | 18 // |
| 18 // The idea of this mapping is to reduce the problem of comparing many | 19 // The idea of this mapping is to reduce the problem of comparing many |
| 19 // URL Component Patterns against one URL to the problem of searching many | 20 // URL Component Patterns against one URL to the problem of searching many |
| 20 // substrings in one string: | 21 // substrings in one string: |
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 81 // -> host_equals("www.example.com") = BU .www.example.com ED | 82 // -> host_equals("www.example.com") = BU .www.example.com ED |
| 82 // | 83 // |
| 83 // Similarly for path and query parameters ({path, query}_{prefix, suffix, equals}). | 84 // Similarly for path and query parameters ({path, query}_{prefix, suffix, equals}). |
| 84 // | 85 // |
| 85 // With this, we can search the SubstringPatterns in the normalized URL. | 86 // With this, we can search the SubstringPatterns in the normalized URL. |
| 86 // | 87 // |
| 87 // | 88 // |
| 88 // Case 2: url_{prefix,suffix,equals,contains} searches. | 89 // Case 2: url_{prefix,suffix,equals,contains} searches. |
| 89 // ===================================================== | 90 // ===================================================== |
| 90 // | 91 // |
| 91 // Step 1: as above | 92 // Step 1: as above, except that the scheme is not removed. |
| 92 // | 93 // |
| 93 // Step 2: | 94 // Step 2: |
| 94 // Translate URL to String and add the following position markers: | 95 // Translate URL to String and add the following position markers: |
| 95 // - BU = Beginning of URL | 96 // - BU = Beginning of URL |
| 96 // - EU = End of URL | 97 // - EU = End of URL |
| 97 // Furthermore, the hostname is canonicalized to start with a ".". | |
| 98 // | 98 // |
| 99 // -> www.example.com/index.html?search=foo becomes | 99 // -> http://www.example.com:8080/index.html?search=foo#first_match becomes |
| 100 // BU .www.example.com/index.html?search=foo EU | 100 // BU http://www.example.com/index.html?search=foo EU |
| 101 // | 101 // |
| 102 // url_prefix(prefix) = BU add_missing_dot_prefix(prefix) | 102 // url_prefix(prefix) = BU prefix |
| 103 // -> url_prefix("www.example") = BU .www.example | 103 // -> url_prefix("http://www.example") = BU http://www.example |
| 104 // | 104 // |
| 105 // url_contains(substring) = substring | 105 // url_contains(substring) = substring |
| 106 // -> url_contains("index") = index | 106 // -> url_contains("index") = index |
| 107 // | 107 // |
| 108 // | 108 // |
| 109 // Case 3: {host,path,query}_contains searches. | 109 // Case 3: {host,path,query}_contains searches. |
| 110 // ============================================ | 110 // ============================================ |
| 111 // | 111 // |
| 112 // These kinds of searches are not supported directly but can be derived | 112 // These kinds of searches are not supported directly but can be derived |
| 113 // by a combination of a url_contains() query followed by an explicit test: | 113 // by a combination of a url_contains() query followed by an explicit test: |
| 114 // | 114 // |
| 115 // host_contains(str) = url_contains(str) followed by test whether str occurs | 115 // host_contains(str) = url_contains(str) followed by test whether str occurs |
| 116 // in host comonent of original URL. | 116 // in host component of original URL. |
| 117 // -> host_contains("example.co") = example.co | 117 // -> host_contains("example.co") = example.co |
| 118 // followed by gurl.host().find("example.co"); | 118 // followed by gurl.host().find("example.co"); |
| 119 // | 119 // |
| 120 // [similarly for path_contains and query_contains]. | 120 // [similarly for path_contains and query_contains]. |
| 121 | 121 |
| 122 | 122 |
| 123 // | 123 // |
| 124 // URLMatcherCondition | 124 // URLMatcherCondition |
| 125 // | 125 // |
| 126 | 126 |
| (...skipping 26 matching lines...) Expand all Loading... | |
| 153 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) | 153 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) |
| 154 return *substring_pattern_ < *rhs.substring_pattern_; | 154 return *substring_pattern_ < *rhs.substring_pattern_; |
| 155 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; | 155 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; |
| 156 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, | 156 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, |
| 157 // or both are NULL. | 157 // or both are NULL. |
| 158 return false; | 158 return false; |
| 159 } | 159 } |
| 160 | 160 |
| 161 bool URLMatcherCondition::IsFullURLCondition() const { | 161 bool URLMatcherCondition::IsFullURLCondition() const { |
| 162 // For these criteria the SubstringMatcher needs to be executed on the | 162 // For these criteria the SubstringMatcher needs to be executed on the |
| 163 // GURL that is canonlizaliced with | 163 // GURL that is canonicalized with |
| 164 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. | 164 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. |
| 165 switch (criterion_) { | 165 switch (criterion_) { |
| 166 case HOST_CONTAINS: | 166 case HOST_CONTAINS: |
| 167 case PATH_CONTAINS: | 167 case PATH_CONTAINS: |
| 168 case QUERY_CONTAINS: | 168 case QUERY_CONTAINS: |
| 169 case URL_PREFIX: | 169 case URL_PREFIX: |
| 170 case URL_SUFFIX: | 170 case URL_SUFFIX: |
| 171 case URL_CONTAINS: | 171 case URL_CONTAINS: |
| 172 case URL_EQUALS: | 172 case URL_EQUALS: |
| 173 return true; | 173 return true; |
| (...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 307 URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition( | 307 URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition( |
| 308 const std::string& host, | 308 const std::string& host, |
| 309 const std::string& path_prefix) { | 309 const std::string& path_prefix) { |
| 310 return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX, | 310 return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX, |
| 311 kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain + | 311 kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain + |
| 312 path_prefix); | 312 path_prefix); |
| 313 } | 313 } |
| 314 | 314 |
| 315 std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches( | 315 std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches( |
| 316 const GURL& url) { | 316 const GURL& url) { |
| 317 return kBeginningOfURL + CanonicalizeHostname(url.host()) + url.path() + | 317 return kBeginningOfURL + url.scheme() + content::kStandardSchemeSeparator + |
|
> **Yoyo Zhou** (2012/08/15 17:53:12): By the way, I just learned about GURL::Replacement… [comment truncated in this view]
>
> **battre** (2012/08/16 13:16:25): I was aware of this. I cannot use the GURL::Replac… [comment truncated in this view]
| |
| 318 (url.has_query() ? "?" + url.query() : "") + kEndOfURL; | 318 url.host() + url.path() + (url.has_query() ? "?" + url.query() : "") + |
| 319 kEndOfURL; | |
| 319 } | 320 } |
| 320 | 321 |
| 321 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( | 322 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( |
| 322 const std::string& prefix) { | 323 const std::string& prefix) { |
| 323 return CreateCondition(URLMatcherCondition::URL_PREFIX, | 324 return CreateCondition(URLMatcherCondition::URL_PREFIX, |
| 324 kBeginningOfURL + CanonicalizeHostname(prefix)); | 325 kBeginningOfURL + prefix); |
| 325 } | 326 } |
| 326 | 327 |
| 327 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( | 328 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( |
| 328 const std::string& suffix) { | 329 const std::string& suffix) { |
| 329 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); | 330 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); |
| 330 } | 331 } |
| 331 | 332 |
| 332 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( | 333 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( |
| 333 const std::string& str) { | 334 const std::string& str) { |
| 334 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); | 335 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); |
| 335 } | 336 } |
| 336 | 337 |
| 337 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( | 338 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( |
| 338 const std::string& str) { | 339 const std::string& str) { |
| 339 return CreateCondition(URLMatcherCondition::URL_EQUALS, | 340 return CreateCondition(URLMatcherCondition::URL_EQUALS, |
| 340 kBeginningOfURL + CanonicalizeHostname(str) + kEndOfURL); | 341 kBeginningOfURL + str + kEndOfURL); |
| 341 } | 342 } |
| 342 | 343 |
| 343 void URLMatcherConditionFactory::ForgetUnusedPatterns( | 344 void URLMatcherConditionFactory::ForgetUnusedPatterns( |
| 344 const std::set<SubstringPattern::ID>& used_patterns) { | 345 const std::set<SubstringPattern::ID>& used_patterns) { |
| 345 PatternSingletons::iterator i = pattern_singletons_.begin(); | 346 PatternSingletons::iterator i = pattern_singletons_.begin(); |
| 346 while (i != pattern_singletons_.end()) { | 347 while (i != pattern_singletons_.end()) { |
| 347 if (used_patterns.find((*i)->id()) != used_patterns.end()) { | 348 if (used_patterns.find((*i)->id()) != used_patterns.end()) { |
| 348 ++i; | 349 ++i; |
| 349 } else { | 350 } else { |
| 350 delete *i; | 351 delete *i; |
| (...skipping 328 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 679 } | 680 } |
| 680 | 681 |
| 681 void URLMatcher::UpdateInternalDatastructures() { | 682 void URLMatcher::UpdateInternalDatastructures() { |
| 682 UpdateSubstringSetMatcher(false); | 683 UpdateSubstringSetMatcher(false); |
| 683 UpdateSubstringSetMatcher(true); | 684 UpdateSubstringSetMatcher(true); |
| 684 UpdateTriggers(); | 685 UpdateTriggers(); |
| 685 UpdateConditionFactory(); | 686 UpdateConditionFactory(); |
| 686 } | 687 } |
| 687 | 688 |
| 688 } // namespace extensions | 689 } // namespace extensions |
| OLD | NEW |