OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/common/extensions/matcher/url_matcher.h" | 5 #include "chrome/common/extensions/matcher/url_matcher.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
9 | 9 |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
11 #include "content/public/common/url_constants.h" | |
11 #include "googleurl/src/gurl.h" | 12 #include "googleurl/src/gurl.h" |
12 | 13 |
13 namespace extensions { | 14 namespace extensions { |
14 | 15 |
15 // This set of classes implements a mapping of URL Component Patterns, such as | 16 // This set of classes implements a mapping of URL Component Patterns, such as |
16 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. | 17 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. |
17 // | 18 // |
18 // The idea of this mapping is to reduce the problem of comparing many | 19 // The idea of this mapping is to reduce the problem of comparing many |
19 // URL Component Patterns against one URL to the problem of searching many | 20 // URL Component Patterns against one URL to the problem of searching many |
20 // substrings in one string: | 21 // substrings in one string: |
(...skipping 60 matching lines...) | |
81 // -> host_equals("www.example.com") = BU .www.example.com ED | 82 // -> host_equals("www.example.com") = BU .www.example.com ED |
82 // | 83 // |
83 // Similarly for path and query parameters ({path, query}_{prefix, suffix, equals}). | 84 // Similarly for path and query parameters ({path, query}_{prefix, suffix, equals}). |
84 // | 85 // |
85 // With this, we can search the SubstringPatterns in the normalized URL. | 86 // With this, we can search the SubstringPatterns in the normalized URL. |
86 // | 87 // |
87 // | 88 // |
88 // Case 2: url_{prefix,suffix,equals,contains} searches. | 89 // Case 2: url_{prefix,suffix,equals,contains} searches. |
89 // ===================================================== | 90 // ===================================================== |
90 // | 91 // |
91 // Step 1: as above | 92 // Step 1: as above, except that the scheme is not removed. |
92 // | 93 // |
93 // Step 2: | 94 // Step 2: |
94 // Translate URL to String and add the following position markers: | 95 // Translate URL to String and add the following position markers: |
95 // - BU = Beginning of URL | 96 // - BU = Beginning of URL |
96 // - EU = End of URL | 97 // - EU = End of URL |
97 // Furthermore, the hostname is canonicalized to start with a ".". | |
98 // | 98 // |
99 // -> www.example.com/index.html?search=foo becomes | 99 // -> http://www.example.com:8080/index.html?search=foo#first_match becomes |
100 // BU .www.example.com/index.html?search=foo EU | 100 // BU http://www.example.com/index.html?search=foo EU |
101 // | 101 // |
102 // url_prefix(prefix) = BU add_missing_dot_prefix(prefix) | 102 // url_prefix(prefix) = BU prefix |
103 // -> url_prefix("www.example") = BU .www.example | 103 // -> url_prefix("http://www.example") = BU http://www.example |
104 // | 104 // |
105 // url_contains(substring) = substring | 105 // url_contains(substring) = substring |
106 // -> url_contains("index") = index | 106 // -> url_contains("index") = index |
107 // | 107 // |
108 // | 108 // |
109 // Case 3: {host,path,query}_contains searches. | 109 // Case 3: {host,path,query}_contains searches. |
110 // ============================================ | 110 // ============================================ |
111 // | 111 // |
112 // These kinds of searches are not supported directly but can be derived | 112 // These kinds of searches are not supported directly but can be derived |
113 // by a combination of a url_contains() query followed by an explicit test: | 113 // by a combination of a url_contains() query followed by an explicit test: |
114 // | 114 // |
115 // host_contains(str) = url_contains(str) followed by test whether str occurs | 115 // host_contains(str) = url_contains(str) followed by test whether str occurs |
116 // in host comonent of original URL. | 116 // in host component of original URL. |
117 // -> host_contains("example.co") = example.co | 117 // -> host_contains("example.co") = example.co |
118 // followed by gurl.host().find("example.co"); | 118 // followed by gurl.host().find("example.co"); |
119 // | 119 // |
120 // [similarly for path_contains and query_contains]. | 120 // [similarly for path_contains and query_contains]. |
121 | 121 |
122 | 122 |
123 // | 123 // |
124 // URLMatcherCondition | 124 // URLMatcherCondition |
125 // | 125 // |
126 | 126 |
(...skipping 26 matching lines...) | |
153 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) | 153 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) |
154 return *substring_pattern_ < *rhs.substring_pattern_; | 154 return *substring_pattern_ < *rhs.substring_pattern_; |
155 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; | 155 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; |
156 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, | 156 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, |
157 // or both are NULL. | 157 // or both are NULL. |
158 return false; | 158 return false; |
159 } | 159 } |
160 | 160 |
161 bool URLMatcherCondition::IsFullURLCondition() const { | 161 bool URLMatcherCondition::IsFullURLCondition() const { |
162 // For these criteria the SubstringMatcher needs to be executed on the | 162 // For these criteria the SubstringMatcher needs to be executed on the |
163 // GURL that is canonlizaliced with | 163 // GURL that is canonicalized with |
164 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. | 164 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. |
165 switch (criterion_) { | 165 switch (criterion_) { |
166 case HOST_CONTAINS: | 166 case HOST_CONTAINS: |
167 case PATH_CONTAINS: | 167 case PATH_CONTAINS: |
168 case QUERY_CONTAINS: | 168 case QUERY_CONTAINS: |
169 case URL_PREFIX: | 169 case URL_PREFIX: |
170 case URL_SUFFIX: | 170 case URL_SUFFIX: |
171 case URL_CONTAINS: | 171 case URL_CONTAINS: |
172 case URL_EQUALS: | 172 case URL_EQUALS: |
173 return true; | 173 return true; |
(...skipping 133 matching lines...) | |
307 URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition( | 307 URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition( |
308 const std::string& host, | 308 const std::string& host, |
309 const std::string& path_prefix) { | 309 const std::string& path_prefix) { |
310 return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX, | 310 return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX, |
311 kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain + | 311 kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain + |
312 path_prefix); | 312 path_prefix); |
313 } | 313 } |
314 | 314 |
315 std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches( | 315 std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches( |
316 const GURL& url) { | 316 const GURL& url) { |
317 return kBeginningOfURL + CanonicalizeHostname(url.host()) + url.path() + | 317 return kBeginningOfURL + url.scheme() + content::kStandardSchemeSeparator + |
Yoyo Zhou
2012/08/15 17:53:12
By the way, I just learned about GURL::Replacement
battre
2012/08/16 13:16:25
I was aware of this. I cannot use the GURL::Replac
| |
318 (url.has_query() ? "?" + url.query() : "") + kEndOfURL; | 318 url.host() + url.path() + (url.has_query() ? "?" + url.query() : "") + |
319 kEndOfURL; | |
319 } | 320 } |
320 | 321 |
321 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( | 322 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( |
322 const std::string& prefix) { | 323 const std::string& prefix) { |
323 return CreateCondition(URLMatcherCondition::URL_PREFIX, | 324 return CreateCondition(URLMatcherCondition::URL_PREFIX, |
324 kBeginningOfURL + CanonicalizeHostname(prefix)); | 325 kBeginningOfURL + prefix); |
325 } | 326 } |
326 | 327 |
327 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( | 328 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( |
328 const std::string& suffix) { | 329 const std::string& suffix) { |
329 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); | 330 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); |
330 } | 331 } |
331 | 332 |
332 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( | 333 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( |
333 const std::string& str) { | 334 const std::string& str) { |
334 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); | 335 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); |
335 } | 336 } |
336 | 337 |
337 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( | 338 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( |
338 const std::string& str) { | 339 const std::string& str) { |
339 return CreateCondition(URLMatcherCondition::URL_EQUALS, | 340 return CreateCondition(URLMatcherCondition::URL_EQUALS, |
340 kBeginningOfURL + CanonicalizeHostname(str) + kEndOfURL); | 341 kBeginningOfURL + str + kEndOfURL); |
341 } | 342 } |
342 | 343 |
343 void URLMatcherConditionFactory::ForgetUnusedPatterns( | 344 void URLMatcherConditionFactory::ForgetUnusedPatterns( |
344 const std::set<SubstringPattern::ID>& used_patterns) { | 345 const std::set<SubstringPattern::ID>& used_patterns) { |
345 PatternSingletons::iterator i = pattern_singletons_.begin(); | 346 PatternSingletons::iterator i = pattern_singletons_.begin(); |
346 while (i != pattern_singletons_.end()) { | 347 while (i != pattern_singletons_.end()) { |
347 if (used_patterns.find((*i)->id()) != used_patterns.end()) { | 348 if (used_patterns.find((*i)->id()) != used_patterns.end()) { |
348 ++i; | 349 ++i; |
349 } else { | 350 } else { |
350 delete *i; | 351 delete *i; |
(...skipping 328 matching lines...) | |
679 } | 680 } |
680 | 681 |
681 void URLMatcher::UpdateInternalDatastructures() { | 682 void URLMatcher::UpdateInternalDatastructures() { |
682 UpdateSubstringSetMatcher(false); | 683 UpdateSubstringSetMatcher(false); |
683 UpdateSubstringSetMatcher(true); | 684 UpdateSubstringSetMatcher(true); |
684 UpdateTriggers(); | 685 UpdateTriggers(); |
685 UpdateConditionFactory(); | 686 UpdateConditionFactory(); |
686 } | 687 } |
687 | 688 |
688 } // namespace extensions | 689 } // namespace extensions |
OLD | NEW |