| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ | 5 #ifndef CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ |
| 6 #define CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ | 6 #define CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ |
| 7 | 7 |
| 8 #include <set> | 8 #include <set> |
| 9 #include <vector> | 9 #include <vector> |
| 10 | 10 |
| 11 #include "base/memory/ref_counted.h" | 11 #include "base/memory/ref_counted.h" |
| 12 #include "base/memory/scoped_ptr.h" | 12 #include "base/memory/scoped_ptr.h" |
| 13 #include "base/memory/scoped_vector.h" | 13 #include "base/memory/scoped_vector.h" |
| 14 #include "chrome/common/extensions/matcher/regex_set_matcher.h" |
| 14 #include "chrome/common/extensions/matcher/substring_set_matcher.h" | 15 #include "chrome/common/extensions/matcher/substring_set_matcher.h" |
| 15 | 16 |
| 16 class GURL; | 17 class GURL; |
| 17 | 18 |
| 18 namespace base { | 19 namespace base { |
| 19 class DictionaryValue; | 20 class DictionaryValue; |
| 20 } | 21 } |
| 21 | 22 |
| 22 namespace extensions { | 23 namespace extensions { |
| 23 | 24 |
| 24 // This class represents a single URL matching condition, e.g. a match on the | 25 // This class represents a single URL matching condition, e.g. a match on the |
| 25 // host suffix or the containment of a string in the query component of a GURL. | 26 // host suffix or the containment of a string in the query component of a GURL. |
| 26 // | 27 // |
| 27 // The difference from a simple SubstringPattern is that this also supports | 28 // The difference from a simple StringPattern is that this also supports |
| 28 // checking whether the {Host, Path, Query} of a URL contains a string. The | 29 // checking whether the {Host, Path, Query} of a URL contains a string. The |
| 29 // reduction of URL matching conditions to StringPatterns conducted by | 30 // reduction of URL matching conditions to StringPatterns conducted by |
| 30 // URLMatcherConditionFactory is not capable of expressing that alone. | 31 // URLMatcherConditionFactory is not capable of expressing that alone. |
| 32 // |
| 33 // Also supported is matching regular expressions against the URL (URL_MATCHES). |
| 31 class URLMatcherCondition { | 34 class URLMatcherCondition { |
| 32 public: | 35 public: |
| 33 enum Criterion { | 36 enum Criterion { |
| 34 HOST_PREFIX, | 37 HOST_PREFIX, |
| 35 HOST_SUFFIX, | 38 HOST_SUFFIX, |
| 36 HOST_CONTAINS, | 39 HOST_CONTAINS, |
| 37 HOST_EQUALS, | 40 HOST_EQUALS, |
| 38 PATH_PREFIX, | 41 PATH_PREFIX, |
| 39 PATH_SUFFIX, | 42 PATH_SUFFIX, |
| 40 PATH_CONTAINS, | 43 PATH_CONTAINS, |
| 41 PATH_EQUALS, | 44 PATH_EQUALS, |
| 42 QUERY_PREFIX, | 45 QUERY_PREFIX, |
| 43 QUERY_SUFFIX, | 46 QUERY_SUFFIX, |
| 44 QUERY_CONTAINS, | 47 QUERY_CONTAINS, |
| 45 QUERY_EQUALS, | 48 QUERY_EQUALS, |
| 46 HOST_SUFFIX_PATH_PREFIX, | 49 HOST_SUFFIX_PATH_PREFIX, |
| 47 HOST_EQUALS_PATH_PREFIX, | 50 HOST_EQUALS_PATH_PREFIX, |
| 48 URL_PREFIX, | 51 URL_PREFIX, |
| 49 URL_SUFFIX, | 52 URL_SUFFIX, |
| 50 URL_CONTAINS, | 53 URL_CONTAINS, |
| 51 URL_EQUALS, | 54 URL_EQUALS, |
| 55 URL_MATCHES, |
| 52 }; | 56 }; |
| 53 | 57 |
| 54 URLMatcherCondition(); | 58 URLMatcherCondition(); |
| 55 ~URLMatcherCondition(); | 59 ~URLMatcherCondition(); |
| 56 URLMatcherCondition(Criterion criterion, | 60 URLMatcherCondition(Criterion criterion, |
| 57 const SubstringPattern* substring_pattern); | 61 const StringPattern* substring_pattern); |
| 58 URLMatcherCondition(const URLMatcherCondition& rhs); | 62 URLMatcherCondition(const URLMatcherCondition& rhs); |
| 59 URLMatcherCondition& operator=(const URLMatcherCondition& rhs); | 63 URLMatcherCondition& operator=(const URLMatcherCondition& rhs); |
| 60 bool operator<(const URLMatcherCondition& rhs) const; | 64 bool operator<(const URLMatcherCondition& rhs) const; |
| 61 | 65 |
| 62 Criterion criterion() const { return criterion_; } | 66 Criterion criterion() const { return criterion_; } |
| 63 const SubstringPattern* substring_pattern() const { | 67 const StringPattern* string_pattern() const { |
| 64 return substring_pattern_; | 68 return string_pattern_; |
| 65 } | 69 } |
| 66 | 70 |
| 67 // Returns whether this URLMatcherCondition needs to be executed on a | 71 // Returns whether this URLMatcherCondition needs to be executed on a |
| 68 // full URL rather than the individual components (see | 72 // full URL rather than the individual components (see |
| 69 // URLMatcherConditionFactory). | 73 // URLMatcherConditionFactory). |
| 70 bool IsFullURLCondition() const; | 74 bool IsFullURLCondition() const; |
| 71 | 75 |
| 76 // Returns whether this URLMatcherCondition is a regular expression to be |
| 77 // handled by a regex matcher instead of a substring matcher. |
| 78 bool IsRegexCondition() const; |
| 79 |
| 72 // Returns whether this condition is fulfilled according to | 80 // Returns whether this condition is fulfilled according to |
| 73 // |matching_substring_patterns| and |url|. | 81 // |matching_patterns| and |url|. |
| 74 bool IsMatch( | 82 bool IsMatch(const std::set<StringPattern::ID>& matching_patterns, |
| 75 const std::set<SubstringPattern::ID>& matching_substring_patterns, | 83 const GURL& url) const; |
| 76 const GURL& url) const; | |
| 77 | 84 |
| 78 private: | 85 private: |
| 79 // |criterion_| and |substring_pattern_| describe together what property a URL | 86 // |criterion_| and |string_pattern_| describe together what property a URL |
| 80 // needs to fulfill to be considered a match. | 87 // needs to fulfill to be considered a match. |
| 81 Criterion criterion_; | 88 Criterion criterion_; |
| 82 | 89 |
| 83 // This is the SubstringPattern that is used in a SubstringSetMatcher. | 90 // The StringPattern used in a SubstringSetMatcher or a RegexSetMatcher. |
| 84 const SubstringPattern* substring_pattern_; | 91 const StringPattern* string_pattern_; |
| 85 }; | 92 }; |
| 86 | 93 |
| 87 // Class to map the problem of finding {host, path, query} {prefixes, suffixes, | 94 // Class to map the problem of finding {host, path, query} {prefixes, suffixes, |
| 88 // containments, and equality} in GURLs to the substring matching problem. | 95 // containments, and equality} in GURLs to the substring matching problem. |
| 89 // | 96 // |
| 90 // Say, you want to check whether the path of a URL starts with "/index.html". | 97 // Say, you want to check whether the path of a URL starts with "/index.html". |
| 91 // This class preprocesses a URL like "www.google.com/index.html" into something | 98 // This class preprocesses a URL like "www.google.com/index.html" into something |
| 92 // like "www.google.com|/index.html". After preprocessing, you can search for | 99 // like "www.google.com|/index.html". After preprocessing, you can search for |
| 93 // "|/index.html" in the string and see that this candidate URL actually has | 100 // "|/index.html" in the string and see that this candidate URL actually has |
| 94 // a path that starts with "/index.html". On the contrary, | 101 // a path that starts with "/index.html". On the contrary, |
| 95 // "www.google.com/images/index.html" would be normalized to | 102 // "www.google.com/images/index.html" would be normalized to |
| 96 // "www.google.com|/images/index.html". It is easy to see that it contains | 103 // "www.google.com|/images/index.html". It is easy to see that it contains |
| 97 // "/index.html" but the path of the URL does not start with "/index.html". | 104 // "/index.html" but the path of the URL does not start with "/index.html". |
| 98 // | 105 // |
| 99 // This preprocessing is important if you want to match a URL against many | 106 // This preprocessing is important if you want to match a URL against many |
| 100 // patterns because it reduces the matching to a "discover all substrings | 107 // patterns because it reduces the matching to a "discover all substrings |
| 101 // of a dictionary in a text" problem, which can be solved very efficiently | 108 // of a dictionary in a text" problem, which can be solved very efficiently |
| 102 // by the Aho-Corasick algorithm. | 109 // by the Aho-Corasick algorithm. |
| 103 // | 110 // |
| 104 // IMPORTANT: The URLMatcherConditionFactory owns the SubstringPattern | 111 // IMPORTANT: The URLMatcherConditionFactory owns the StringPattern |
| 105 // referenced by created URLMatcherConditions. Therefore, it must outlive | 112 // referenced by created URLMatcherConditions. Therefore, it must outlive |
| 106 // all created URLMatcherCondition and the SubstringSetMatcher. | 113 // all created URLMatcherCondition and the SubstringSetMatcher. |
| 107 class URLMatcherConditionFactory { | 114 class URLMatcherConditionFactory { |
| 108 public: | 115 public: |
| 109 URLMatcherConditionFactory(); | 116 URLMatcherConditionFactory(); |
| 110 ~URLMatcherConditionFactory(); | 117 ~URLMatcherConditionFactory(); |
| 111 | 118 |
| 112 // Canonicalizes a URL for "Create{Host,Path,Query}*Condition" searches. | 119 // Canonicalizes a URL for "Create{Host,Path,Query}*Condition" searches. |
| 113 std::string CanonicalizeURLForComponentSearches(const GURL& url); | 120 std::string CanonicalizeURLForComponentSearches(const GURL& url); |
| 114 | 121 |
| (...skipping 24 matching lines...) Expand all Loading... |
| 139 URLMatcherCondition CreateHostSuffixPathPrefixCondition( | 146 URLMatcherCondition CreateHostSuffixPathPrefixCondition( |
| 140 const std::string& host_suffix, | 147 const std::string& host_suffix, |
| 141 const std::string& path_prefix); | 148 const std::string& path_prefix); |
| 142 URLMatcherCondition CreateHostEqualsPathPrefixCondition( | 149 URLMatcherCondition CreateHostEqualsPathPrefixCondition( |
| 143 const std::string& host, | 150 const std::string& host, |
| 144 const std::string& path_prefix); | 151 const std::string& path_prefix); |
| 145 | 152 |
| 146 // Canonicalizes a URL for "CreateURL*Condition" searches. | 153 // Canonicalizes a URL for "CreateURL*Condition" searches. |
| 147 std::string CanonicalizeURLForFullSearches(const GURL& url); | 154 std::string CanonicalizeURLForFullSearches(const GURL& url); |
| 148 | 155 |
| 156 // Canonicalizes a URL for "CreateURLMatchesCondition" searches. |
| 157 std::string CanonicalizeURLForRegexSearches(const GURL& url); |
| 158 |
| 149 URLMatcherCondition CreateURLPrefixCondition(const std::string& prefix); | 159 URLMatcherCondition CreateURLPrefixCondition(const std::string& prefix); |
| 150 URLMatcherCondition CreateURLSuffixCondition(const std::string& suffix); | 160 URLMatcherCondition CreateURLSuffixCondition(const std::string& suffix); |
| 151 URLMatcherCondition CreateURLContainsCondition(const std::string& str); | 161 URLMatcherCondition CreateURLContainsCondition(const std::string& str); |
| 152 URLMatcherCondition CreateURLEqualsCondition(const std::string& str); | 162 URLMatcherCondition CreateURLEqualsCondition(const std::string& str); |
| 153 | 163 |
| 164 URLMatcherCondition CreateURLMatchesCondition(const std::string& regex); |
| 165 |
| 154 // Removes all patterns from |pattern_singletons_| that are not listed in | 166 // Removes all patterns from |substring_pattern_singletons_| and |
| 155 // |used_patterns|. These patterns are not referenced any more and get | 167 // |regex_pattern_singletons_| that are not listed in |used_patterns|. These |
| 156 // freed. | 168 // patterns are not referenced any more and get freed. |
| 157 void ForgetUnusedPatterns( | 169 void ForgetUnusedPatterns( |
| 158 const std::set<SubstringPattern::ID>& used_patterns); | 170 const std::set<StringPattern::ID>& used_patterns); |
| 159 | 171 |
| 160 // Returns true if this object retains no allocated data. Only for debugging. | 172 // Returns true if this object retains no allocated data. Only for debugging. |
| 161 bool IsEmpty() const; | 173 bool IsEmpty() const; |
| 162 | 174 |
| 163 private: | 175 private: |
| 164 // Creates a URLMatcherCondition according to the parameters passed. | 176 // Creates a URLMatcherCondition according to the parameters passed. |
| 165 // The URLMatcherCondition will refer to a SubstringPattern that is | 177 // The URLMatcherCondition will refer to a StringPattern that is owned by |
| 166 // owned by |pattern_singletons_|. | 178 // |substring_pattern_singletons_| or |regex_pattern_singletons_|. |
| 167 URLMatcherCondition CreateCondition(URLMatcherCondition::Criterion criterion, | 179 URLMatcherCondition CreateCondition(URLMatcherCondition::Criterion criterion, |
| 168 const std::string& pattern); | 180 const std::string& pattern); |
| 169 | 181 |
| 170 // Prepends a "." to the hostname if it does not start with one. | 182 // Prepends a "." to the hostname if it does not start with one. |
| 171 std::string CanonicalizeHostname(const std::string& hostname) const; | 183 std::string CanonicalizeHostname(const std::string& hostname) const; |
| 172 | 184 |
| 173 // Counter that ensures that all created SubstringPatterns have unique IDs. | 185 // Counter that ensures that all created StringPatterns have unique IDs. |
| 186 // Note that substring patterns and regex patterns will use different IDs. |
| 174 int id_counter_; | 187 int id_counter_; |
| 175 | 188 |
| 176 // This comparison considers only the pattern() value of the | 189 // This comparison considers only the pattern() value of the |
| 177 // SubstringPatterns. | 190 // StringPatterns. |
| 178 struct SubstringPatternPointerCompare { | 191 struct StringPatternPointerCompare { |
| 179 bool operator()(SubstringPattern* lhs, SubstringPattern* rhs) const; | 192 bool operator()(StringPattern* lhs, StringPattern* rhs) const; |
| 180 }; | 193 }; |
| 181 // Set to ensure that we generate only one SubstringPattern for each content | 194 // Set to ensure that we generate only one StringPattern for each content |
| 182 // of SubstringPattern::pattern(). | 195 // of StringPattern::pattern(). |
| 183 typedef std::set<SubstringPattern*, SubstringPatternPointerCompare> | 196 typedef std::set<StringPattern*, StringPatternPointerCompare> |
| 184 PatternSingletons; | 197 PatternSingletons; |
| 185 PatternSingletons pattern_singletons_; | 198 PatternSingletons substring_pattern_singletons_; |
| 199 PatternSingletons regex_pattern_singletons_; |
| 186 | 200 |
| 187 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionFactory); | 201 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionFactory); |
| 188 }; | 202 }; |
| 189 | 203 |
| 190 // This class represents a filter for the URL scheme to be hooked up into a | 204 // This class represents a filter for the URL scheme to be hooked up into a |
| 191 // URLMatcherConditionSet. | 205 // URLMatcherConditionSet. |
| 192 class URLMatcherSchemeFilter { | 206 class URLMatcherSchemeFilter { |
| 193 public: | 207 public: |
| 194 explicit URLMatcherSchemeFilter(const std::string& filter); | 208 explicit URLMatcherSchemeFilter(const std::string& filter); |
| 195 explicit URLMatcherSchemeFilter(const std::vector<std::string>& filters); | 209 explicit URLMatcherSchemeFilter(const std::vector<std::string>& filters); |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 237 // Matches if all conditions in |conditions|, |scheme_filter| and | 251 // Matches if all conditions in |conditions|, |scheme_filter| and |
| 238 // |port_filter| are fulfilled. |scheme_filter| and |port_filter| may be NULL, | 252 // |port_filter| are fulfilled. |scheme_filter| and |port_filter| may be NULL, |
| 239 // in which case, no restrictions are imposed on the scheme/port of a URL. | 253 // in which case, no restrictions are imposed on the scheme/port of a URL. |
| 240 URLMatcherConditionSet(ID id, const Conditions& conditions, | 254 URLMatcherConditionSet(ID id, const Conditions& conditions, |
| 241 scoped_ptr<URLMatcherSchemeFilter> scheme_filter, | 255 scoped_ptr<URLMatcherSchemeFilter> scheme_filter, |
| 242 scoped_ptr<URLMatcherPortFilter> port_filter); | 256 scoped_ptr<URLMatcherPortFilter> port_filter); |
| 243 | 257 |
| 244 ID id() const { return id_; } | 258 ID id() const { return id_; } |
| 245 const Conditions& conditions() const { return conditions_; } | 259 const Conditions& conditions() const { return conditions_; } |
| 246 | 260 |
| 247 bool IsMatch( | 261 bool IsMatch(const std::set<StringPattern::ID>& matching_patterns, |
| 248 const std::set<SubstringPattern::ID>& matching_substring_patterns, | 262 const GURL& url) const; |
| 249 const GURL& url) const; | |
| 250 | 263 |
| 251 private: | 264 private: |
| 252 friend class base::RefCounted<URLMatcherConditionSet>; | 265 friend class base::RefCounted<URLMatcherConditionSet>; |
| 253 ~URLMatcherConditionSet(); | 266 ~URLMatcherConditionSet(); |
| 254 ID id_; | 267 ID id_; |
| 255 Conditions conditions_; | 268 Conditions conditions_; |
| 256 scoped_ptr<URLMatcherSchemeFilter> scheme_filter_; | 269 scoped_ptr<URLMatcherSchemeFilter> scheme_filter_; |
| 257 scoped_ptr<URLMatcherPortFilter> port_filter_; | 270 scoped_ptr<URLMatcherPortFilter> port_filter_; |
| 258 | 271 |
| 259 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionSet); | 272 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionSet); |
| (...skipping 29 matching lines...) Expand all Loading... |
| 289 // URLMatcherConditionSets for this URLMatcher. | 302 // URLMatcherConditionSets for this URLMatcher. |
| 290 URLMatcherConditionFactory* condition_factory() { | 303 URLMatcherConditionFactory* condition_factory() { |
| 291 return &condition_factory_; | 304 return &condition_factory_; |
| 292 } | 305 } |
| 293 | 306 |
| 294 // Returns true if this object retains no allocated data. Only for debugging. | 307 // Returns true if this object retains no allocated data. Only for debugging. |
| 295 bool IsEmpty() const; | 308 bool IsEmpty() const; |
| 296 | 309 |
| 297 private: | 310 private: |
| 298 void UpdateSubstringSetMatcher(bool full_url_conditions); | 311 void UpdateSubstringSetMatcher(bool full_url_conditions); |
| 312 void UpdateRegexSetMatcher(); |
| 299 void UpdateTriggers(); | 313 void UpdateTriggers(); |
| 300 void UpdateConditionFactory(); | 314 void UpdateConditionFactory(); |
| 301 void UpdateInternalDatastructures(); | 315 void UpdateInternalDatastructures(); |
| 302 | 316 |
| 303 URLMatcherConditionFactory condition_factory_; | 317 URLMatcherConditionFactory condition_factory_; |
| 304 | 318 |
| 305 // Maps the ID of a URLMatcherConditionSet to the respective | 319 // Maps the ID of a URLMatcherConditionSet to the respective |
| 306 // URLMatcherConditionSet. | 320 // URLMatcherConditionSet. |
| 307 typedef std::map<URLMatcherConditionSet::ID, | 321 typedef std::map<URLMatcherConditionSet::ID, |
| 308 scoped_refptr<URLMatcherConditionSet> > | 322 scoped_refptr<URLMatcherConditionSet> > |
| 309 URLMatcherConditionSets; | 323 URLMatcherConditionSets; |
| 310 URLMatcherConditionSets url_matcher_condition_sets_; | 324 URLMatcherConditionSets url_matcher_condition_sets_; |
| 311 | 325 |
| 312 // Maps a SubstringPattern ID to the URLMatcherConditions that need to | 326 // Maps a StringPattern ID to the IDs of the URLMatcherConditionSets that |
| 313 // be triggered in case of a SubstringPattern match. | 327 // need to be triggered in case of a StringPattern match. |
| 314 std::map<SubstringPattern::ID, std::set<URLMatcherConditionSet::ID> > | 328 std::map<StringPattern::ID, std::set<URLMatcherConditionSet::ID> > |
| 315 substring_match_triggers_; | 329 substring_match_triggers_; |
| 316 | 330 |
| 317 SubstringSetMatcher full_url_matcher_; | 331 SubstringSetMatcher full_url_matcher_; |
| 318 SubstringSetMatcher url_component_matcher_; | 332 SubstringSetMatcher url_component_matcher_; |
| 319 std::set<const SubstringPattern*> registered_full_url_patterns_; | 333 RegexSetMatcher regex_set_matcher_; |
| 320 std::set<const SubstringPattern*> registered_url_component_patterns_; | 334 std::set<const StringPattern*> registered_full_url_patterns_; |
| 335 std::set<const StringPattern*> registered_url_component_patterns_; |
| 321 | 336 |
| 322 DISALLOW_COPY_AND_ASSIGN(URLMatcher); | 337 DISALLOW_COPY_AND_ASSIGN(URLMatcher); |
| 323 }; | 338 }; |
| 324 | 339 |
| 325 } // namespace extensions | 340 } // namespace extensions |
| 326 | 341 |
| 327 #endif // CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ | 342 #endif // CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ |
| OLD | NEW |