Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #ifndef CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_ | |
| 6 #define CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_ | |
| 7 #pragma once | |
| 8 | |
| 9 #include <set> | |
| 10 #include <vector> | |
| 11 | |
| 12 #include "base/hash_tables.h" | |
| 13 #include "base/memory/scoped_ptr.h" | |
| 14 #include "base/memory/scoped_vector.h" | |
| 15 #include "chrome/browser/extensions/api/declarative/substring_set_matcher.h" | |
| 16 | |
| 17 class GURL; | |
| 18 | |
| 19 namespace base { | |
| 20 class DictionaryValue; | |
| 21 } | |
| 22 | |
| 23 namespace extensions { | |
| 24 | |
| 25 // This class represents a single URL matching condition, e.g. a match on the | |
| 26 // host suffix or the containment of a string in the query component of a GURL. | |
| 27 // | |
| 28 // The difference from a simple SubstringPattern is that this also supports | |
| 29 // checking whether the {Host, Path, Query} of a URL contains a string. The | |
| 30 // reduction of URL matching conditions to SubstringPatterns conducted by | |
| 31 // URLMatcherConditionFactory is not capable of expressing that alone. | |
| 32 class URLMatcherCondition { | |
| 33 public: | |
| 34 enum Criterion { | |
| 35 HOST_PREFIX, | |
| 36 HOST_SUFFIX, | |
| 37 HOST_CONTAINS, | |
| 38 HOST_EQUALS, | |
| 39 PATH_PREFIX, | |
| 40 PATH_SUFFIX, | |
| 41 PATH_CONTAINS, | |
| 42 PATH_EQUALS, | |
| 43 QUERY_PREFIX, | |
| 44 QUERY_SUFFIX, | |
| 45 QUERY_CONTAINS, | |
| 46 QUERY_EQUALS, | |
| 47 HOST_SUFFIX_PATH_PREFIX, | |
| 48 URL_PREFIX, | |
| 49 URL_SUFFIX, | |
| 50 URL_CONTAINS, | |
| 51 URL_EQUALS, | |
| 52 }; | |
| 53 | |
| 54 URLMatcherCondition(); | |
| 55 URLMatcherCondition(Criterion criterion, | |
| 56 const SubstringPattern* substring_pattern); | |
| 57 URLMatcherCondition(const URLMatcherCondition& rhs); | |
| 58 URLMatcherCondition& operator=(const URLMatcherCondition& rhs); | |
| 59 bool operator<(const URLMatcherCondition& rhs) const; | |
| 60 | |
| 61 Criterion criterion() const { return criterion_; } | |
| 62 const SubstringPattern* substring_pattern() const { | |
| 63 return substring_pattern_; | |
| 64 } | |
| 65 | |
| 66 // Returns whether this URLMatcherCondition needs to be executed on a | |
| 67 // full URL rather than the individual components (see | |
| 68 // URLMatcherConditionFactory). | |
| 69 bool IsFullURLCondition() const; | |
| 70 | |
| 71 // Returns whether this condition is fulfilled according to | |
| 72 // |matching_substring_patterns| and |url|. | |
| 73 bool IsMatch( | |
| 74 const std::set<SubstringPattern::ID>& matching_substring_patterns, | |
| 75 const GURL& url) const; | |
| 76 | |
| 77 private: | |
| 78 // |criterion_| and |substring_pattern_| together describe what property a URL | |
| 79 // needs to fulfill to be considered a match. | |
| 80 Criterion criterion_; | |
| 81 | |
| 82 // SubstringPattern used in a SubstringSetMatcher; owned by URLMatcherConditionFactory. | |
| 83 const SubstringPattern* substring_pattern_; | |
| 84 }; | |
| 85 | |
| 86 // Class to map the problem of finding {host, path, query} {prefixes, suffixes, | |
| 87 // containments, and equality} in GURLs to the substring matching problem. | |
| 88 // | |
| 89 // Say, you want to check whether the path of a URL starts with "/index.html". | |
| 90 // This class preprocesses a URL like "www.google.com/index.html" into something | |
| 91 // like "www.google.com|/index.html". After preprocessing, you can search for | |
| 92 // "|/index.html" in the string and see that this candidate URL actually has | |
| 93 // a path that starts with "/index.html". In contrast, | |
| 94 // "www.google.com/images/index.html" would be normalized to | |
| 95 // "www.google.com|/images/index.html". It contains "/index.html" but not | |
| 96 // "|/index.html", so the path of the URL does not start with "/index.html". | |
| 97 // | |
| 98 // This preprocessing is important if you want to match a URL against many | |
| 99 // patterns because it reduces the matching to a "discover all substrings | |
| 100 // of a dictionary in a text" problem, which can be solved very efficiently | |
| 101 // by the Aho-Corasick algorithm. | |
| 102 // | |
| 103 // IMPORTANT: The URLMatcherConditionFactory owns the SubstringPatterns | |
| 104 // referenced by created URLMatcherConditions. Therefore, it must outlive | |
| 105 // all created URLMatcherConditions and the SubstringSetMatcher. | |
| 106 class URLMatcherConditionFactory { | |
| 107 public: | |
| 108 URLMatcherConditionFactory(); | |
| 109 ~URLMatcherConditionFactory(); | |
| 110 | |
| 111 // Canonicalizes a URL for "Create{Host,Path,Query}*Condition" searches. | |
| 112 std::string CanonlicalizeURLForComponentSearches(const GURL& url); | |

Matt Perry
2012/02/15 22:45:18
typo: Canonicalize*
battre
2012/02/16 14:45:55
Done.
| |
| 113 | |
| 114 // Factory methods for various condition types. | |
| 115 URLMatcherCondition CreateHostPrefixCondition(const std::string& prefix); | |
| 116 URLMatcherCondition CreateHostSuffixCondition(const std::string& suffix); | |
| 117 URLMatcherCondition CreateHostContainsCondition(const std::string& str); | |
| 118 URLMatcherCondition CreateHostEqualsCondition(const std::string& str); | |
| 119 | |
| 120 URLMatcherCondition CreatePathPrefixCondition(const std::string& prefix); | |
| 121 URLMatcherCondition CreatePathSuffixCondition(const std::string& suffix); | |
| 122 URLMatcherCondition CreatePathContainsCondition(const std::string& str); | |
| 123 URLMatcherCondition CreatePathEqualsCondition(const std::string& str); | |
| 124 | |
| 125 URLMatcherCondition CreateQueryPrefixCondition(const std::string& prefix); | |
| 126 URLMatcherCondition CreateQuerySuffixCondition(const std::string& suffix); | |
| 127 URLMatcherCondition CreateQueryContainsCondition(const std::string& str); | |
| 128 URLMatcherCondition CreateQueryEqualsCondition(const std::string& str); | |
| 129 | |
| 130 // This covers the common case, where you don't care whether a domain | |
| 131 // "foobar.com" is expressed as "foobar.com" or "www.foobar.com", and it | |
| 132 // should be followed by a given |path_prefix|. | |
| 133 URLMatcherCondition CreateHostSuffixPathPrefixCondition( | |
| 134 const std::string& host_suffix, | |
| 135 const std::string& path_prefix); | |
| 136 | |
| 137 // Canonicalizes a URL for "CreateURL*Condition" searches. | |
| 138 std::string CanonlicalizeURLForFullSearches(const GURL& url); | |

Matt Perry
2012/02/15 22:45:18
ditto typo
battre
2012/02/16 14:45:55
Done.
| |
| 139 | |
| 140 URLMatcherCondition CreateURLPrefixCondition(const std::string& prefix); | |
| 141 URLMatcherCondition CreateURLSuffixCondition(const std::string& suffix); | |
| 142 URLMatcherCondition CreateURLContainsCondition(const std::string& str); | |
| 143 URLMatcherCondition CreateURLEqualsCondition(const std::string& str); | |
| 144 | |
| 145 // Removes all patterns from |pattern_singletons_| that are not listed in | |
| 146 // |used_patterns|. These patterns are not referenced any more and get | |
| 147 // freed. | |
| 148 void ForgetUnusedPatterns( | |
| 149 const std::set<SubstringPattern::ID>& used_patterns); | |
| 150 | |
| 151 private: | |
| 152 // Creates a URLMatcherCondition according to the parameters passed. | |
| 153 // The URLMatcherCondition will refer to a SubstringPattern that is | |
| 154 // owned by |pattern_singletons_|. | |
| 155 URLMatcherCondition CreateCondition(URLMatcherCondition::Criterion criterion, | |
| 156 const std::string& pattern); | |
| 157 | |
| 158 // Prepends a "." to the hostname if it does not start with one. | |
| 159 std::string CanonicalizeHostname(const std::string& hostname) const; | |
| 160 | |
| 161 // Counter that ensures that all created SubstringPatterns have unique IDs. | |
| 162 int id_counter_; | |
| 163 | |
| 164 // These two functors consider only the pattern() value of the | |
| 165 // SubstringPatterns they are given (not, e.g., the pattern's ID). | |
| 166 struct HashFunction { | |
| 167 size_t operator()(SubstringPattern* substring_pattern) const; | |
| 168 }; | |
| 169 struct EqualsFunction { | |
| 170 bool operator()(SubstringPattern* lhs, SubstringPattern* rhs) const; | |
| 171 }; | |
| 172 // Hash set to ensure that we generate only one SubstringPattern for each | |
| 173 // distinct value of SubstringPattern::pattern(). | |
| 174 typedef base::hash_set<SubstringPattern*, HashFunction, EqualsFunction> | |
| 175 PatternSingletons; | |
| 176 PatternSingletons pattern_singletons_; | |
| 177 | |
| 178 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionFactory); | |
| 179 }; | |
| 180 | |
| 181 // This class represents a set of conditions that all need to match on a | |
| 182 // given URL for the set as a whole to be considered a match. | |
| 183 class URLMatcherConditionSet { | |
| 184 public: | |
| 185 typedef int ID; | |
| 186 typedef std::set<URLMatcherCondition> Conditions; | |
| 187 | |
| 188 URLMatcherConditionSet(); | |
| 189 URLMatcherConditionSet(ID id, const Conditions& conditions); | |
| 190 URLMatcherConditionSet(const URLMatcherConditionSet& rhs); | |
| 191 URLMatcherConditionSet& operator=(const URLMatcherConditionSet& rhs); | |
| 192 | |
| 193 ID id() const { return id_; } | |
| 194 const Conditions& conditions() const { return conditions_; } | |
| 195 | |
| 196 bool IsMatch( | |
| 197 const std::set<SubstringPattern::ID>& matching_substring_patterns, | |
| 198 const GURL& url) const; | |
| 199 | |
| 200 private: | |
| 201 ID id_; | |
| 202 Conditions conditions_; | |
| 203 }; | |
| 204 | |
| 205 // This class allows matching one URL against a large set of | |
| 206 // URLMatcherConditionSets at the same time. | |
| 207 class URLMatcher { | |
| 208 public: | |
| 209 URLMatcher(); | |
| 210 | |
| 211 // Adds new URLMatcherConditionSets to this URLMatcher. Each condition set | |
| 212 // must have a unique ID. | |
| 213 // This is an expensive operation as it triggers pre-calculations on the | |
| 214 // currently registered condition sets. Do not call this operation many | |
| 215 // times with a single condition set in each call. | |
| 216 void AddConditionSets( | |
| 217 const std::vector<URLMatcherConditionSet>& condition_sets); | |
| 218 | |
| 219 // Removes the listed condition sets. All |condition_set_ids| must be | |
| 220 // currently registered. This function should be called with large batches | |
| 221 // of |condition_set_ids| at a time to improve performance. | |
| 222 void RemoveConditionSets( | |
| 223 const std::vector<URLMatcherConditionSet::ID>& condition_set_ids); | |
| 224 | |
| 225 // Returns the IDs of all URLMatcherConditionSets that match |url|. | |
| 226 std::set<URLMatcherConditionSet::ID> MatchURL(const GURL& url); | |
| 227 | |
| 228 // Returns the URLMatcherConditionFactory that must be used to create | |
| 229 // URLMatcherConditionSets for this URLMatcher. | |
| 230 URLMatcherConditionFactory* condition_factory() { | |
| 231 return &condition_factory_; | |
| 232 } | |
| 233 | |
| 234 private: | |
| 235 void UpdateSubstringSetMatcher(bool full_url_conditions); | |
| 236 void UpdateTriggers(); | |
| 237 void UpdateConditionFactory(); | |
| 238 void UpdateInternalDatastructures(); | |
| 239 | |
| 240 URLMatcherConditionFactory condition_factory_; | |
| 241 | |
| 242 // Maps the ID of a URLMatcherConditionSet to the respective | |
| 243 // URLMatcherConditionSet. | |
| 244 typedef std::map<URLMatcherConditionSet::ID, URLMatcherConditionSet> | |
| 245 URLMatcherConditionSets; | |
| 246 URLMatcherConditionSets url_matcher_condition_sets_; | |
| 247 | |
| 248 // Maps a SubstringPattern ID to the IDs of the URLMatcherConditionSets that | |
| 249 // need to be triggered in case of a SubstringPattern match. | |

Matt Perry
2012/02/15 22:45:18
SubstringPattern*
battre
2012/02/16 14:45:55
Done.
| |
| 250 std::map<SubstringPattern::ID, std::set<URLMatcherConditionSet::ID> > | |
| 251 substring_match_triggers_; | |
| 252 | |
| 253 SubstringSetMatcher full_url_matcher_; | |
| 254 SubstringSetMatcher url_component_matcher_; | |
| 255 std::set<const SubstringPattern*> registered_full_url_patterns_; | |
| 256 std::set<const SubstringPattern*> registered_url_component_patterns_; | |
| 257 | |
| 258 DISALLOW_COPY_AND_ASSIGN(URLMatcher); | |
| 259 }; | |
| 260 | |
| 261 } // namespace extensions | |
| 262 | |
| 263 #endif // CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_ | |
| OLD | NEW |