Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #ifndef CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_ | |
| 6 #define CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_ | |
| 7 #pragma once | |
| 8 | |
| 9 #include <set> | |
| 10 #include <vector> | |
| 11 | |
| 12 #include "base/memory/linked_ptr.h" | |
| 13 #include "base/memory/scoped_ptr.h" | |
| 14 #include "base/memory/scoped_vector.h" | |
| 15 #include "chrome/browser/extensions/api/declarative/substring_set_matcher.h" | |
| 16 #include "googleurl/src/gurl.h" | |
| 17 | |
| 18 namespace base { | |
| 19 class DictionaryValue; | |
| 20 } | |
| 21 | |
| 22 namespace extensions { | |
| 23 | |
| 24 // This class represents a single URL matching condition, e.g. a match on the | |
| 25 // host suffix or the containment of a string in the query component of a GURL. | |
| 26 // | |
| 27 // The difference from a SubstringPattern created by URLMatcherConditionFactory | |
| 28 // is that this also supports matches to check whether {Host, Path, Query} of a | |
| 29 // URL contains a string. | |
| 30 class URLMatcherCondition { | |
| 31 public: | |
| 32 enum Criterion { | |
| 33 HOST_PREFIX, | |
| 34 HOST_SUFFIX, | |
| 35 HOST_CONTAINS, | |
| 36 HOST_EQUALS, | |
| 37 PATH_PREFIX, | |
| 38 PATH_SUFFIX, | |
| 39 PATH_CONTAINS, | |
| 40 PATH_EQUALS, | |
| 41 QUERY_PREFIX, | |
| 42 QUERY_SUFFIX, | |
| 43 QUERY_CONTAINS, | |
| 44 QUERY_EQUALS, | |
| 45 HOST_SUFFIX_PATH_PREFIX, | |
| 46 URL_PREFIX, | |
| 47 URL_SUFFIX, | |
| 48 URL_CONTAINS, | |
| 49 URL_EQUALS, | |
| 50 }; | |
| 51 | |
| 52 URLMatcherCondition(Criterion criterion, | |
| 53 const SubstringPattern* substring_pattern); | |
| 54 | |
| 55 Criterion criterion() const { return criterion_; } | |
| 56 const SubstringPattern* substring_pattern() const { | |
| 57 return substring_pattern_; | |
| 58 } | |
| 59 | |
| 60 // Returns whether this URLMatcherCondition needs to be executed on a | |
| 61 // full URL rather than the individual components (see | |
| 62 // URLMatcherConditionFactory). | |
| 63 bool IsFullUrlCondition() const; | |
| 64 | |
| 65 // Returns whether this condition is fulfilled according to | |
| 66 // |matching_substring_patterns| and |url|. | |
| 67 bool IsMatch( | |
| 68 const std::set<SubstringPattern::ID>& matching_substring_patterns, | |
| 69 const GURL& url) const; | |
| 70 | |
| 71 private: | |
| 72 // |criterion_| and |substring_pattern_| describe together what property a URL | |
| 73 // needs to fulfill to be considered a match. | |
| 74 Criterion criterion_; | |
| 75 | |
| 76 // This is the SubstringPattern that is used in a SubstringSetMatcher. | |
| 77 // It becomes valid after BuildSubstringPattern has been called. | |
| 78 const SubstringPattern* substring_pattern_; | |
| 79 }; | |
| 80 | |
| 81 // Class to map the problem of finding {host, path, query} {prefixes, suffixes, | |
| 82 // containments, and equality} in GURLs to the substring matching problem. | |
| 83 // | |
| 84 // Say, you want to check whether the path of a URL starts with "/index.html". | |
| 85 // This class preprocesses a URL like "www.google.com/index.html" into something | |
| 86 // like "www.google.com|/index.html". After preprocessing, you can search for | |
| 87 // "|/index.html" in the string and see that this candidate URL actually has | |
| 88 // a path that starts with "/index.html". In contrast, | |
| 89 // "www.google.com/images/index.html" would be normalized to | |
| 90 // "www.google.com|/images/index.html". It contains "/index.html" but not | |
| 91 // "|/index.html", so the path of the URL does not start with "/index.html". | |
| 92 // | |
| 93 // This preprocessing is important if you want to match a URL against many | |
| 94 // patterns because it reduces the matching to a "discover all substrings | |
| 95 // of a dictionary in a text" problem, which can be solved very efficiently | |
| 96 // by the Aho-Corasick algorithm. | |
| 97 // | |
| 98 // IMPORTANT: The URLMatcherConditionFactory owns the SubstringPatterns | |
| 99 // referenced by created URLMatcherConditions. Therefore, it must outlive | |
| 100 // all created URLMatcherConditions and the SubstringSetMatcher. | |
| 101 class URLMatcherConditionFactory { | |
| 102 public: | |
| 103 URLMatcherConditionFactory(); | |
| 104 | |
| 105 // Canonicalizes a URL for "Create{Host,Path,Query}*Condition" searches. | |
| 106 std::string CanonlicalizeURLForComponentSearches(const GURL& url); | |
| 107 | |
| 108 // Factory methods for various condition types. | |
| 109 scoped_ptr<URLMatcherCondition> CreateHostPrefixCondition( | |
|
Matt Perry
2012/02/14 19:56:16
An URLMatcherCondition is now just a Criterion and
battre
2012/02/14 21:56:42
Done.
| |
| 110 const std::string& prefix); | |
| 111 scoped_ptr<URLMatcherCondition> CreateHostSuffixCondition( | |
| 112 const std::string& suffix); | |
| 113 scoped_ptr<URLMatcherCondition> CreateHostContainsCondition( | |
| 114 const std::string& str); | |
| 115 scoped_ptr<URLMatcherCondition> CreateHostEqualsCondition( | |
| 116 const std::string& str); | |
| 117 | |
| 118 scoped_ptr<URLMatcherCondition> CreatePathPrefixCondition( | |
| 119 const std::string& prefix); | |
| 120 scoped_ptr<URLMatcherCondition> CreatePathSuffixCondition( | |
| 121 const std::string& suffix); | |
| 122 scoped_ptr<URLMatcherCondition> CreatePathContainsCondition( | |
| 123 const std::string& str); | |
| 124 scoped_ptr<URLMatcherCondition> CreatePathEqualsCondition( | |
| 125 const std::string& str); | |
| 126 | |
| 127 scoped_ptr<URLMatcherCondition> CreateQueryPrefixCondition( | |
| 128 const std::string& prefix); | |
| 129 scoped_ptr<URLMatcherCondition> CreateQuerySuffixCondition( | |
| 130 const std::string& suffix); | |
| 131 scoped_ptr<URLMatcherCondition> CreateQueryContainsCondition( | |
| 132 const std::string& str); | |
| 133 scoped_ptr<URLMatcherCondition> CreateQueryEqualsCondition( | |
| 134 const std::string& str); | |
| 135 | |
| 136 // This covers the common case, where you don't care whether a domain | |
| 137 // "foobar.com" is expressed as "foobar.com" or "www.foobar.com", and it | |
| 138 // should be followed by a given |path_prefix|. | |
| 139 scoped_ptr<URLMatcherCondition> CreateHostSuffixPathPrefixCondition( | |
| 140 const std::string& host_suffix, | |
| 141 const std::string& path_prefix); | |
| 142 | |
| 143 // Canonicalizes a URL for "CreateURL*Condition" searches. | |
| 144 std::string CanonlicalizeURLForFullSearches(const GURL& url); | |
| 145 | |
| 146 scoped_ptr<URLMatcherCondition> CreateURLPrefixCondition( | |
| 147 const std::string& prefix); | |
| 148 scoped_ptr<URLMatcherCondition> CreateURLSuffixCondition( | |
| 149 const std::string& suffix); | |
| 150 scoped_ptr<URLMatcherCondition> CreateURLContainsCondition( | |
| 151 const std::string& str); | |
| 152 scoped_ptr<URLMatcherCondition> CreateURLEqualsCondition( | |
| 153 const std::string& str); | |
| 154 | |
| 155 // Removes all patterns from |pattern_singletons_| that are not listed in | |
| 156 // |used_patterns|. These patterns are not referenced any more and may be | |
| 157 // freed. | |
| 158 void ForgetUnusedPatterns( | |
| 159 const std::set<SubstringPattern::ID>& used_patterns); | |
| 160 | |
| 161 private: | |
| 162 // Creates a URLMatcherCondition according to the parameters passed. | |
| 163 // The URLMatcherCondition will refer to a SubstringPattern that is | |
| 164 // owned by |pattern_singletons_|. | |
| 165 scoped_ptr<URLMatcherCondition> CreateCondition( | |
| 166 URLMatcherCondition::Criterion criterion, | |
| 167 const std::string& pattern); | |
| 168 | |
| 169 // Prepends a "." to the hostname if it does not start with one. | |
| 170 std::string CanonicalizeHostname(const std::string& hostname) const; | |
| 171 | |
| 172 // Counter that ensures that all created SubstringPatterns have unique IDs. | |
| 173 int id_counter_; | |
| 174 | |
| 175 // Maps the pattern() value of a SubstringPattern to the instance. | |
| 176 typedef std::map<std::string, linked_ptr<const SubstringPattern> > | |
| 177 PatternSingletons; | |
| 178 PatternSingletons pattern_singletons_; | |
|
Matt Perry
2012/02/14 20:47:47
Also, this should use a set, or a hash_set if you
battre
2012/02/14 21:56:42
Done.
| |
| 179 }; | |
| 180 | |
| 181 // This class represents a set of conditions that all need to match on a | |
| 182 // given URL in order to be considered a match. | |
| 183 class URLMatcherConditionSet { | |
| 184 public: | |
| 185 typedef int ID; | |
| 186 typedef ScopedVector<const URLMatcherCondition> Conditions; | |
|
Matt Perry
2012/02/14 19:56:16
Likewise, this is overkill. Just make it a std::se
battre
2012/02/14 21:56:42
Done.
| |
| 187 | |
| 188 URLMatcherConditionSet(ID id, scoped_ptr<Conditions> conditions); | |
| 189 | |
| 190 ID id() const { return id_; } | |
| 191 const Conditions& conditions() const { return conditions_; } | |
| 192 | |
| 193 bool IsMatch( | |
| 194 const std::set<SubstringPattern::ID>& matching_substring_patterns, | |
| 195 const GURL& url) const; | |
| 196 | |
| 197 private: | |
| 198 ID id_; | |
| 199 Conditions conditions_; | |
| 200 }; | |
| 201 | |
| 202 // This class allows matching one URL against a large set of | |
| 203 // URLMatcherConditionSets at the same time. | |
| 204 class URLMatcher { | |
| 205 public: | |
| 206 URLMatcher(); | |
| 207 | |
| 208 void AddConditionSets( | |
| 209 scoped_ptr<ScopedVector<const URLMatcherConditionSet> > condition_sets); | |
|
Matt Perry
2012/02/14 19:56:16
Ditto, overkill. Pass by const ref.
battre
2012/02/14 21:56:42
Done.
| |
| 210 void RemoveConditionSets( | |
| 211 const std::vector<URLMatcherConditionSet::ID>& condition_ids); | |
| 212 | |
| 213 std::set<URLMatcherConditionSet::ID> MatchUrl(const GURL& url); | |
|
Matt Perry
2012/02/14 23:52:25
MatchURL
| |
| 214 | |
| 215 URLMatcherConditionFactory* condition_factory() { | |
| 216 return &condition_factory_; | |
| 217 } | |
| 218 | |
| 219 private: | |
| 220 void UpdateSubstringSetMatcher(bool full_url_conditions); | |
| 221 void UpdateTriggers(); | |
| 222 void UpdateConditionFactory(); | |
| 223 void UpdateInternalDatastructures(); | |
| 224 | |
| 225 URLMatcherConditionFactory condition_factory_; | |
| 226 | |
| 227 // Maps a condition set ID (as passed to AddConditionSets()) to the | |
| 228 // respective URLMatcherConditionSet. | |
| 229 typedef std::map<URLMatcherConditionSet::ID, | |
| 230 linked_ptr<const URLMatcherConditionSet> > | |
| 231 URLMatcherConditionSets; | |
| 232 URLMatcherConditionSets url_matcher_condition_sets_; | |
| 233 | |
| 234 // Maps a SubstringPattern ID to the URLMatcherConditionSet IDs that need to | |
| 235 // be triggered in case of a SubstringPattern match. | |
| 236 std::map<SubstringPattern::ID, std::set<URLMatcherConditionSet::ID> > | |
| 237 substring_match_triggers_; | |
| 238 | |
| 239 SubstringSetMatcher full_url_matcher_; | |
| 240 SubstringSetMatcher url_component_matcher_; | |
| 241 std::set<const SubstringPattern*> registered_full_url_patterns_; | |
| 242 std::set<const SubstringPattern*> registered_url_component_patterns_; | |
| 243 | |
| 244 DISALLOW_COPY_AND_ASSIGN(URLMatcher); | |
| 245 }; | |
| 246 | |
| 247 } // namespace extensions | |
| 248 | |
| 249 #endif // CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_ | |
| OLD | NEW |