OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ | 5 #ifndef CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ |
6 #define CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ | 6 #define CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ |
7 | 7 |
8 #include <set> | 8 #include <set> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
11 #include "base/memory/ref_counted.h" | 11 #include "base/memory/ref_counted.h" |
12 #include "base/memory/scoped_ptr.h" | 12 #include "base/memory/scoped_ptr.h" |
13 #include "base/memory/scoped_vector.h" | 13 #include "base/memory/scoped_vector.h" |
| 14 #include "chrome/common/extensions/matcher/regex_set_matcher.h" |
14 #include "chrome/common/extensions/matcher/substring_set_matcher.h" | 15 #include "chrome/common/extensions/matcher/substring_set_matcher.h" |
15 | 16 |
16 class GURL; | 17 class GURL; |
17 | 18 |
18 namespace base { | 19 namespace base { |
19 class DictionaryValue; | 20 class DictionaryValue; |
20 } | 21 } |
21 | 22 |
22 namespace extensions { | 23 namespace extensions { |
23 | 24 |
24 // This class represents a single URL matching condition, e.g. a match on the | 25 // This class represents a single URL matching condition, e.g. a match on the |
25 // host suffix or the containment of a string in the query component of a GURL. | 26 // host suffix or the containment of a string in the query component of a GURL. |
26 // | 27 // |
27 // The difference from a simple SubstringPattern is that this also supports | 28 // The difference from a simple StringPattern is that this also supports |
28 // checking whether the {Host, Path, Query} of a URL contains a string. The | 29 // checking whether the {Host, Path, Query} of a URL contains a string. The |
29 // reduction of URL matching conditions to StringPatterns conducted by | 30 // reduction of URL matching conditions to StringPatterns conducted by |
30 // URLMatcherConditionFactory is not capable of expressing that alone. | 31 // URLMatcherConditionFactory is not capable of expressing that alone. |
| 32 // |
| 33 // Also supported is matching regular expressions against the URL (URL_MATCHES). |
31 class URLMatcherCondition { | 34 class URLMatcherCondition { |
32 public: | 35 public: |
33 enum Criterion { | 36 enum Criterion { |
34 HOST_PREFIX, | 37 HOST_PREFIX, |
35 HOST_SUFFIX, | 38 HOST_SUFFIX, |
36 HOST_CONTAINS, | 39 HOST_CONTAINS, |
37 HOST_EQUALS, | 40 HOST_EQUALS, |
38 PATH_PREFIX, | 41 PATH_PREFIX, |
39 PATH_SUFFIX, | 42 PATH_SUFFIX, |
40 PATH_CONTAINS, | 43 PATH_CONTAINS, |
41 PATH_EQUALS, | 44 PATH_EQUALS, |
42 QUERY_PREFIX, | 45 QUERY_PREFIX, |
43 QUERY_SUFFIX, | 46 QUERY_SUFFIX, |
44 QUERY_CONTAINS, | 47 QUERY_CONTAINS, |
45 QUERY_EQUALS, | 48 QUERY_EQUALS, |
46 HOST_SUFFIX_PATH_PREFIX, | 49 HOST_SUFFIX_PATH_PREFIX, |
47 HOST_EQUALS_PATH_PREFIX, | 50 HOST_EQUALS_PATH_PREFIX, |
48 URL_PREFIX, | 51 URL_PREFIX, |
49 URL_SUFFIX, | 52 URL_SUFFIX, |
50 URL_CONTAINS, | 53 URL_CONTAINS, |
51 URL_EQUALS, | 54 URL_EQUALS, |
| 55 URL_MATCHES, |
52 }; | 56 }; |
53 | 57 |
54 URLMatcherCondition(); | 58 URLMatcherCondition(); |
55 ~URLMatcherCondition(); | 59 ~URLMatcherCondition(); |
56 URLMatcherCondition(Criterion criterion, | 60 URLMatcherCondition(Criterion criterion, |
57 const SubstringPattern* substring_pattern); | 61 const StringPattern* substring_pattern); |
58 URLMatcherCondition(const URLMatcherCondition& rhs); | 62 URLMatcherCondition(const URLMatcherCondition& rhs); |
59 URLMatcherCondition& operator=(const URLMatcherCondition& rhs); | 63 URLMatcherCondition& operator=(const URLMatcherCondition& rhs); |
60 bool operator<(const URLMatcherCondition& rhs) const; | 64 bool operator<(const URLMatcherCondition& rhs) const; |
61 | 65 |
62 Criterion criterion() const { return criterion_; } | 66 Criterion criterion() const { return criterion_; } |
63 const SubstringPattern* substring_pattern() const { | 67 const StringPattern* string_pattern() const { |
64 return substring_pattern_; | 68 return string_pattern_; |
65 } | 69 } |
66 | 70 |
67 // Returns whether this URLMatcherCondition needs to be executed on a | 71 // Returns whether this URLMatcherCondition needs to be executed on a |
68 // full URL rather than the individual components (see | 72 // full URL rather than the individual components (see |
69 // URLMatcherConditionFactory). | 73 // URLMatcherConditionFactory). |
70 bool IsFullURLCondition() const; | 74 bool IsFullURLCondition() const; |
71 | 75 |
| 76 // Returns whether this URLMatcherCondition is a regular expression to be |
| 77 // handled by a regex matcher instead of a substring matcher. |
| 78 bool IsRegexCondition() const; |
| 79 |
72 // Returns whether this condition is fulfilled according to | 80 // Returns whether this condition is fulfilled according to |
73 // |matching_substring_patterns| and |url|. | 81 // |matching_patterns| and |url|. |
74 bool IsMatch( | 82 bool IsMatch(const std::set<StringPattern::ID>& matching_patterns, |
75 const std::set<SubstringPattern::ID>& matching_substring_patterns, | 83 const GURL& url) const; |
76 const GURL& url) const; | |
77 | 84 |
78 private: | 85 private: |
79 // |criterion_| and |substring_pattern_| describe together what property a URL | 86 // |criterion_| and |string_pattern_| describe together what property a URL |
80 // needs to fulfill to be considered a match. | 87 // needs to fulfill to be considered a match. |
81 Criterion criterion_; | 88 Criterion criterion_; |
82 | 89 |
83 // This is the SubstringPattern that is used in a SubstringSetMatcher. | 90 // This is the StringPattern that is used in a SubstringSetMatcher. |
84 const SubstringPattern* substring_pattern_; | 91 const StringPattern* string_pattern_; |
85 }; | 92 }; |
86 | 93 |
87 // Class to map the problem of finding {host, path, query} {prefixes, suffixes, | 94 // Class to map the problem of finding {host, path, query} {prefixes, suffixes, |
88 // containments, and equality} in GURLs to the substring matching problem. | 95 // containments, and equality} in GURLs to the substring matching problem. |
89 // | 96 // |
90 // Say, you want to check whether the path of a URL starts with "/index.html". | 97 // Say, you want to check whether the path of a URL starts with "/index.html". |
91 // This class preprocesses a URL like "www.google.com/index.html" into something | 98 // This class preprocesses a URL like "www.google.com/index.html" into something |
92 // like "www.google.com|/index.html". After preprocessing, you can search for | 99 // like "www.google.com|/index.html". After preprocessing, you can search for |
93 // "|/index.html" in the string and see that this candidate URL actually has | 100 // "|/index.html" in the string and see that this candidate URL actually has |
94 // a path that starts with "/index.html". In contrast, | 101 // a path that starts with "/index.html". In contrast, |
95 // "www.google.com/images/index.html" would be normalized to | 102 // "www.google.com/images/index.html" would be normalized to |
96 // "www.google.com|/images/index.html". It is easy to see that it contains | 103 // "www.google.com|/images/index.html". It is easy to see that it contains |
97 // "/index.html" but the path of the URL does not start with "/index.html". | 104 // "/index.html" but the path of the URL does not start with "/index.html". |
98 // | 105 // |
99 // This preprocessing is important if you want to match a URL against many | 106 // This preprocessing is important if you want to match a URL against many |
100 // patterns because it reduces the matching to a "discover all substrings | 107 // patterns because it reduces the matching to a "discover all substrings |
101 // of a dictionary in a text" problem, which can be solved very efficiently | 108 // of a dictionary in a text" problem, which can be solved very efficiently |
102 // by the Aho-Corasick algorithm. | 109 // by the Aho-Corasick algorithm. |
103 // | 110 // |
104 // IMPORTANT: The URLMatcherConditionFactory owns the SubstringPattern | 111 // IMPORTANT: The URLMatcherConditionFactory owns the StringPattern |
105 // referenced by created URLMatcherConditions. Therefore, it must outlive | 112 // referenced by created URLMatcherConditions. Therefore, it must outlive |
106 // all created URLMatcherConditions and the SubstringSetMatcher. | 113 // all created URLMatcherConditions and the SubstringSetMatcher. |
107 class URLMatcherConditionFactory { | 114 class URLMatcherConditionFactory { |
108 public: | 115 public: |
109 URLMatcherConditionFactory(); | 116 URLMatcherConditionFactory(); |
110 ~URLMatcherConditionFactory(); | 117 ~URLMatcherConditionFactory(); |
111 | 118 |
112 // Canonicalizes a URL for "Create{Host,Path,Query}*Condition" searches. | 119 // Canonicalizes a URL for "Create{Host,Path,Query}*Condition" searches. |
113 std::string CanonicalizeURLForComponentSearches(const GURL& url); | 120 std::string CanonicalizeURLForComponentSearches(const GURL& url); |
114 | 121 |
(...skipping 24 matching lines...) Expand all Loading... |
139 URLMatcherCondition CreateHostSuffixPathPrefixCondition( | 146 URLMatcherCondition CreateHostSuffixPathPrefixCondition( |
140 const std::string& host_suffix, | 147 const std::string& host_suffix, |
141 const std::string& path_prefix); | 148 const std::string& path_prefix); |
142 URLMatcherCondition CreateHostEqualsPathPrefixCondition( | 149 URLMatcherCondition CreateHostEqualsPathPrefixCondition( |
143 const std::string& host, | 150 const std::string& host, |
144 const std::string& path_prefix); | 151 const std::string& path_prefix); |
145 | 152 |
146 // Canonicalizes a URL for "CreateURL*Condition" searches. | 153 // Canonicalizes a URL for "CreateURL*Condition" searches. |
147 std::string CanonicalizeURLForFullSearches(const GURL& url); | 154 std::string CanonicalizeURLForFullSearches(const GURL& url); |
148 | 155 |
| 156 // Canonicalizes a URL for "CreateURLMatchesCondition" searches. |
| 157 std::string CanonicalizeURLForRegexSearches(const GURL& url); |
| 158 |
149 URLMatcherCondition CreateURLPrefixCondition(const std::string& prefix); | 159 URLMatcherCondition CreateURLPrefixCondition(const std::string& prefix); |
150 URLMatcherCondition CreateURLSuffixCondition(const std::string& suffix); | 160 URLMatcherCondition CreateURLSuffixCondition(const std::string& suffix); |
151 URLMatcherCondition CreateURLContainsCondition(const std::string& str); | 161 URLMatcherCondition CreateURLContainsCondition(const std::string& str); |
152 URLMatcherCondition CreateURLEqualsCondition(const std::string& str); | 162 URLMatcherCondition CreateURLEqualsCondition(const std::string& str); |
153 | 163 |
| 164 URLMatcherCondition CreateURLMatchesCondition(const std::string& regex); |
| 165 |
154 // Removes all patterns from |pattern_singletons_| that are not listed in | 166 // Removes all patterns from the pattern singleton sets that are not listed in |
155 // |used_patterns|. These patterns are not referenced any more and get | 167 // |used_patterns|. These patterns are not referenced any more and get |
156 // freed. | 168 // freed. |
157 void ForgetUnusedPatterns( | 169 void ForgetUnusedPatterns( |
158 const std::set<SubstringPattern::ID>& used_patterns); | 170 const std::set<StringPattern::ID>& used_patterns); |
159 | 171 |
160 // Returns true if this object retains no allocated data. Only for debugging. | 172 // Returns true if this object retains no allocated data. Only for debugging. |
161 bool IsEmpty() const; | 173 bool IsEmpty() const; |
162 | 174 |
163 private: | 175 private: |
164 // Creates a URLMatcherCondition according to the parameters passed. | 176 // Creates a URLMatcherCondition according to the parameters passed. |
165 // The URLMatcherCondition will refer to a SubstringPattern that is | 177 // The URLMatcherCondition will refer to a StringPattern that is |
166 // owned by |pattern_singletons_|. | 178 // owned by |substring_pattern_singletons_| or |regex_pattern_singletons_|. |
167 URLMatcherCondition CreateCondition(URLMatcherCondition::Criterion criterion, | 179 URLMatcherCondition CreateCondition(URLMatcherCondition::Criterion criterion, |
168 const std::string& pattern); | 180 const std::string& pattern); |
169 | 181 |
170 // Prepends a "." to the hostname if it does not start with one. | 182 // Prepends a "." to the hostname if it does not start with one. |
171 std::string CanonicalizeHostname(const std::string& hostname) const; | 183 std::string CanonicalizeHostname(const std::string& hostname) const; |
172 | 184 |
173 // Counter that ensures that all created SubstringPatterns have unique IDs. | 185 // Counter that ensures that all created StringPatterns have unique IDs. |
| 186 // Note that substring patterns and regex patterns will use different IDs. |
174 int id_counter_; | 187 int id_counter_; |
175 | 188 |
176 // This comparison considers only the pattern() value of the | 189 // This comparison considers only the pattern() value of the |
177 // SubstringPatterns. | 190 // StringPatterns. |
178 struct SubstringPatternPointerCompare { | 191 struct StringPatternPointerCompare { |
179 bool operator()(SubstringPattern* lhs, SubstringPattern* rhs) const; | 192 bool operator()(StringPattern* lhs, StringPattern* rhs) const; |
180 }; | 193 }; |
181 // Set to ensure that we generate only one SubstringPattern for each content | 194 // Set to ensure that we generate only one StringPattern for each content |
182 // of SubstringPattern::pattern(). | 195 // of StringPattern::pattern(). |
183 typedef std::set<SubstringPattern*, SubstringPatternPointerCompare> | 196 typedef std::set<StringPattern*, StringPatternPointerCompare> |
184 PatternSingletons; | 197 PatternSingletons; |
185 PatternSingletons pattern_singletons_; | 198 PatternSingletons substring_pattern_singletons_; |
| 199 PatternSingletons regex_pattern_singletons_; |
186 | 200 |
187 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionFactory); | 201 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionFactory); |
188 }; | 202 }; |
189 | 203 |
190 // This class represents a filter for the URL scheme to be hooked up into a | 204 // This class represents a filter for the URL scheme to be hooked up into a |
191 // URLMatcherConditionSet. | 205 // URLMatcherConditionSet. |
192 class URLMatcherSchemeFilter { | 206 class URLMatcherSchemeFilter { |
193 public: | 207 public: |
194 explicit URLMatcherSchemeFilter(const std::string& filter); | 208 explicit URLMatcherSchemeFilter(const std::string& filter); |
195 explicit URLMatcherSchemeFilter(const std::vector<std::string>& filters); | 209 explicit URLMatcherSchemeFilter(const std::vector<std::string>& filters); |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
237 // Matches if all conditions in |conditions|, |scheme_filter| and | 251 // Matches if all conditions in |conditions|, |scheme_filter| and |
238 // |port_filter| are fulfilled. |scheme_filter| and |port_filter| may be NULL, | 252 // |port_filter| are fulfilled. |scheme_filter| and |port_filter| may be NULL, |
239 // in which case, no restrictions are imposed on the scheme/port of a URL. | 253 // in which case, no restrictions are imposed on the scheme/port of a URL. |
240 URLMatcherConditionSet(ID id, const Conditions& conditions, | 254 URLMatcherConditionSet(ID id, const Conditions& conditions, |
241 scoped_ptr<URLMatcherSchemeFilter> scheme_filter, | 255 scoped_ptr<URLMatcherSchemeFilter> scheme_filter, |
242 scoped_ptr<URLMatcherPortFilter> port_filter); | 256 scoped_ptr<URLMatcherPortFilter> port_filter); |
243 | 257 |
244 ID id() const { return id_; } | 258 ID id() const { return id_; } |
245 const Conditions& conditions() const { return conditions_; } | 259 const Conditions& conditions() const { return conditions_; } |
246 | 260 |
247 bool IsMatch( | 261 bool IsMatch(const std::set<StringPattern::ID>& matching_patterns, |
248 const std::set<SubstringPattern::ID>& matching_substring_patterns, | 262 const GURL& url) const; |
249 const GURL& url) const; | |
250 | 263 |
251 private: | 264 private: |
252 friend class base::RefCounted<URLMatcherConditionSet>; | 265 friend class base::RefCounted<URLMatcherConditionSet>; |
253 ~URLMatcherConditionSet(); | 266 ~URLMatcherConditionSet(); |
254 ID id_; | 267 ID id_; |
255 Conditions conditions_; | 268 Conditions conditions_; |
256 scoped_ptr<URLMatcherSchemeFilter> scheme_filter_; | 269 scoped_ptr<URLMatcherSchemeFilter> scheme_filter_; |
257 scoped_ptr<URLMatcherPortFilter> port_filter_; | 270 scoped_ptr<URLMatcherPortFilter> port_filter_; |
258 | 271 |
259 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionSet); | 272 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionSet); |
(...skipping 29 matching lines...) Expand all Loading... |
289 // URLMatcherConditionSets for this URLMatcher. | 302 // URLMatcherConditionSets for this URLMatcher. |
290 URLMatcherConditionFactory* condition_factory() { | 303 URLMatcherConditionFactory* condition_factory() { |
291 return &condition_factory_; | 304 return &condition_factory_; |
292 } | 305 } |
293 | 306 |
294 // Returns true if this object retains no allocated data. Only for debugging. | 307 // Returns true if this object retains no allocated data. Only for debugging. |
295 bool IsEmpty() const; | 308 bool IsEmpty() const; |
296 | 309 |
297 private: | 310 private: |
298 void UpdateSubstringSetMatcher(bool full_url_conditions); | 311 void UpdateSubstringSetMatcher(bool full_url_conditions); |
| 312 void UpdateRegexSetMatcher(); |
299 void UpdateTriggers(); | 313 void UpdateTriggers(); |
300 void UpdateConditionFactory(); | 314 void UpdateConditionFactory(); |
301 void UpdateInternalDatastructures(); | 315 void UpdateInternalDatastructures(); |
302 | 316 |
303 URLMatcherConditionFactory condition_factory_; | 317 URLMatcherConditionFactory condition_factory_; |
304 | 318 |
305 // Maps the ID of a URLMatcherConditionSet to the respective | 319 // Maps the ID of a URLMatcherConditionSet to the respective |
306 // URLMatcherConditionSet. | 320 // URLMatcherConditionSet. |
307 typedef std::map<URLMatcherConditionSet::ID, | 321 typedef std::map<URLMatcherConditionSet::ID, |
308 scoped_refptr<URLMatcherConditionSet> > | 322 scoped_refptr<URLMatcherConditionSet> > |
309 URLMatcherConditionSets; | 323 URLMatcherConditionSets; |
310 URLMatcherConditionSets url_matcher_condition_sets_; | 324 URLMatcherConditionSets url_matcher_condition_sets_; |
311 | 325 |
312 // Maps a SubstringPattern ID to the URLMatcherConditions that need to | 326 // Maps a StringPattern ID to the URLMatcherConditions that need to |
313 // be triggered in case of a SubstringPattern match. | 327 // be triggered in case of a StringPattern match. |
314 std::map<SubstringPattern::ID, std::set<URLMatcherConditionSet::ID> > | 328 std::map<StringPattern::ID, std::set<URLMatcherConditionSet::ID> > |
315 substring_match_triggers_; | 329 substring_match_triggers_; |
316 | 330 |
317 SubstringSetMatcher full_url_matcher_; | 331 SubstringSetMatcher full_url_matcher_; |
318 SubstringSetMatcher url_component_matcher_; | 332 SubstringSetMatcher url_component_matcher_; |
319 std::set<const SubstringPattern*> registered_full_url_patterns_; | 333 RegexSetMatcher regex_set_matcher_; |
320 std::set<const SubstringPattern*> registered_url_component_patterns_; | 334 std::set<const StringPattern*> registered_full_url_patterns_; |
| 335 std::set<const StringPattern*> registered_url_component_patterns_; |
321 | 336 |
322 DISALLOW_COPY_AND_ASSIGN(URLMatcher); | 337 DISALLOW_COPY_AND_ASSIGN(URLMatcher); |
323 }; | 338 }; |
324 | 339 |
325 } // namespace extensions | 340 } // namespace extensions |
326 | 341 |
327 #endif // CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ | 342 #endif // CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ |
OLD | NEW |