Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(500)

Side by Side Diff: chrome/common/extensions/matcher/url_matcher.h

Issue 10910179: Event matching by regular expression matching on URLs. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: ascii artiste Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ 5 #ifndef CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_
6 #define CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ 6 #define CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_
7 7
8 #include <set> 8 #include <set>
9 #include <vector> 9 #include <vector>
10 10
11 #include "base/memory/ref_counted.h" 11 #include "base/memory/ref_counted.h"
12 #include "base/memory/scoped_ptr.h" 12 #include "base/memory/scoped_ptr.h"
13 #include "base/memory/scoped_vector.h" 13 #include "base/memory/scoped_vector.h"
14 #include "chrome/common/extensions/matcher/regex_set_matcher.h"
14 #include "chrome/common/extensions/matcher/substring_set_matcher.h" 15 #include "chrome/common/extensions/matcher/substring_set_matcher.h"
15 16
16 class GURL; 17 class GURL;
17 18
18 namespace base { 19 namespace base {
19 class DictionaryValue; 20 class DictionaryValue;
20 } 21 }
21 22
22 namespace extensions { 23 namespace extensions {
23 24
24 // This class represents a single URL matching condition, e.g. a match on the 25 // This class represents a single URL matching condition, e.g. a match on the
25 // host suffix or the containment of a string in the query component of a GURL. 26 // host suffix or the containment of a string in the query component of a GURL.
26 // 27 //
27 // The difference from a simple SubstringPattern is that this also supports 28 // The difference from a simple StringPattern is that this also supports
28 // checking whether the {Host, Path, Query} of a URL contains a string. The 29 // checking whether the {Host, Path, Query} of a URL contains a string. The
29 // reduction of URL matching conditions to StringPatterns conducted by 30 // reduction of URL matching conditions to StringPatterns conducted by
30 // URLMatcherConditionFactory is not capable of expressing that alone. 31 // URLMatcherConditionFactory is not capable of expressing that alone.
32 //
33 // Also supported is matching regular expressions against the URL (URL_MATCHES).
31 class URLMatcherCondition { 34 class URLMatcherCondition {
32 public: 35 public:
33 enum Criterion { 36 enum Criterion {
34 HOST_PREFIX, 37 HOST_PREFIX,
35 HOST_SUFFIX, 38 HOST_SUFFIX,
36 HOST_CONTAINS, 39 HOST_CONTAINS,
37 HOST_EQUALS, 40 HOST_EQUALS,
38 PATH_PREFIX, 41 PATH_PREFIX,
39 PATH_SUFFIX, 42 PATH_SUFFIX,
40 PATH_CONTAINS, 43 PATH_CONTAINS,
41 PATH_EQUALS, 44 PATH_EQUALS,
42 QUERY_PREFIX, 45 QUERY_PREFIX,
43 QUERY_SUFFIX, 46 QUERY_SUFFIX,
44 QUERY_CONTAINS, 47 QUERY_CONTAINS,
45 QUERY_EQUALS, 48 QUERY_EQUALS,
46 HOST_SUFFIX_PATH_PREFIX, 49 HOST_SUFFIX_PATH_PREFIX,
47 HOST_EQUALS_PATH_PREFIX, 50 HOST_EQUALS_PATH_PREFIX,
48 URL_PREFIX, 51 URL_PREFIX,
49 URL_SUFFIX, 52 URL_SUFFIX,
50 URL_CONTAINS, 53 URL_CONTAINS,
51 URL_EQUALS, 54 URL_EQUALS,
55 URL_MATCHES,
52 }; 56 };
53 57
54 URLMatcherCondition(); 58 URLMatcherCondition();
55 ~URLMatcherCondition(); 59 ~URLMatcherCondition();
56 URLMatcherCondition(Criterion criterion, 60 URLMatcherCondition(Criterion criterion,
57 const SubstringPattern* substring_pattern); 61 const StringPattern* substring_pattern);
58 URLMatcherCondition(const URLMatcherCondition& rhs); 62 URLMatcherCondition(const URLMatcherCondition& rhs);
59 URLMatcherCondition& operator=(const URLMatcherCondition& rhs); 63 URLMatcherCondition& operator=(const URLMatcherCondition& rhs);
60 bool operator<(const URLMatcherCondition& rhs) const; 64 bool operator<(const URLMatcherCondition& rhs) const;
61 65
62 Criterion criterion() const { return criterion_; } 66 Criterion criterion() const { return criterion_; }
63 const SubstringPattern* substring_pattern() const { 67 const StringPattern* string_pattern() const {
64 return substring_pattern_; 68 return string_pattern_;
65 } 69 }
66 70
67 // Returns whether this URLMatcherCondition needs to be executed on a 71 // Returns whether this URLMatcherCondition needs to be executed on a
68 // full URL rather than the individual components (see 72 // full URL rather than the individual components (see
69 // URLMatcherConditionFactory). 73 // URLMatcherConditionFactory).
70 bool IsFullURLCondition() const; 74 bool IsFullURLCondition() const;
71 75
76 // Returns whether this URLMatcherCondition is a regular expression to be
77 // handled by a regex matcher instead of a substring matcher.
78 bool IsRegexCondition() const;
79
72 // Returns whether this condition is fulfilled according to 80 // Returns whether this condition is fulfilled according to
73 // |matching_substring_patterns| and |url|. 81 // |matching_patterns| and |url|.
74 bool IsMatch( 82 bool IsMatch(const std::set<StringPattern::ID>& matching_patterns,
75 const std::set<SubstringPattern::ID>& matching_substring_patterns, 83 const GURL& url) const;
76 const GURL& url) const;
77 84
78 private: 85 private:
79 // |criterion_| and |substring_pattern_| describe together what property a URL 86 // |criterion_| and |string_pattern_| describe together what property a URL
80 // needs to fulfill to be considered a match. 87 // needs to fulfill to be considered a match.
81 Criterion criterion_; 88 Criterion criterion_;
82 89
83 // This is the SubstringPattern that is used in a SubstringSetMatcher. 90 // This is the StringPattern that is used in a SubstringSetMatcher or RegexSetMatcher.
84 const SubstringPattern* substring_pattern_; 91 const StringPattern* string_pattern_;
85 }; 92 };
86 93
87 // Class to map the problem of finding {host, path, query} {prefixes, suffixes, 94 // Class to map the problem of finding {host, path, query} {prefixes, suffixes,
88 // containments, and equality} in GURLs to the substring matching problem. 95 // containments, and equality} in GURLs to the substring matching problem.
89 // 96 //
90 // Say, you want to check whether the path of a URL starts with "/index.html". 97 // Say, you want to check whether the path of a URL starts with "/index.html".
91 // This class preprocesses a URL like "www.google.com/index.html" into something 98 // This class preprocesses a URL like "www.google.com/index.html" into something
92 // like "www.google.com|/index.html". After preprocessing, you can search for 99 // like "www.google.com|/index.html". After preprocessing, you can search for
93 // "|/index.html" in the string and see that this candidate URL actually has 100 // "|/index.html" in the string and see that this candidate URL actually has
94 // a path that starts with "/index.html". In contrast, 101 // a path that starts with "/index.html". In contrast,
95 // "www.google.com/images/index.html" would be normalized to 102 // "www.google.com/images/index.html" would be normalized to
96 // "www.google.com|/images/index.html". It is easy to see that it contains 103 // "www.google.com|/images/index.html". It is easy to see that it contains
97 // "/index.html" but the path of the URL does not start with "/index.html". 104 // "/index.html" but the path of the URL does not start with "/index.html".
98 // 105 //
99 // This preprocessing is important if you want to match a URL against many 106 // This preprocessing is important if you want to match a URL against many
100 // patterns because it reduces the matching to a "discover all substrings 107 // patterns because it reduces the matching to a "discover all substrings
101 // of a dictionary in a text" problem, which can be solved very efficiently 108 // of a dictionary in a text" problem, which can be solved very efficiently
102 // by the Aho-Corasick algorithm. 109 // by the Aho-Corasick algorithm.
103 // 110 //
104 // IMPORTANT: The URLMatcherConditionFactory owns the SubstringPattern 111 // IMPORTANT: The URLMatcherConditionFactory owns the StringPattern
105 // referenced by created URLMatcherConditions. Therefore, it must outlive 112 // referenced by created URLMatcherConditions. Therefore, it must outlive
106 // all created URLMatcherConditions and the SubstringSetMatcher. 113 // all created URLMatcherConditions and the SubstringSetMatcher.
107 class URLMatcherConditionFactory { 114 class URLMatcherConditionFactory {
108 public: 115 public:
109 URLMatcherConditionFactory(); 116 URLMatcherConditionFactory();
110 ~URLMatcherConditionFactory(); 117 ~URLMatcherConditionFactory();
111 118
112 // Canonicalizes a URL for "Create{Host,Path,Query}*Condition" searches. 119 // Canonicalizes a URL for "Create{Host,Path,Query}*Condition" searches.
113 std::string CanonicalizeURLForComponentSearches(const GURL& url); 120 std::string CanonicalizeURLForComponentSearches(const GURL& url);
114 121
(...skipping 24 matching lines...) Expand all
139 URLMatcherCondition CreateHostSuffixPathPrefixCondition( 146 URLMatcherCondition CreateHostSuffixPathPrefixCondition(
140 const std::string& host_suffix, 147 const std::string& host_suffix,
141 const std::string& path_prefix); 148 const std::string& path_prefix);
142 URLMatcherCondition CreateHostEqualsPathPrefixCondition( 149 URLMatcherCondition CreateHostEqualsPathPrefixCondition(
143 const std::string& host, 150 const std::string& host,
144 const std::string& path_prefix); 151 const std::string& path_prefix);
145 152
146 // Canonicalizes a URL for "CreateURL*Condition" searches. 153 // Canonicalizes a URL for "CreateURL*Condition" searches.
147 std::string CanonicalizeURLForFullSearches(const GURL& url); 154 std::string CanonicalizeURLForFullSearches(const GURL& url);
148 155
156 // Canonicalizes a URL for "CreateURLMatchesCondition" searches.
157 std::string CanonicalizeURLForRegexSearches(const GURL& url);
158
149 URLMatcherCondition CreateURLPrefixCondition(const std::string& prefix); 159 URLMatcherCondition CreateURLPrefixCondition(const std::string& prefix);
150 URLMatcherCondition CreateURLSuffixCondition(const std::string& suffix); 160 URLMatcherCondition CreateURLSuffixCondition(const std::string& suffix);
151 URLMatcherCondition CreateURLContainsCondition(const std::string& str); 161 URLMatcherCondition CreateURLContainsCondition(const std::string& str);
152 URLMatcherCondition CreateURLEqualsCondition(const std::string& str); 162 URLMatcherCondition CreateURLEqualsCondition(const std::string& str);
153 163
164 URLMatcherCondition CreateURLMatchesCondition(const std::string& regex);
165
154 // Removes all patterns from |pattern_singletons_| that are not listed in 166 // Removes all patterns from |pattern_singletons_| that are not listed in
155 // |used_patterns|. These patterns are not referenced any more and get 167 // |used_patterns|. These patterns are not referenced any more and get
156 // freed. 168 // freed.
157 void ForgetUnusedPatterns( 169 void ForgetUnusedPatterns(
158 const std::set<SubstringPattern::ID>& used_patterns); 170 const std::set<StringPattern::ID>& used_patterns);
159 171
160 // Returns true if this object retains no allocated data. Only for debugging. 172 // Returns true if this object retains no allocated data. Only for debugging.
161 bool IsEmpty() const; 173 bool IsEmpty() const;
162 174
163 private: 175 private:
164 // Creates a URLMatcherCondition according to the parameters passed. 176 // Creates a URLMatcherCondition according to the parameters passed.
165 // The URLMatcherCondition will refer to a SubstringPattern that is 177 // The URLMatcherCondition will refer to a StringPattern that is
166 // owned by |pattern_singletons_|. 178 // owned by |pattern_singletons_|.
167 URLMatcherCondition CreateCondition(URLMatcherCondition::Criterion criterion, 179 URLMatcherCondition CreateCondition(URLMatcherCondition::Criterion criterion,
168 const std::string& pattern); 180 const std::string& pattern);
169 181
170 // Prepends a "." to the hostname if it does not start with one. 182 // Prepends a "." to the hostname if it does not start with one.
171 std::string CanonicalizeHostname(const std::string& hostname) const; 183 std::string CanonicalizeHostname(const std::string& hostname) const;
172 184
173 // Counter that ensures that all created SubstringPatterns have unique IDs. 185 // Counter that ensures that all created StringPatterns have unique IDs.
186 // Note that substring patterns and regex patterns will use different IDs.
174 int id_counter_; 187 int id_counter_;
175 188
176 // This comparison considers only the pattern() value of the 189 // This comparison considers only the pattern() value of the
177 // SubstringPatterns. 190 // StringPatterns.
178 struct SubstringPatternPointerCompare { 191 struct StringPatternPointerCompare {
179 bool operator()(SubstringPattern* lhs, SubstringPattern* rhs) const; 192 bool operator()(StringPattern* lhs, StringPattern* rhs) const;
180 }; 193 };
181 // Set to ensure that we generate only one SubstringPattern for each content 194 // Set to ensure that we generate only one StringPattern for each content
182 // of SubstringPattern::pattern(). 195 // of StringPattern::pattern().
183 typedef std::set<SubstringPattern*, SubstringPatternPointerCompare> 196 typedef std::set<StringPattern*, StringPatternPointerCompare>
184 PatternSingletons; 197 PatternSingletons;
185 PatternSingletons pattern_singletons_; 198 PatternSingletons substring_pattern_singletons_;
199 PatternSingletons regex_pattern_singletons_;
186 200
187 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionFactory); 201 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionFactory);
188 }; 202 };
189 203
190 // This class represents a filter for the URL scheme to be hooked up into a 204 // This class represents a filter for the URL scheme to be hooked up into a
191 // URLMatcherConditionSet. 205 // URLMatcherConditionSet.
192 class URLMatcherSchemeFilter { 206 class URLMatcherSchemeFilter {
193 public: 207 public:
194 explicit URLMatcherSchemeFilter(const std::string& filter); 208 explicit URLMatcherSchemeFilter(const std::string& filter);
195 explicit URLMatcherSchemeFilter(const std::vector<std::string>& filters); 209 explicit URLMatcherSchemeFilter(const std::vector<std::string>& filters);
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
237 // Matches if all conditions in |conditions|, |scheme_filter| and 251 // Matches if all conditions in |conditions|, |scheme_filter| and
238 // |port_filter| are fulfilled. |scheme_filter| and |port_filter| may be NULL, 252 // |port_filter| are fulfilled. |scheme_filter| and |port_filter| may be NULL,
239 // in which case, no restrictions are imposed on the scheme/port of a URL. 253 // in which case, no restrictions are imposed on the scheme/port of a URL.
240 URLMatcherConditionSet(ID id, const Conditions& conditions, 254 URLMatcherConditionSet(ID id, const Conditions& conditions,
241 scoped_ptr<URLMatcherSchemeFilter> scheme_filter, 255 scoped_ptr<URLMatcherSchemeFilter> scheme_filter,
242 scoped_ptr<URLMatcherPortFilter> port_filter); 256 scoped_ptr<URLMatcherPortFilter> port_filter);
243 257
244 ID id() const { return id_; } 258 ID id() const { return id_; }
245 const Conditions& conditions() const { return conditions_; } 259 const Conditions& conditions() const { return conditions_; }
246 260
247 bool IsMatch( 261 bool IsMatch(const std::set<StringPattern::ID>& matching_patterns,
248 const std::set<SubstringPattern::ID>& matching_substring_patterns, 262 const GURL& url) const;
249 const GURL& url) const;
250 263
251 private: 264 private:
252 friend class base::RefCounted<URLMatcherConditionSet>; 265 friend class base::RefCounted<URLMatcherConditionSet>;
253 ~URLMatcherConditionSet(); 266 ~URLMatcherConditionSet();
254 ID id_; 267 ID id_;
255 Conditions conditions_; 268 Conditions conditions_;
256 scoped_ptr<URLMatcherSchemeFilter> scheme_filter_; 269 scoped_ptr<URLMatcherSchemeFilter> scheme_filter_;
257 scoped_ptr<URLMatcherPortFilter> port_filter_; 270 scoped_ptr<URLMatcherPortFilter> port_filter_;
258 271
259 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionSet); 272 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionSet);
(...skipping 29 matching lines...) Expand all
289 // URLMatcherConditionSets for this URLMatcher. 302 // URLMatcherConditionSets for this URLMatcher.
290 URLMatcherConditionFactory* condition_factory() { 303 URLMatcherConditionFactory* condition_factory() {
291 return &condition_factory_; 304 return &condition_factory_;
292 } 305 }
293 306
294 // Returns true if this object retains no allocated data. Only for debugging. 307 // Returns true if this object retains no allocated data. Only for debugging.
295 bool IsEmpty() const; 308 bool IsEmpty() const;
296 309
297 private: 310 private:
298 void UpdateSubstringSetMatcher(bool full_url_conditions); 311 void UpdateSubstringSetMatcher(bool full_url_conditions);
312 void UpdateRegexSetMatcher();
299 void UpdateTriggers(); 313 void UpdateTriggers();
300 void UpdateConditionFactory(); 314 void UpdateConditionFactory();
301 void UpdateInternalDatastructures(); 315 void UpdateInternalDatastructures();
302 316
303 URLMatcherConditionFactory condition_factory_; 317 URLMatcherConditionFactory condition_factory_;
304 318
305 // Maps the ID of a URLMatcherConditionSet to the respective 319 // Maps the ID of a URLMatcherConditionSet to the respective
306 // URLMatcherConditionSet. 320 // URLMatcherConditionSet.
307 typedef std::map<URLMatcherConditionSet::ID, 321 typedef std::map<URLMatcherConditionSet::ID,
308 scoped_refptr<URLMatcherConditionSet> > 322 scoped_refptr<URLMatcherConditionSet> >
309 URLMatcherConditionSets; 323 URLMatcherConditionSets;
310 URLMatcherConditionSets url_matcher_condition_sets_; 324 URLMatcherConditionSets url_matcher_condition_sets_;
311 325
312 // Maps a SubstringPattern ID to the URLMatcherConditionSets that need to 326 // Maps a StringPattern ID to the URLMatcherConditionSets that need to
313 // be triggered in case of a SubstringPattern match. 327 // be triggered in case of a StringPattern match.
314 std::map<SubstringPattern::ID, std::set<URLMatcherConditionSet::ID> > 328 std::map<StringPattern::ID, std::set<URLMatcherConditionSet::ID> >
315 substring_match_triggers_; 329 substring_match_triggers_;
316 330
317 SubstringSetMatcher full_url_matcher_; 331 SubstringSetMatcher full_url_matcher_;
318 SubstringSetMatcher url_component_matcher_; 332 SubstringSetMatcher url_component_matcher_;
319 std::set<const SubstringPattern*> registered_full_url_patterns_; 333 RegexSetMatcher regex_set_matcher_;
320 std::set<const SubstringPattern*> registered_url_component_patterns_; 334 std::set<const StringPattern*> registered_full_url_patterns_;
335 std::set<const StringPattern*> registered_url_component_patterns_;
321 336
322 DISALLOW_COPY_AND_ASSIGN(URLMatcher); 337 DISALLOW_COPY_AND_ASSIGN(URLMatcher);
323 }; 338 };
324 339
325 } // namespace extensions 340 } // namespace extensions
326 341
327 #endif // CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_ 342 #endif // CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_
OLDNEW
« no previous file with comments | « chrome/common/extensions/matcher/substring_set_matcher_unittest.cc ('k') | chrome/common/extensions/matcher/url_matcher.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698