Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(77)

Side by Side Diff: chrome/browser/extensions/api/declarative/url_matcher.h

Issue 9390018: Implementation of a Matching strategy for URLs in the Declarative WebRequest API. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Cleanup Created 8 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_
6 #define CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_
7 #pragma once
8
9 #include <set>
10 #include <vector>
11
12 #include "base/hash_tables.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "base/memory/scoped_vector.h"
15 #include "chrome/browser/extensions/api/declarative/substring_set_matcher.h"
16
17 class GURL;
18
19 namespace base {
20 class DictionaryValue;
21 }
22
23 namespace extensions {
24
25 // This class represents a single URL matching condition, e.g. a match on the
26 // host suffix or the containment of a string in the query component of a GURL.
27 //
28 // The difference from a simple SubstringPattern is that this also supports
29 // checking whether the {Host, Path, Query} of a URL contains a string. The
30 // reduction of URL matching conditions to SubstringPatterns conducted by
31 // URLMatcherConditionFactory is not capable of expressing that alone.
32 class URLMatcherCondition {
33 public:
34 enum Criterion {  // One value per {HOST, PATH, QUERY, URL} x {PREFIX, SUFFIX, CONTAINS, EQUALS}, plus the combined HOST_SUFFIX_PATH_PREFIX.
35 HOST_PREFIX,
36 HOST_SUFFIX,
37 HOST_CONTAINS,
38 HOST_EQUALS,
39 PATH_PREFIX,
40 PATH_SUFFIX,
41 PATH_CONTAINS,
42 PATH_EQUALS,
43 QUERY_PREFIX,
44 QUERY_SUFFIX,
45 QUERY_CONTAINS,
46 QUERY_EQUALS,
47 HOST_SUFFIX_PATH_PREFIX,
48 URL_PREFIX,
49 URL_SUFFIX,
50 URL_CONTAINS,
51 URL_EQUALS,
52 };
53
54 URLMatcherCondition();  // NOTE(review): the initial values of |criterion_| and |substring_pattern_| are not visible in this header.
55 URLMatcherCondition(Criterion criterion,
56 const SubstringPattern* substring_pattern);  // |substring_pattern| is presumably kept in |substring_pattern_| without taking ownership - confirm.
57 URLMatcherCondition(const URLMatcherCondition& rhs);
58 URLMatcherCondition& operator=(const URLMatcherCondition& rhs);
59 bool operator<(const URLMatcherCondition& rhs) const;  // Lets conditions be stored in a std::set (see URLMatcherConditionSet::Conditions); the ordering is defined outside this header.
60
61 Criterion criterion() const { return criterion_; }
62 const SubstringPattern* substring_pattern() const {
63 return substring_pattern_;
64 }
65
66 // Returns whether this URLMatcherCondition needs to be executed on a
67 // full URL rather than the individual components (see
68 // URLMatcherConditionFactory).
69 bool IsFullURLCondition() const;
70
71 // Returns whether this condition is fulfilled according to
72 // |matching_substring_patterns| and |url|.
73 bool IsMatch(
74 const std::set<SubstringPattern::ID>& matching_substring_patterns,
75 const GURL& url) const;
76
77 private:
78 // |criterion_| and |substring_pattern_| describe together what property a URL
79 // needs to fulfill to be considered a match.
80 Criterion criterion_;
81
82 // This is the SubstringPattern that is used in a SubstringSetMatcher.
83 const SubstringPattern* substring_pattern_;  // Raw pointer; per the IMPORTANT note on URLMatcherConditionFactory, the factory owns the pattern.
84 };
85
86 // Class to map the problem of finding {host, path, query} {prefixes, suffixes,
87 // containments, and equality} in GURLs to the substring matching problem.
88 //
89 // Say, you want to check whether the path of a URL starts with "/index.html".
90 // This class preprocesses a URL like "www.google.com/index.html" into something
91 // like "www.google.com|/index.html". After preprocessing, you can search for
92 // "|/index.html" in the string and see that this candidate URL actually has
93 // a path that starts with "/index.html". On the contrary,
94 // "www.google.com/images/index.html" would be normalized to
95 // "www.google.com|/images/index.html". It is easy to see that it contains
96 // "/index.html" but the path of the URL does not start with "/index.html".
97 //
98 // This preprocessing is important if you want to match a URL against many
99 // patterns because it reduces the matching to a "discover all substrings
100 // of a dictionary in a text" problem, which can be solved very efficiently
101 // by the Aho-Corasick algorithm.
102 //
103 // IMPORTANT: The URLMatcherConditionFactory owns the SubstringPatterns
104 // referenced by created URLMatcherConditions. Therefore, it must outlive
105 // all created URLMatcherConditions and the SubstringSetMatcher.
106 class URLMatcherConditionFactory {
107 public:
108 URLMatcherConditionFactory();
109 ~URLMatcherConditionFactory();
110
111 // Canonicalizes a URL for "Create{Host,Path,Query}*Condition" searches.
112 std::string CanonlicalizeURLForComponentSearches(const GURL& url);  // NOTE(review): "Canonlicalize" is a misspelling of "Canonicalize"; a rename must update all callers.
Matt Perry 2012/02/15 22:45:18 typo: Canonicalize*
battre 2012/02/16 14:45:55 Done.
113
114 // Factory methods for various condition types.
115 URLMatcherCondition CreateHostPrefixCondition(const std::string& prefix);  // NOTE(review): std::string is used throughout this class, but <string> is not in this header's include list - presumably pulled in transitively; confirm.
116 URLMatcherCondition CreateHostSuffixCondition(const std::string& suffix);
117 URLMatcherCondition CreateHostContainsCondition(const std::string& str);
118 URLMatcherCondition CreateHostEqualsCondition(const std::string& str);
119
120 URLMatcherCondition CreatePathPrefixCondition(const std::string& prefix);
121 URLMatcherCondition CreatePathSuffixCondition(const std::string& suffix);
122 URLMatcherCondition CreatePathContainsCondition(const std::string& str);
123 URLMatcherCondition CreatePathEqualsCondition(const std::string& str);
124
125 URLMatcherCondition CreateQueryPrefixCondition(const std::string& prefix);
126 URLMatcherCondition CreateQuerySuffixCondition(const std::string& suffix);
127 URLMatcherCondition CreateQueryContainsCondition(const std::string& str);
128 URLMatcherCondition CreateQueryEqualsCondition(const std::string& str);
129
130 // This covers the common case, where you don't care whether a domain
131 // "foobar.com" is expressed as "foobar.com" or "www.foobar.com", and it
132 // should be followed by a given |path_prefix|.
133 URLMatcherCondition CreateHostSuffixPathPrefixCondition(
134 const std::string& host_suffix,
135 const std::string& path_prefix);
136
137 // Canonicalizes a URL for "CreateURL*Condition" searches.
138 std::string CanonlicalizeURLForFullSearches(const GURL& url);  // NOTE(review): same "Canonlicalize" misspelling as above.
Matt Perry 2012/02/15 22:45:18 ditto typo
battre 2012/02/16 14:45:55 Done.
139
140 URLMatcherCondition CreateURLPrefixCondition(const std::string& prefix);  // The CreateURL*Condition group; presumably these yield the conditions for which URLMatcherCondition::IsFullURLCondition() holds - confirm.
141 URLMatcherCondition CreateURLSuffixCondition(const std::string& suffix);
142 URLMatcherCondition CreateURLContainsCondition(const std::string& str);
143 URLMatcherCondition CreateURLEqualsCondition(const std::string& str);
144
145 // Removes all patterns from |pattern_singletons_| that are not listed in
146 // |used_patterns|. These patterns are not referenced any more and get
147 // freed.
148 void ForgetUnusedPatterns(
149 const std::set<SubstringPattern::ID>& used_patterns);
150
151 private:
152 // Creates a URLMatcherCondition according to the parameters passed.
153 // The URLMatcherCondition will refer to a SubstringPattern that is
154 // owned by |pattern_singletons_|.
155 URLMatcherCondition CreateCondition(URLMatcherCondition::Criterion criterion,
156 const std::string& pattern);
157
158 // Prepends a "." to the hostname if it does not start with one.
159 std::string CanonicalizeHostname(const std::string& hostname) const;
160
161 // Counter that ensures that all created SubstringPatterns have unique IDs.
162 int id_counter_;
163
164 // These two functions consider only the pattern() value of the
165 // SubstringPatterns.
166 struct HashFunction {
167 size_t operator()(SubstringPattern* substring_pattern) const;
168 };
169 struct EqualsFunction {
170 bool operator()(SubstringPattern* lhs, SubstringPattern* rhs) const;
171 };
172 // Hash set to ensure that we generate only one SubstringPattern for each
173 // content of SubstringPattern::pattern().
174 typedef base::hash_set<SubstringPattern*, HashFunction, EqualsFunction>
175 PatternSingletons;
176 PatternSingletons pattern_singletons_;  // Holds the SubstringPatterns handed out via CreateCondition(); see ForgetUnusedPatterns() for how entries are dropped.
177
178 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionFactory);
179 };
180
181 // This class represents a set of conditions that all need to match on a
182 // given URL in order to be considered a match.
183 class URLMatcherConditionSet {
184 public:
185 typedef int ID;  // Identifier of a condition set; URLMatcher requires it to be unique per registered set (see URLMatcher::AddConditionSets).
186 typedef std::set<URLMatcherCondition> Conditions;  // std::set ordering relies on URLMatcherCondition::operator<.
187
188 URLMatcherConditionSet();
189 URLMatcherConditionSet(ID id, const Conditions& conditions);
190 URLMatcherConditionSet(const URLMatcherConditionSet& rhs);
191 URLMatcherConditionSet& operator=(const URLMatcherConditionSet& rhs);
192
193 ID id() const { return id_; }
194 const Conditions& conditions() const { return conditions_; }
195
196 bool IsMatch(  // Presumably true iff every condition in |conditions_| matches, as the class comment describes - confirm in the implementation.
197 const std::set<SubstringPattern::ID>& matching_substring_patterns,
198 const GURL& url) const;
199
200 private:
201 ID id_;  // Returned by id().
202 Conditions conditions_;  // Returned by conditions().
203 };
204
205 // This class allows matching one URL against a large set of
206 // URLMatcherConditionSets at the same time.
207 class URLMatcher {
208 public:
209 URLMatcher();
210
211 // Adds new URLMatcherConditionSet to this URL Matcher. Each condition set
212 // must have a unique ID.
213 // This is an expensive operation as it triggers pre-calculations on the
214 // currently registered condition sets. Do not call this operation many
215 // times with a single condition set in each call.
216 void AddConditionSets(
217 const std::vector<URLMatcherConditionSet>& condition_sets);
218
219 // Removes the listed condition sets. All |condition_set_ids| must be
220 // currently registered. This function should be called with large batches
221 // of |condition_set_ids| at a time to improve performance.
222 void RemoveConditionSets(
223 const std::vector<URLMatcherConditionSet::ID>& condition_set_ids);
224
225 // Returns the IDs of all URLMatcherConditionSets that match |url|.
226 std::set<URLMatcherConditionSet::ID> MatchURL(const GURL& url);
227
228 // Returns the URLMatcherConditionFactory that must be used to create
229 // URLMatcherConditionSets for this URLMatcher.
230 URLMatcherConditionFactory* condition_factory() {
231 return &condition_factory_;
232 }
233
234 private:
235 void UpdateSubstringSetMatcher(bool full_url_conditions);  // NOTE(review): undocumented; |full_url_conditions| presumably selects |full_url_matcher_| vs. |url_component_matcher_| - confirm.
236 void UpdateTriggers();  // NOTE(review): undocumented; presumably rebuilds |substring_match_triggers_| - confirm.
237 void UpdateConditionFactory();  // NOTE(review): undocumented; presumably lets |condition_factory_| forget unused patterns - confirm.
238 void UpdateInternalDatastructures();  // NOTE(review): undocumented; presumably runs the Update* steps above after the registered condition sets change - confirm.
239
240 URLMatcherConditionFactory condition_factory_;
241
242 // Maps the ID of a URLMatcherConditionSet to the respective
243 // URLMatcherConditionSet.
244 typedef std::map<URLMatcherConditionSet::ID, URLMatcherConditionSet>  // NOTE(review): std::map is used here, but <map> is not in this header's include list - confirm it is available transitively.
245 URLMatcherConditionSets;
246 URLMatcherConditionSets url_matcher_condition_sets_;
247
248 // Maps a SubstringPattern ID to the IDs of the URLMatcherConditionSets that
249 // need to be triggered in case of a SubstringPattern match.
Matt Perry 2012/02/15 22:45:18 SubstringPattern*
battre 2012/02/16 14:45:55 Done.
250 std::map<SubstringPattern::ID, std::set<URLMatcherConditionSet::ID> >
251 substring_match_triggers_;
252
253 SubstringSetMatcher full_url_matcher_;  // Presumably matches the patterns of full-URL conditions (cf. URLMatcherCondition::IsFullURLCondition()) - confirm.
254 SubstringSetMatcher url_component_matcher_;  // Presumably matches the patterns of host/path/query conditions - confirm.
255 std::set<const SubstringPattern*> registered_full_url_patterns_;  // Presumably the patterns currently registered in |full_url_matcher_| - confirm.
256 std::set<const SubstringPattern*> registered_url_component_patterns_;  // Presumably the patterns currently registered in |url_component_matcher_| - confirm.
257
258 DISALLOW_COPY_AND_ASSIGN(URLMatcher);
259 };
260
261 } // namespace extensions
262
263 #endif // CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698