Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(531)

Side by Side Diff: chrome/browser/extensions/api/declarative/url_matcher.h

Issue 9390018: Implementation of a Matching strategy for URLs in the Declarative WebRequest API. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Refactored for memory improvements Created 8 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_
6 #define CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_
7 #pragma once
8
9 #include <set>
10 #include <vector>
11
12 #include "base/memory/linked_ptr.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "base/memory/scoped_vector.h"
15 #include "chrome/browser/extensions/api/declarative/substring_set_matcher.h"
16 #include "googleurl/src/gurl.h"
17
18 namespace base {
19 class DictionaryValue;
20 }
21
22 namespace extensions {
23
24 // This class represents a single URL matching condition, e.g. a match on the
25 // host suffix or the containment of a string in the query component of a GURL.
26 //
27 // The difference from a SubstringPattern created by URLMatcherConditionFactory
28 // is that this also supports matches to check whether {Host, Path, Query} of a
29 // URL contains a string.
30 class URLMatcherCondition {
31 public:
32 enum Criterion {
33 HOST_PREFIX,
34 HOST_SUFFIX,
35 HOST_CONTAINS,
36 HOST_EQUALS,
37 PATH_PREFIX,
38 PATH_SUFFIX,
39 PATH_CONTAINS,
40 PATH_EQUALS,
41 QUERY_PREFIX,
42 QUERY_SUFFIX,
43 QUERY_CONTAINS,
44 QUERY_EQUALS,
45 HOST_SUFFIX_PATH_PREFIX,
46 URL_PREFIX,
47 URL_SUFFIX,
48 URL_CONTAINS,
49 URL_EQUALS,
50 };
51
52 URLMatcherCondition(Criterion criterion,
53 const SubstringPattern* substring_pattern);
54
55 Criterion criterion() const { return criterion_; }
56 const SubstringPattern* substring_pattern() const {
57 return substring_pattern_;
58 }
59
60 // Returns whether this URLMatcherCondition needs to be executed on a
61 // full URL rather than the individual components (see
62 // URLMatcherConditionFactory).
63 bool IsFullUrlCondition() const;
64
65 // Returns whether this condition is fulfilled according to
66 // |matching_substring_patterns| and |url|.
67 bool IsMatch(
68 const std::set<SubstringPattern::ID>& matching_substring_patterns,
69 const GURL& url) const;
70
71 private:
72 // |criterion_| and |substring_pattern_| describe together what property a URL
73 // needs to fulfill to be considered a match.
74 Criterion criterion_;
75
76 // This is the SubstringPattern that is used in a SubstringSetMatcher.
77 // It becomes valid after BuildSubstringPattern has been called.
78 const SubstringPattern* substring_pattern_;
79 };
80
81 // Class to map the problem of finding {host, path, query} {prefixes, suffixes,
82 // containments, and equality} in GURLs to the substring matching problem.
83 //
84 // Say, you want to check whether the path of a URL starts with "/index.html".
85 // This class preprocesses a URL like "www.google.com/index.html" into something
86 // like "www.google.com|/index.html". After preprocessing, you can search for
87 // "|/index.html" in the string and see that this candidate URL actually has
88 // a path that starts with "/index.html". On the contrary,
89 // "www.google.com/images/index.html" would be normalized to
90 // "www.google.com|/images/index.html". It is easy to see that it contains
91 // "/index.html" but the path of the URL does not start with "/index.html".
92 //
93 // This preprocessing is important if you want to match a URL against many
94 // patterns because it reduces the matching to a "discover all substrings
95 // of a dictionary in a text" problem, which can be solved very efficiently
96 // by the Aho-Corasick algorithm.
97 //
98 // IMPORTANT: The URLMatcherConditionFactory owns the SubstringPattern
99 // referenced by created URLMatcherConditions. Therefore, it must outlive
100 // all created URLMatcherConditions and the SubstringSetMatcher.
101 class URLMatcherConditionFactory {
102 public:
103 URLMatcherConditionFactory();
104
105 // Canonicalizes a URL for "Create{Host,Path,Query}*Condition" searches.
106 std::string CanonlicalizeURLForComponentSearches(const GURL& url);
107
108 // Factory methods for various condition types.
109 scoped_ptr<URLMatcherCondition> CreateHostPrefixCondition(
Matt Perry 2012/02/14 19:56:16 An URLMatcherCondition is now just a Criterion and
battre 2012/02/14 21:56:42 Done.
110 const std::string& prefix);
111 scoped_ptr<URLMatcherCondition> CreateHostSuffixCondition(
112 const std::string& suffix);
113 scoped_ptr<URLMatcherCondition> CreateHostContainsCondition(
114 const std::string& str);
115 scoped_ptr<URLMatcherCondition> CreateHostEqualsCondition(
116 const std::string& str);
117
118 scoped_ptr<URLMatcherCondition> CreatePathPrefixCondition(
119 const std::string& prefix);
120 scoped_ptr<URLMatcherCondition> CreatePathSuffixCondition(
121 const std::string& suffix);
122 scoped_ptr<URLMatcherCondition> CreatePathContainsCondition(
123 const std::string& str);
124 scoped_ptr<URLMatcherCondition> CreatePathEqualsCondition(
125 const std::string& str);
126
127 scoped_ptr<URLMatcherCondition> CreateQueryPrefixCondition(
128 const std::string& prefix);
129 scoped_ptr<URLMatcherCondition> CreateQuerySuffixCondition(
130 const std::string& suffix);
131 scoped_ptr<URLMatcherCondition> CreateQueryContainsCondition(
132 const std::string& str);
133 scoped_ptr<URLMatcherCondition> CreateQueryEqualsCondition(
134 const std::string& str);
135
136 // This covers the common case, where you don't care whether a domain
137 // "foobar.com" is expressed as "foobar.com" or "www.foobar.com", and it
138 // should be followed by a given |path_prefix|.
139 scoped_ptr<URLMatcherCondition> CreateHostSuffixPathPrefixCondition(
140 const std::string& host_suffix,
141 const std::string& path_prefix);
142
143 // Canonicalizes a URL for "CreateURL*Condition" searches.
144 std::string CanonlicalizeURLForFullSearches(const GURL& url);
145
146 scoped_ptr<URLMatcherCondition> CreateURLPrefixCondition(
147 const std::string& prefix);
148 scoped_ptr<URLMatcherCondition> CreateURLSuffixCondition(
149 const std::string& suffix);
150 scoped_ptr<URLMatcherCondition> CreateURLContainsCondition(
151 const std::string& str);
152 scoped_ptr<URLMatcherCondition> CreateURLEqualsCondition(
153 const std::string& str);
154
155 // Removes all patterns from |pattern_singletons_| that are not listed in
156 // |used_patterns|. These patterns are not referenced any more and may be
157 // freed.
158 void ForgetUnusedPatterns(
159 const std::set<SubstringPattern::ID>& used_patterns);
160
161 private:
162 // Creates a URLMatcherCondition according to the parameters passed.
163 // The URLMatcherCondition will refer to a SubstringPattern that is
164 // owned by |pattern_singletons_|.
165 scoped_ptr<URLMatcherCondition> CreateCondition(
166 URLMatcherCondition::Criterion criterion,
167 const std::string& pattern);
168
169 // Prepends a "." to the hostname if it does not start with one.
170 std::string CanonicalizeHostname(const std::string& hostname) const;
171
172 // Counter that ensures that all created SubstringPatterns have unique IDs.
173 int id_counter_;
174
175 // Maps the pattern() value of a SubstringPattern to the instance.
176 typedef std::map<std::string, linked_ptr<const SubstringPattern> >
177 PatternSingletons;
178 PatternSingletons pattern_singletons_;
Matt Perry 2012/02/14 20:47:47 Also, this should use a set, or a hash_set if you
battre 2012/02/14 21:56:42 Done.
179 };
180
181 // This class represents a set of conditions that all need to match on a
182 // given URL in order to be considered a match.
183 class URLMatcherConditionSet {
184 public:
185 typedef int ID;
186 typedef ScopedVector<const URLMatcherCondition> Conditions;
Matt Perry 2012/02/14 19:56:16 Likewise, this is overkill. Just make it a std::se
battre 2012/02/14 21:56:42 Done.
187
188 URLMatcherConditionSet(ID id, scoped_ptr<Conditions> conditions);
189
190 ID id() const { return id_; }
191 const Conditions& conditions() const { return conditions_; }
192
193 bool IsMatch(
194 const std::set<SubstringPattern::ID>& matching_substring_patterns,
195 const GURL& url) const;
196
197 private:
198 ID id_;
199 Conditions conditions_;
200 };
201
202 // This class allows matching one URL against a large set of
203 // URLMatcherConditionSets at the same time.
204 class URLMatcher {
205 public:
206 URLMatcher();
207
208 void AddConditionSets(
209 scoped_ptr<ScopedVector<const URLMatcherConditionSet> > condition_sets);
Matt Perry 2012/02/14 19:56:16 Ditto, overkill. Pass by const ref.
battre 2012/02/14 21:56:42 Done.
210 void RemoveConditionSets(
211 const std::vector<URLMatcherConditionSet::ID>& condition_ids);
212
213 std::set<URLMatcherConditionSet::ID> MatchUrl(const GURL& url);
Matt Perry 2012/02/14 23:52:25 MatchURL
214
215 URLMatcherConditionFactory* condition_factory() {
216 return &condition_factory_;
217 }
218
219 private:
220 void UpdateSubstringSetMatcher(bool full_url_conditions);
221 void UpdateTriggers();
222 void UpdateConditionFactory();
223 void UpdateInternalDatastructures();
224
225 URLMatcherConditionFactory condition_factory_;
226
227 // Maps a condition ID (as passed to AddConditionSets()) to the respective
228 // URLMatcherConditionSet.
229 typedef std::map<URLMatcherConditionSet::ID,
230 linked_ptr<const URLMatcherConditionSet> >
231 URLMatcherConditionSets;
232 URLMatcherConditionSets url_matcher_condition_sets_;
233
234 // Maps a SubstringPattern ID to the IDs of the URLMatcherConditionSets that
235 // need to be triggered in case of a SubstringPattern match.
236 std::map<SubstringPattern::ID, std::set<URLMatcherConditionSet::ID> >
237 substring_match_triggers_;
238
239 SubstringSetMatcher full_url_matcher_;
240 SubstringSetMatcher url_component_matcher_;
241 std::set<const SubstringPattern*> registered_full_url_patterns_;
242 std::set<const SubstringPattern*> registered_url_component_patterns_;
243
244 DISALLOW_COPY_AND_ASSIGN(URLMatcher);
245 };
246
247 } // namespace extensions
248
249 #endif // CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698