Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(841)

Side by Side Diff: chrome/common/extensions/matcher/substring_set_matcher.h

Issue 10910179: Event matching by regular expression matching on URLs. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: ascii artiste Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_ 5 #ifndef CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_
6 #define CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_ 6 #define CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_
7 7
8 #include <map> 8 #include <map>
9 #include <set> 9 #include <set>
10 #include <string> 10 #include <string>
11 #include <vector> 11 #include <vector>
12 12
13 #include "base/basictypes.h" 13 #include "base/basictypes.h"
14 #include "chrome/common/extensions/matcher/string_pattern.h"
14 15
15 namespace extensions { 16 namespace extensions {
16 17
17 // An individual pattern of the SubstringSetMatcher. A pattern consists of
18 // a string (interpreted as individual bytes, no character encoding) and an
19 // identifier.
20 // Each pattern that matches a string emits its ID to the caller of
21 // SubstringSetMatcher::Match(). This helps the caller to figure out what
22 // patterns matched a string. All patterns registered to a SubstringSetMatcher
23 // need to contain unique IDs.
24 class SubstringPattern {
25 public:
26 typedef int ID;
27
28 SubstringPattern(const std::string& pattern, ID id);
29 ~SubstringPattern();
30 const std::string& pattern() const { return pattern_; }
31 ID id() const { return id_; }
32
33 bool operator<(const SubstringPattern& rhs) const;
34
35 private:
36 std::string pattern_;
37 ID id_;
38
39 DISALLOW_COPY_AND_ASSIGN(SubstringPattern);
40 };
41
42 // Class that store a set of string patterns and can find for a string S, 18 // Class that store a set of string patterns and can find for a string S,
43 // which string patterns occur in S. 19 // which string patterns occur in S.
44 class SubstringSetMatcher { 20 class SubstringSetMatcher {
45 public: 21 public:
46 SubstringSetMatcher(); 22 SubstringSetMatcher();
47 ~SubstringSetMatcher(); 23 ~SubstringSetMatcher();
48 24
49 // Registers all |patterns|. The ownership remains with the caller. 25 // Registers all |patterns|. The ownership remains with the caller.
50 // The same pattern cannot be registered twice and each pattern needs to have 26 // The same pattern cannot be registered twice and each pattern needs to have
51 // a unique ID. 27 // a unique ID.
52 // Ownership of the patterns remains with the caller. 28 // Ownership of the patterns remains with the caller.
53 void RegisterPatterns(const std::vector<const SubstringPattern*>& patterns); 29 void RegisterPatterns(const std::vector<const StringPattern*>& patterns);
54 30
55 // Unregisters the passed |patterns|. 31 // Unregisters the passed |patterns|.
56 void UnregisterPatterns(const std::vector<const SubstringPattern*>& patterns); 32 void UnregisterPatterns(const std::vector<const StringPattern*>& patterns);
57 33
58 // Analogous to RegisterPatterns and UnregisterPatterns but executes both 34 // Analogous to RegisterPatterns and UnregisterPatterns but executes both
59 // operations in one step, which is cheaper in the execution. 35 // operations in one step, which is cheaper in the execution.
60 void RegisterAndUnregisterPatterns( 36 void RegisterAndUnregisterPatterns(
61 const std::vector<const SubstringPattern*>& to_register, 37 const std::vector<const StringPattern*>& to_register,
62 const std::vector<const SubstringPattern*>& to_unregister); 38 const std::vector<const StringPattern*>& to_unregister);
63 39
64 // Matches |text| against all registered SubstringPatterns. Stores the IDs 40 // Matches |text| against all registered StringPatterns. Stores the IDs
65 // of matching patterns in |matches|. |matches| is not cleared before adding 41 // of matching patterns in |matches|. |matches| is not cleared before adding
66 // to it. 42 // to it.
67 bool Match(const std::string& text, 43 bool Match(const std::string& text,
68 std::set<SubstringPattern::ID>* matches) const; 44 std::set<StringPattern::ID>* matches) const;
69 45
70 // Returns true if this object retains no allocated data. Only for debugging. 46 // Returns true if this object retains no allocated data. Only for debugging.
71 bool IsEmpty() const; 47 bool IsEmpty() const;
72 48
73 private: 49 private:
74 // A node of an Aho Corasick Tree. This is implemented according to 50 // A node of an Aho Corasick Tree. This is implemented according to
75 // http://www.cs.uku.fi/~kilpelai/BSA05/lectures/slides04.pdf 51 // http://www.cs.uku.fi/~kilpelai/BSA05/lectures/slides04.pdf
76 // 52 //
77 // The algorithm is based on the idea of building a trie of all registered 53 // The algorithm is based on the idea of building a trie of all registered
78 // patterns. Each node of the tree is annotated with a set of pattern 54 // patterns. Each node of the tree is annotated with a set of pattern
(...skipping 19 matching lines...) Expand all
98 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that 74 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that
99 // is a prefix of any registered pattern. 75 // is a prefix of any registered pattern.
100 // 76 //
101 // If your brain thinks "Forget it, let's go shopping.", don't worry. 77 // If your brain thinks "Forget it, let's go shopping.", don't worry.
102 // Take a nap and read an introductory text on the Aho Corasick algorithm. 78 // Take a nap and read an introductory text on the Aho Corasick algorithm.
103 // It will make sense. Eventually. 79 // It will make sense. Eventually.
104 class AhoCorasickNode { 80 class AhoCorasickNode {
105 public: 81 public:
106 // Key: label of the edge, value: node index in |tree_| of parent class. 82 // Key: label of the edge, value: node index in |tree_| of parent class.
107 typedef std::map<char, int> Edges; 83 typedef std::map<char, int> Edges;
108 typedef std::set<SubstringPattern::ID> Matches; 84 typedef std::set<StringPattern::ID> Matches;
109 85
110 AhoCorasickNode(); 86 AhoCorasickNode();
111 ~AhoCorasickNode(); 87 ~AhoCorasickNode();
112 AhoCorasickNode(const AhoCorasickNode& other); 88 AhoCorasickNode(const AhoCorasickNode& other);
113 AhoCorasickNode& operator=(const AhoCorasickNode& other); 89 AhoCorasickNode& operator=(const AhoCorasickNode& other);
114 90
115 bool HasEdge(char c) const; 91 bool HasEdge(char c) const;
116 int GetEdge(char c) const; 92 int GetEdge(char c) const;
117 void SetEdge(char c, int node); 93 void SetEdge(char c, int node);
118 const Edges& edges() const { return edges_; } 94 const Edges& edges() const { return edges_; }
119 95
120 int failure() const { return failure_; } 96 int failure() const { return failure_; }
121 void set_failure(int failure) { failure_ = failure; } 97 void set_failure(int failure) { failure_ = failure; }
122 98
123 void AddMatch(SubstringPattern::ID id); 99 void AddMatch(StringPattern::ID id);
124 void AddMatches(const Matches& matches); 100 void AddMatches(const Matches& matches);
125 const Matches& matches() const { return matches_; } 101 const Matches& matches() const { return matches_; }
126 102
127 private: 103 private:
128 // Outgoing edges of current node. 104 // Outgoing edges of current node.
129 Edges edges_; 105 Edges edges_;
130 106
131 // Node index that failure edge leads to. 107 // Node index that failure edge leads to.
132 int failure_; 108 int failure_;
133 109
134 // Identifiers of matches. 110 // Identifiers of matches.
135 Matches matches_; 111 Matches matches_;
136 }; 112 };
137 113
138 void RebuildAhoCorasickTree(); 114 void RebuildAhoCorasickTree();
139 115
140 // Inserts a path for |pattern->pattern()| into the tree and adds 116 // Inserts a path for |pattern->pattern()| into the tree and adds
141 // |pattern->id()| to the set of matches. Ownership of |pattern| remains with 117 // |pattern->id()| to the set of matches. Ownership of |pattern| remains with
142 // the caller. 118 // the caller.
143 void InsertPatternIntoAhoCorasickTree(const SubstringPattern* pattern); 119 void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern);
144 void CreateFailureEdges(); 120 void CreateFailureEdges();
145 121
146 // Set of all registered SubstringPatterns. Used to regenerate the 122 // Set of all registered StringPatterns. Used to regenerate the
147 // Aho-Corasick tree in case patterns are registered or unregistered. 123 // Aho-Corasick tree in case patterns are registered or unregistered.
148 typedef std::map<SubstringPattern::ID, const SubstringPattern*> 124 typedef std::map<StringPattern::ID, const StringPattern*>
149 SubstringPatternSet; 125 SubstringPatternSet;
150 SubstringPatternSet patterns_; 126 SubstringPatternSet patterns_;
151 127
152 // The nodes of a Aho-Corasick tree. 128 // The nodes of a Aho-Corasick tree.
153 std::vector<AhoCorasickNode> tree_; 129 std::vector<AhoCorasickNode> tree_;
154 130
155 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher); 131 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher);
156 }; 132 };
157 133
158 } // namespace extensions 134 } // namespace extensions
159 135
160 #endif // CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_ 136 #endif // CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_
OLDNEW
« no previous file with comments | « chrome/common/extensions/matcher/string_pattern_unittest.cc ('k') | chrome/common/extensions/matcher/substring_set_matcher.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698