OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ | 5 #ifndef EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ |
6 #define EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ | 6 #define EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ |
7 | 7 |
8 #include <limits> | |
9 #include <map> | 8 #include <map> |
10 #include <set> | 9 #include <set> |
11 #include <string> | 10 #include <string> |
12 #include <vector> | 11 #include <vector> |
13 | 12 |
14 #include "base/basictypes.h" | 13 #include "base/basictypes.h" |
15 #include "extensions/common/matcher/string_pattern.h" | 14 #include "extensions/common/matcher/string_pattern.h" |
16 | 15 |
17 namespace extensions { | 16 namespace extensions { |
18 | 17 |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
74 // text[i + k] at depth k + 1, we follow a failure edge. This edge | 73 // text[i + k] at depth k + 1, we follow a failure edge. This edge |
75 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that | 74 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that |
76 // is a prefix of any registered pattern. | 75 // is a prefix of any registered pattern. |
77 // | 76 // |
78 // If your brain thinks "Forget it, let's go shopping.", don't worry. | 77 // If your brain thinks "Forget it, let's go shopping.", don't worry. |
79 // Take a nap and read an introductory text on the Aho Corasick algorithm. | 78 // Take a nap and read an introductory text on the Aho Corasick algorithm. |
80 // It will make sense. Eventually. | 79 // It will make sense. Eventually. |
81 class AhoCorasickNode { | 80 class AhoCorasickNode { |
82 public: | 81 public: |
83 // Key: label of the edge, value: node index in |tree_| of parent class. | 82 // Key: label of the edge, value: node index in |tree_| of parent class. |
84 typedef std::map<char, uint32> Edges; | 83 typedef std::map<char, int> Edges; |
85 typedef std::set<StringPattern::ID> Matches; | 84 typedef std::set<StringPattern::ID> Matches; |
86 | 85 |
87 static const uint32 kNoSuchEdge; // Represents an invalid node index. | |
88 | |
89 AhoCorasickNode(); | 86 AhoCorasickNode(); |
90 ~AhoCorasickNode(); | 87 ~AhoCorasickNode(); |
91 AhoCorasickNode(const AhoCorasickNode& other); | 88 AhoCorasickNode(const AhoCorasickNode& other); |
92 AhoCorasickNode& operator=(const AhoCorasickNode& other); | 89 AhoCorasickNode& operator=(const AhoCorasickNode& other); |
93 | 90 |
94 uint32 GetEdge(char c) const; | 91 bool HasEdge(char c) const; |
95 void SetEdge(char c, uint32 node); | 92 int GetEdge(char c) const; |
| 93 void SetEdge(char c, int node); |
96 const Edges& edges() const { return edges_; } | 94 const Edges& edges() const { return edges_; } |
97 | 95 |
98 uint32 failure() const { return failure_; } | 96 int failure() const { return failure_; } |
99 void set_failure(uint32 failure) { failure_ = failure; } | 97 void set_failure(int failure) { failure_ = failure; } |
100 | 98 |
101 void AddMatch(StringPattern::ID id); | 99 void AddMatch(StringPattern::ID id); |
102 void AddMatches(const Matches& matches); | 100 void AddMatches(const Matches& matches); |
103 const Matches& matches() const { return matches_; } | 101 const Matches& matches() const { return matches_; } |
104 | 102 |
105 private: | 103 private: |
106 // Outgoing edges of current node. | 104 // Outgoing edges of current node. |
107 Edges edges_; | 105 Edges edges_; |
108 | 106 |
109 // Node index that failure edge leads to. | 107 // Node index that failure edge leads to. |
110 uint32 failure_; | 108 int failure_; |
111 | 109 |
112 // Identifiers of matches. | 110 // Identifiers of matches. |
113 Matches matches_; | 111 Matches matches_; |
114 }; | 112 }; |
115 | 113 |
116 typedef std::map<StringPattern::ID, const StringPattern*> SubstringPatternMap; | 114 void RebuildAhoCorasickTree(); |
117 typedef std::vector<const StringPattern*> SubstringPatternVector; | |
118 | |
119 // |sorted_patterns| is a copy of |patterns_| sorted by the pattern string. | |
120 void RebuildAhoCorasickTree(const SubstringPatternVector& sorted_patterns); | |
121 | 115 |
122 // Inserts a path for |pattern->pattern()| into the tree and adds | 116 // Inserts a path for |pattern->pattern()| into the tree and adds |
123 // |pattern->id()| to the set of matches. Ownership of |pattern| remains with | 117 // |pattern->id()| to the set of matches. Ownership of |pattern| remains with |
124 // the caller. | 118 // the caller. |
125 void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern); | 119 void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern); |
126 void CreateFailureEdges(); | 120 void CreateFailureEdges(); |
127 | 121 |
128 // Set of all registered StringPatterns. Used to regenerate the | 122 // Set of all registered StringPatterns. Used to regenerate the |
129 // Aho-Corasick tree in case patterns are registered or unregistered. | 123 // Aho-Corasick tree in case patterns are registered or unregistered. |
130 SubstringPatternMap patterns_; | 124 typedef std::map<StringPattern::ID, const StringPattern*> |
| 125 SubstringPatternSet; |
| 126 SubstringPatternSet patterns_; |
131 | 127 |
132 // The nodes of a Aho-Corasick tree. | 128 // The nodes of a Aho-Corasick tree. |
133 std::vector<AhoCorasickNode> tree_; | 129 std::vector<AhoCorasickNode> tree_; |
134 | 130 |
135 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher); | 131 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher); |
136 }; | 132 }; |
137 | 133 |
138 } // namespace extensions | 134 } // namespace extensions |
139 | 135 |
140 #endif // EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ | 136 #endif // EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ |
OLD | NEW |