OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ | 5 #ifndef EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ |
6 #define EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ | 6 #define EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ |
7 | 7 |
| 8 #include <limits> |
8 #include <map> | 9 #include <map> |
9 #include <set> | 10 #include <set> |
10 #include <string> | 11 #include <string> |
11 #include <vector> | 12 #include <vector> |
12 | 13 |
13 #include "base/basictypes.h" | 14 #include "base/basictypes.h" |
14 #include "extensions/common/matcher/string_pattern.h" | 15 #include "extensions/common/matcher/string_pattern.h" |
15 | 16 |
16 namespace extensions { | 17 namespace extensions { |
17 | 18 |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
73 // text[i + k] at depth k + 1, we follow a failure edge. This edge | 74 // text[i + k] at depth k + 1, we follow a failure edge. This edge |
74 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that | 75 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that |
75 // is a prefix of any registered pattern. | 76 // is a prefix of any registered pattern. |
76 // | 77 // |
77 // If your brain thinks "Forget it, let's go shopping.", don't worry. | 78 // If your brain thinks "Forget it, let's go shopping.", don't worry. |
78 // Take a nap and read an introductory text on the Aho Corasick algorithm. | 79 // Take a nap and read an introductory text on the Aho Corasick algorithm. |
79 // It will make sense. Eventually. | 80 // It will make sense. Eventually. |
80 class AhoCorasickNode { | 81 class AhoCorasickNode { |
81 public: | 82 public: |
82 // Key: label of the edge, value: node index in |tree_| of parent class. | 83 // Key: label of the edge, value: node index in |tree_| of parent class. |
83 typedef std::map<char, int> Edges; | 84 typedef std::map<char, uint32> Edges; |
84 typedef std::set<StringPattern::ID> Matches; | 85 typedef std::set<StringPattern::ID> Matches; |
85 | 86 |
| 87 static const uint32 kNoSuchEdge; // Represents an invalid node index. |
| 88 |
86 AhoCorasickNode(); | 89 AhoCorasickNode(); |
87 ~AhoCorasickNode(); | 90 ~AhoCorasickNode(); |
88 AhoCorasickNode(const AhoCorasickNode& other); | 91 AhoCorasickNode(const AhoCorasickNode& other); |
89 AhoCorasickNode& operator=(const AhoCorasickNode& other); | 92 AhoCorasickNode& operator=(const AhoCorasickNode& other); |
90 | 93 |
91 bool HasEdge(char c) const; | 94 uint32 GetEdge(char c) const; |
92 int GetEdge(char c) const; | 95 void SetEdge(char c, uint32 node); |
93 void SetEdge(char c, int node); | |
94 const Edges& edges() const { return edges_; } | 96 const Edges& edges() const { return edges_; } |
95 | 97 |
96 int failure() const { return failure_; } | 98 uint32 failure() const { return failure_; } |
97 void set_failure(int failure) { failure_ = failure; } | 99 void set_failure(uint32 failure) { failure_ = failure; } |
98 | 100 |
99 void AddMatch(StringPattern::ID id); | 101 void AddMatch(StringPattern::ID id); |
100 void AddMatches(const Matches& matches); | 102 void AddMatches(const Matches& matches); |
101 const Matches& matches() const { return matches_; } | 103 const Matches& matches() const { return matches_; } |
102 | 104 |
103 private: | 105 private: |
104 // Outgoing edges of current node. | 106 // Outgoing edges of current node. |
105 Edges edges_; | 107 Edges edges_; |
106 | 108 |
107 // Node index that failure edge leads to. | 109 // Node index that failure edge leads to. |
108 int failure_; | 110 uint32 failure_; |
109 | 111 |
110 // Identifiers of matches. | 112 // Identifiers of matches. |
111 Matches matches_; | 113 Matches matches_; |
112 }; | 114 }; |
113 | 115 |
114 void RebuildAhoCorasickTree(); | 116 typedef std::map<StringPattern::ID, const StringPattern*> SubstringPatternMap; |
| 117 typedef std::vector<const StringPattern*> SubstringPatternVector; |
| 118 |
| 119 // |sorted_patterns| is a copy of |patterns_| sorted by the pattern string. |
| 120 void RebuildAhoCorasickTree(const SubstringPatternVector& sorted_patterns); |
115 | 121 |
116 // Inserts a path for |pattern->pattern()| into the tree and adds | 122 // Inserts a path for |pattern->pattern()| into the tree and adds |
117 // |pattern->id()| to the set of matches. Ownership of |pattern| remains with | 123 // |pattern->id()| to the set of matches. Ownership of |pattern| remains with |
118 // the caller. | 124 // the caller. |
119 void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern); | 125 void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern); |
120 void CreateFailureEdges(); | 126 void CreateFailureEdges(); |
121 | 127 |
122 // Set of all registered StringPatterns. Used to regenerate the | 128 // Set of all registered StringPatterns. Used to regenerate the |
123 // Aho-Corasick tree in case patterns are registered or unregistered. | 129 // Aho-Corasick tree in case patterns are registered or unregistered. |
124 typedef std::map<StringPattern::ID, const StringPattern*> | 130 SubstringPatternMap patterns_; |
125 SubstringPatternSet; | |
126 SubstringPatternSet patterns_; | |
127 | 131 |
128 // The nodes of a Aho-Corasick tree. | 132 // The nodes of a Aho-Corasick tree. |
129 std::vector<AhoCorasickNode> tree_; | 133 std::vector<AhoCorasickNode> tree_; |
130 | 134 |
131 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher); | 135 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher); |
132 }; | 136 }; |
133 | 137 |
134 } // namespace extensions | 138 } // namespace extensions |
135 | 139 |
136 #endif // EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ | 140 #endif // EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ |
OLD | NEW |