| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ | 5 #ifndef EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ |
| 6 #define EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ | 6 #define EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ |
| 7 | 7 |
| 8 #include <limits> | |
| 9 #include <map> | 8 #include <map> |
| 10 #include <set> | 9 #include <set> |
| 11 #include <string> | 10 #include <string> |
| 12 #include <vector> | 11 #include <vector> |
| 13 | 12 |
| 14 #include "base/basictypes.h" | 13 #include "base/basictypes.h" |
| 15 #include "extensions/common/matcher/string_pattern.h" | 14 #include "extensions/common/matcher/string_pattern.h" |
| 16 | 15 |
| 17 namespace extensions { | 16 namespace extensions { |
| 18 | 17 |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 74 // text[i + k] at depth k + 1, we follow a failure edge. This edge | 73 // text[i + k] at depth k + 1, we follow a failure edge. This edge |
| 75 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that | 74 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that |
| 76 // is a prefix of any registered pattern. | 75 // is a prefix of any registered pattern. |
| 77 // | 76 // |
| 78 // If your brain thinks "Forget it, let's go shopping.", don't worry. | 77 // If your brain thinks "Forget it, let's go shopping.", don't worry. |
| 79 // Take a nap and read an introductory text on the Aho Corasick algorithm. | 78 // Take a nap and read an introductory text on the Aho Corasick algorithm. |
| 80 // It will make sense. Eventually. | 79 // It will make sense. Eventually. |
| 81 class AhoCorasickNode { | 80 class AhoCorasickNode { |
| 82 public: | 81 public: |
| 83 // Key: label of the edge, value: node index in |tree_| of parent class. | 82 // Key: label of the edge, value: node index in |tree_| of parent class. |
| 84 typedef std::map<char, uint32> Edges; | 83 typedef std::map<char, int> Edges; |
| 85 typedef std::set<StringPattern::ID> Matches; | 84 typedef std::set<StringPattern::ID> Matches; |
| 86 | 85 |
| 87 static const uint32 kNoSuchEdge; // Represents an invalid node index. | |
| 88 | |
| 89 AhoCorasickNode(); | 86 AhoCorasickNode(); |
| 90 ~AhoCorasickNode(); | 87 ~AhoCorasickNode(); |
| 91 AhoCorasickNode(const AhoCorasickNode& other); | 88 AhoCorasickNode(const AhoCorasickNode& other); |
| 92 AhoCorasickNode& operator=(const AhoCorasickNode& other); | 89 AhoCorasickNode& operator=(const AhoCorasickNode& other); |
| 93 | 90 |
| 94 uint32 GetEdge(char c) const; | 91 bool HasEdge(char c) const; |
| 95 void SetEdge(char c, uint32 node); | 92 int GetEdge(char c) const; |
| 93 void SetEdge(char c, int node); |
| 96 const Edges& edges() const { return edges_; } | 94 const Edges& edges() const { return edges_; } |
| 97 | 95 |
| 98 uint32 failure() const { return failure_; } | 96 int failure() const { return failure_; } |
| 99 void set_failure(uint32 failure) { failure_ = failure; } | 97 void set_failure(int failure) { failure_ = failure; } |
| 100 | 98 |
| 101 void AddMatch(StringPattern::ID id); | 99 void AddMatch(StringPattern::ID id); |
| 102 void AddMatches(const Matches& matches); | 100 void AddMatches(const Matches& matches); |
| 103 const Matches& matches() const { return matches_; } | 101 const Matches& matches() const { return matches_; } |
| 104 | 102 |
| 105 private: | 103 private: |
| 106 // Outgoing edges of current node. | 104 // Outgoing edges of current node. |
| 107 Edges edges_; | 105 Edges edges_; |
| 108 | 106 |
| 109 // Node index that failure edge leads to. | 107 // Node index that failure edge leads to. |
| 110 uint32 failure_; | 108 int failure_; |
| 111 | 109 |
| 112 // Identifiers of matches. | 110 // Identifiers of matches. |
| 113 Matches matches_; | 111 Matches matches_; |
| 114 }; | 112 }; |
| 115 | 113 |
| 116 typedef std::map<StringPattern::ID, const StringPattern*> SubstringPatternMap; | 114 void RebuildAhoCorasickTree(); |
| 117 typedef std::vector<const StringPattern*> SubstringPatternVector; | |
| 118 | |
| 119 // |sorted_patterns| is a copy of |patterns_| sorted by the pattern string. | |
| 120 void RebuildAhoCorasickTree(const SubstringPatternVector& sorted_patterns); | |
| 121 | 115 |
| 122 // Inserts a path for |pattern->pattern()| into the tree and adds | 116 // Inserts a path for |pattern->pattern()| into the tree and adds |
| 123 // |pattern->id()| to the set of matches. Ownership of |pattern| remains with | 117 // |pattern->id()| to the set of matches. Ownership of |pattern| remains with |
| 124 // the caller. | 118 // the caller. |
| 125 void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern); | 119 void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern); |
| 126 void CreateFailureEdges(); | 120 void CreateFailureEdges(); |
| 127 | 121 |
| 128 // Set of all registered StringPatterns. Used to regenerate the | 122 // Set of all registered StringPatterns. Used to regenerate the |
| 129 // Aho-Corasick tree in case patterns are registered or unregistered. | 123 // Aho-Corasick tree in case patterns are registered or unregistered. |
| 130 SubstringPatternMap patterns_; | 124 typedef std::map<StringPattern::ID, const StringPattern*> |
| 125 SubstringPatternSet; |
| 126 SubstringPatternSet patterns_; |
| 131 | 127 |
| 132 // The nodes of a Aho-Corasick tree. | 128 // The nodes of a Aho-Corasick tree. |
| 133 std::vector<AhoCorasickNode> tree_; | 129 std::vector<AhoCorasickNode> tree_; |
| 134 | 130 |
| 135 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher); | 131 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher); |
| 136 }; | 132 }; |
| 137 | 133 |
| 138 } // namespace extensions | 134 } // namespace extensions |
| 139 | 135 |
| 140 #endif // EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ | 136 #endif // EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ |
| OLD | NEW |