| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_ | 5 #ifndef CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_ |
| 6 #define CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_ | 6 #define CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_ |
| 7 | 7 |
| 8 #include <map> | 8 #include <map> |
| 9 #include <set> | 9 #include <set> |
| 10 #include <string> | 10 #include <string> |
| 11 #include <vector> | 11 #include <vector> |
| 12 | 12 |
| 13 #include "base/basictypes.h" | 13 #include "base/basictypes.h" |
| 14 #include "chrome/common/extensions/matcher/string_pattern.h" |
| 14 | 15 |
| 15 namespace extensions { | 16 namespace extensions { |
| 16 | 17 |
| 17 // An individual pattern of the SubstringSetMatcher. A pattern consists of | |
| 18 // a string (interpreted as individual bytes, no character encoding) and an | |
| 19 // identifier. | |
| 20 // Each pattern that matches a string emits its ID to the caller of | |
| 21 // SubstringSetMatcher::Match(). This helps the caller to figure out what | |
| 22 // patterns matched a string. All patterns registered to a SubstringSetMatcher | |
| 23 // need to contain unique IDs. | |
| 24 class SubstringPattern { | |
| 25 public: | |
| 26 typedef int ID; | |
| 27 | |
| 28 SubstringPattern(const std::string& pattern, ID id); | |
| 29 ~SubstringPattern(); | |
| 30 const std::string& pattern() const { return pattern_; } | |
| 31 ID id() const { return id_; } | |
| 32 | |
| 33 bool operator<(const SubstringPattern& rhs) const; | |
| 34 | |
| 35 private: | |
| 36 std::string pattern_; | |
| 37 ID id_; | |
| 38 | |
| 39 DISALLOW_COPY_AND_ASSIGN(SubstringPattern); | |
| 40 }; | |
| 41 | |
| 42 // Class that store a set of string patterns and can find for a string S, | 18 // Class that store a set of string patterns and can find for a string S, |
| 43 // which string patterns occur in S. | 19 // which string patterns occur in S. |
| 44 class SubstringSetMatcher { | 20 class SubstringSetMatcher { |
| 45 public: | 21 public: |
| 46 SubstringSetMatcher(); | 22 SubstringSetMatcher(); |
| 47 ~SubstringSetMatcher(); | 23 ~SubstringSetMatcher(); |
| 48 | 24 |
| 49 // Registers all |patterns|. The ownership remains with the caller. | 25 // Registers all |patterns|. The ownership remains with the caller. |
| 50 // The same pattern cannot be registered twice and each pattern needs to have | 26 // The same pattern cannot be registered twice and each pattern needs to have |
| 51 // a unique ID. | 27 // a unique ID. |
| 52 // Ownership of the patterns remains with the caller. | 28 // Ownership of the patterns remains with the caller. |
| 53 void RegisterPatterns(const std::vector<const SubstringPattern*>& patterns); | 29 void RegisterPatterns(const std::vector<const StringPattern*>& patterns); |
| 54 | 30 |
| 55 // Unregisters the passed |patterns|. | 31 // Unregisters the passed |patterns|. |
| 56 void UnregisterPatterns(const std::vector<const SubstringPattern*>& patterns); | 32 void UnregisterPatterns(const std::vector<const StringPattern*>& patterns); |
| 57 | 33 |
| 58 // Analogous to RegisterPatterns and UnregisterPatterns but executes both | 34 // Analogous to RegisterPatterns and UnregisterPatterns but executes both |
| 59 // operations in one step, which is cheaper in the execution. | 35 // operations in one step, which is cheaper in the execution. |
| 60 void RegisterAndUnregisterPatterns( | 36 void RegisterAndUnregisterPatterns( |
| 61 const std::vector<const SubstringPattern*>& to_register, | 37 const std::vector<const StringPattern*>& to_register, |
| 62 const std::vector<const SubstringPattern*>& to_unregister); | 38 const std::vector<const StringPattern*>& to_unregister); |
| 63 | 39 |
| 64 // Matches |text| against all registered SubstringPatterns. Stores the IDs | 40 // Matches |text| against all registered StringPatterns. Stores the IDs |
| 65 // of matching patterns in |matches|. |matches| is not cleared before adding | 41 // of matching patterns in |matches|. |matches| is not cleared before adding |
| 66 // to it. | 42 // to it. |
| 67 bool Match(const std::string& text, | 43 bool Match(const std::string& text, |
| 68 std::set<SubstringPattern::ID>* matches) const; | 44 std::set<StringPattern::ID>* matches) const; |
| 69 | 45 |
| 70 // Returns true if this object retains no allocated data. Only for debugging. | 46 // Returns true if this object retains no allocated data. Only for debugging. |
| 71 bool IsEmpty() const; | 47 bool IsEmpty() const; |
| 72 | 48 |
| 73 private: | 49 private: |
| 74 // A node of an Aho Corasick Tree. This is implemented according to | 50 // A node of an Aho Corasick Tree. This is implemented according to |
| 75 // http://www.cs.uku.fi/~kilpelai/BSA05/lectures/slides04.pdf | 51 // http://www.cs.uku.fi/~kilpelai/BSA05/lectures/slides04.pdf |
| 76 // | 52 // |
| 77 // The algorithm is based on the idea of building a trie of all registered | 53 // The algorithm is based on the idea of building a trie of all registered |
| 78 // patterns. Each node of the tree is annotated with a set of pattern | 54 // patterns. Each node of the tree is annotated with a set of pattern |
| (...skipping 19 matching lines...) Expand all Loading... |
| 98 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that | 74 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that |
| 99 // is a prefix of any registered pattern. | 75 // is a prefix of any registered pattern. |
| 100 // | 76 // |
| 101 // If your brain thinks "Forget it, let's go shopping.", don't worry. | 77 // If your brain thinks "Forget it, let's go shopping.", don't worry. |
| 102 // Take a nap and read an introductory text on the Aho Corasick algorithm. | 78 // Take a nap and read an introductory text on the Aho Corasick algorithm. |
| 103 // It will make sense. Eventually. | 79 // It will make sense. Eventually. |
| 104 class AhoCorasickNode { | 80 class AhoCorasickNode { |
| 105 public: | 81 public: |
| 106 // Key: label of the edge, value: node index in |tree_| of parent class. | 82 // Key: label of the edge, value: node index in |tree_| of parent class. |
| 107 typedef std::map<char, int> Edges; | 83 typedef std::map<char, int> Edges; |
| 108 typedef std::set<SubstringPattern::ID> Matches; | 84 typedef std::set<StringPattern::ID> Matches; |
| 109 | 85 |
| 110 AhoCorasickNode(); | 86 AhoCorasickNode(); |
| 111 ~AhoCorasickNode(); | 87 ~AhoCorasickNode(); |
| 112 AhoCorasickNode(const AhoCorasickNode& other); | 88 AhoCorasickNode(const AhoCorasickNode& other); |
| 113 AhoCorasickNode& operator=(const AhoCorasickNode& other); | 89 AhoCorasickNode& operator=(const AhoCorasickNode& other); |
| 114 | 90 |
| 115 bool HasEdge(char c) const; | 91 bool HasEdge(char c) const; |
| 116 int GetEdge(char c) const; | 92 int GetEdge(char c) const; |
| 117 void SetEdge(char c, int node); | 93 void SetEdge(char c, int node); |
| 118 const Edges& edges() const { return edges_; } | 94 const Edges& edges() const { return edges_; } |
| 119 | 95 |
| 120 int failure() const { return failure_; } | 96 int failure() const { return failure_; } |
| 121 void set_failure(int failure) { failure_ = failure; } | 97 void set_failure(int failure) { failure_ = failure; } |
| 122 | 98 |
| 123 void AddMatch(SubstringPattern::ID id); | 99 void AddMatch(StringPattern::ID id); |
| 124 void AddMatches(const Matches& matches); | 100 void AddMatches(const Matches& matches); |
| 125 const Matches& matches() const { return matches_; } | 101 const Matches& matches() const { return matches_; } |
| 126 | 102 |
| 127 private: | 103 private: |
| 128 // Outgoing edges of current node. | 104 // Outgoing edges of current node. |
| 129 Edges edges_; | 105 Edges edges_; |
| 130 | 106 |
| 131 // Node index that failure edge leads to. | 107 // Node index that failure edge leads to. |
| 132 int failure_; | 108 int failure_; |
| 133 | 109 |
| 134 // Identifiers of matches. | 110 // Identifiers of matches. |
| 135 Matches matches_; | 111 Matches matches_; |
| 136 }; | 112 }; |
| 137 | 113 |
| 138 void RebuildAhoCorasickTree(); | 114 void RebuildAhoCorasickTree(); |
| 139 | 115 |
| 140 // Inserts a path for |pattern->pattern()| into the tree and adds | 116 // Inserts a path for |pattern->pattern()| into the tree and adds |
| 141 // |pattern->id()| to the set of matches. Ownership of |pattern| remains with | 117 // |pattern->id()| to the set of matches. Ownership of |pattern| remains with |
| 142 // the caller. | 118 // the caller. |
| 143 void InsertPatternIntoAhoCorasickTree(const SubstringPattern* pattern); | 119 void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern); |
| 144 void CreateFailureEdges(); | 120 void CreateFailureEdges(); |
| 145 | 121 |
| 146 // Set of all registered SubstringPatterns. Used to regenerate the | 122 // Set of all registered StringPatterns. Used to regenerate the |
| 147 // Aho-Corasick tree in case patterns are registered or unregistered. | 123 // Aho-Corasick tree in case patterns are registered or unregistered. |
| 148 typedef std::map<SubstringPattern::ID, const SubstringPattern*> | 124 typedef std::map<StringPattern::ID, const StringPattern*> |
| 149 SubstringPatternSet; | 125 SubstringPatternSet; |
| 150 SubstringPatternSet patterns_; | 126 SubstringPatternSet patterns_; |
| 151 | 127 |
| 152 // The nodes of a Aho-Corasick tree. | 128 // The nodes of a Aho-Corasick tree. |
| 153 std::vector<AhoCorasickNode> tree_; | 129 std::vector<AhoCorasickNode> tree_; |
| 154 | 130 |
| 155 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher); | 131 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher); |
| 156 }; | 132 }; |
| 157 | 133 |
| 158 } // namespace extensions | 134 } // namespace extensions |
| 159 | 135 |
| 160 #endif // CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_ | 136 #endif // CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_ |
| OLD | NEW |