| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef COMPONENTS_URL_MATCHER_SUBSTRING_SET_MATCHER_H_ | 5 #ifndef COMPONENTS_URL_MATCHER_SUBSTRING_SET_MATCHER_H_ |
| 6 #define COMPONENTS_URL_MATCHER_SUBSTRING_SET_MATCHER_H_ | 6 #define COMPONENTS_URL_MATCHER_SUBSTRING_SET_MATCHER_H_ |
| 7 | 7 |
| 8 #include <stdint.h> |
| 9 |
| 8 #include <limits> | 10 #include <limits> |
| 9 #include <map> | 11 #include <map> |
| 10 #include <set> | 12 #include <set> |
| 11 #include <string> | 13 #include <string> |
| 12 #include <vector> | 14 #include <vector> |
| 13 | 15 |
| 14 #include "base/basictypes.h" | 16 #include "base/macros.h" |
| 15 #include "components/url_matcher/string_pattern.h" | 17 #include "components/url_matcher/string_pattern.h" |
| 16 #include "components/url_matcher/url_matcher_export.h" | 18 #include "components/url_matcher/url_matcher_export.h" |
| 17 | 19 |
| 18 namespace url_matcher { | 20 namespace url_matcher { |
| 19 | 21 |
| 20 // Class that store a set of string patterns and can find for a string S, | 22 // Class that store a set of string patterns and can find for a string S, |
| 21 // which string patterns occur in S. | 23 // which string patterns occur in S. |
| 22 class URL_MATCHER_EXPORT SubstringSetMatcher { | 24 class URL_MATCHER_EXPORT SubstringSetMatcher { |
| 23 public: | 25 public: |
| 24 SubstringSetMatcher(); | 26 SubstringSetMatcher(); |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 75 // text[i + k] at depth k + 1, we follow a failure edge. This edge | 77 // text[i + k] at depth k + 1, we follow a failure edge. This edge |
| 76 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that | 78 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that |
| 77 // is a prefix of any registered pattern. | 79 // is a prefix of any registered pattern. |
| 78 // | 80 // |
| 79 // If your brain thinks "Forget it, let's go shopping.", don't worry. | 81 // If your brain thinks "Forget it, let's go shopping.", don't worry. |
| 80 // Take a nap and read an introductory text on the Aho Corasick algorithm. | 82 // Take a nap and read an introductory text on the Aho Corasick algorithm. |
| 81 // It will make sense. Eventually. | 83 // It will make sense. Eventually. |
| 82 class AhoCorasickNode { | 84 class AhoCorasickNode { |
| 83 public: | 85 public: |
| 84 // Key: label of the edge, value: node index in |tree_| of parent class. | 86 // Key: label of the edge, value: node index in |tree_| of parent class. |
| 85 typedef std::map<char, uint32> Edges; | 87 typedef std::map<char, uint32_t> Edges; |
| 86 typedef std::set<StringPattern::ID> Matches; | 88 typedef std::set<StringPattern::ID> Matches; |
| 87 | 89 |
| 88 static const uint32 kNoSuchEdge; // Represents an invalid node index. | 90 static const uint32_t kNoSuchEdge; // Represents an invalid node index. |
| 89 | 91 |
| 90 AhoCorasickNode(); | 92 AhoCorasickNode(); |
| 91 ~AhoCorasickNode(); | 93 ~AhoCorasickNode(); |
| 92 AhoCorasickNode(const AhoCorasickNode& other); | 94 AhoCorasickNode(const AhoCorasickNode& other); |
| 93 AhoCorasickNode& operator=(const AhoCorasickNode& other); | 95 AhoCorasickNode& operator=(const AhoCorasickNode& other); |
| 94 | 96 |
| 95 uint32 GetEdge(char c) const; | 97 uint32_t GetEdge(char c) const; |
| 96 void SetEdge(char c, uint32 node); | 98 void SetEdge(char c, uint32_t node); |
| 97 const Edges& edges() const { return edges_; } | 99 const Edges& edges() const { return edges_; } |
| 98 | 100 |
| 99 uint32 failure() const { return failure_; } | 101 uint32_t failure() const { return failure_; } |
| 100 void set_failure(uint32 failure) { failure_ = failure; } | 102 void set_failure(uint32_t failure) { failure_ = failure; } |
| 101 | 103 |
| 102 void AddMatch(StringPattern::ID id); | 104 void AddMatch(StringPattern::ID id); |
| 103 void AddMatches(const Matches& matches); | 105 void AddMatches(const Matches& matches); |
| 104 const Matches& matches() const { return matches_; } | 106 const Matches& matches() const { return matches_; } |
| 105 | 107 |
| 106 private: | 108 private: |
| 107 // Outgoing edges of current node. | 109 // Outgoing edges of current node. |
| 108 Edges edges_; | 110 Edges edges_; |
| 109 | 111 |
| 110 // Node index that failure edge leads to. | 112 // Node index that failure edge leads to. |
| 111 uint32 failure_; | 113 uint32_t failure_; |
| 112 | 114 |
| 113 // Identifiers of matches. | 115 // Identifiers of matches. |
| 114 Matches matches_; | 116 Matches matches_; |
| 115 }; | 117 }; |
| 116 | 118 |
| 117 typedef std::map<StringPattern::ID, const StringPattern*> SubstringPatternMap; | 119 typedef std::map<StringPattern::ID, const StringPattern*> SubstringPatternMap; |
| 118 typedef std::vector<const StringPattern*> SubstringPatternVector; | 120 typedef std::vector<const StringPattern*> SubstringPatternVector; |
| 119 | 121 |
| 120 // |sorted_patterns| is a copy of |patterns_| sorted by the pattern string. | 122 // |sorted_patterns| is a copy of |patterns_| sorted by the pattern string. |
| 121 void RebuildAhoCorasickTree(const SubstringPatternVector& sorted_patterns); | 123 void RebuildAhoCorasickTree(const SubstringPatternVector& sorted_patterns); |
| (...skipping 10 matching lines...) Expand all Loading... |
| 132 | 134 |
| 133 // The nodes of a Aho-Corasick tree. | 135 // The nodes of a Aho-Corasick tree. |
| 134 std::vector<AhoCorasickNode> tree_; | 136 std::vector<AhoCorasickNode> tree_; |
| 135 | 137 |
| 136 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher); | 138 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher); |
| 137 }; | 139 }; |
| 138 | 140 |
| 139 } // namespace url_matcher | 141 } // namespace url_matcher |
| 140 | 142 |
| 141 #endif // COMPONENTS_URL_MATCHER_SUBSTRING_SET_MATCHER_H_ | 143 #endif // COMPONENTS_URL_MATCHER_SUBSTRING_SET_MATCHER_H_ |
| OLD | NEW |