OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/common/extensions/matcher/substring_set_matcher.h" | 5 #include "chrome/common/extensions/matcher/substring_set_matcher.h" |
6 | 6 |
7 #include <queue> | 7 #include <queue> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/stl_util.h" | 10 #include "base/stl_util.h" |
11 | 11 |
12 namespace extensions { | 12 namespace extensions { |
13 | 13 |
14 // | 14 // |
15 // SubstringPattern | |
16 // | |
17 | |
18 SubstringPattern::SubstringPattern(const std::string& pattern, | |
19 SubstringPattern::ID id) | |
20 : pattern_(pattern), id_(id) {} | |
21 | |
22 SubstringPattern::~SubstringPattern() {} | |
23 | |
24 bool SubstringPattern::operator<(const SubstringPattern& rhs) const { | |
25 if (id_ < rhs.id_) return true; | |
26 if (id_ > rhs.id_) return false; | |
27 return pattern_ < rhs.pattern_; | |
28 } | |
29 | |
30 // | |
31 // SubstringSetMatcher | 15 // SubstringSetMatcher |
32 // | 16 // |
33 | 17 |
34 SubstringSetMatcher::SubstringSetMatcher() { | 18 SubstringSetMatcher::SubstringSetMatcher() { |
35 RebuildAhoCorasickTree(); | 19 RebuildAhoCorasickTree(); |
36 } | 20 } |
37 | 21 |
38 SubstringSetMatcher::~SubstringSetMatcher() {} | 22 SubstringSetMatcher::~SubstringSetMatcher() {} |
39 | 23 |
40 void SubstringSetMatcher::RegisterPatterns( | 24 void SubstringSetMatcher::RegisterPatterns( |
41 const std::vector<const SubstringPattern*>& patterns) { | 25 const std::vector<const StringPattern*>& patterns) { |
42 RegisterAndUnregisterPatterns(patterns, | 26 RegisterAndUnregisterPatterns(patterns, |
43 std::vector<const SubstringPattern*>()); | 27 std::vector<const StringPattern*>()); |
44 } | 28 } |
45 | 29 |
46 void SubstringSetMatcher::UnregisterPatterns( | 30 void SubstringSetMatcher::UnregisterPatterns( |
47 const std::vector<const SubstringPattern*>& patterns) { | 31 const std::vector<const StringPattern*>& patterns) { |
48 RegisterAndUnregisterPatterns(std::vector<const SubstringPattern*>(), | 32 RegisterAndUnregisterPatterns(std::vector<const StringPattern*>(), |
49 patterns); | 33 patterns); |
50 } | 34 } |
51 | 35 |
52 void SubstringSetMatcher::RegisterAndUnregisterPatterns( | 36 void SubstringSetMatcher::RegisterAndUnregisterPatterns( |
53 const std::vector<const SubstringPattern*>& to_register, | 37 const std::vector<const StringPattern*>& to_register, |
54 const std::vector<const SubstringPattern*>& to_unregister) { | 38 const std::vector<const StringPattern*>& to_unregister) { |
55 // Register patterns. | 39 // Register patterns. |
56 for (std::vector<const SubstringPattern*>::const_iterator i = | 40 for (std::vector<const StringPattern*>::const_iterator i = |
57 to_register.begin(); i != to_register.end(); ++i) { | 41 to_register.begin(); i != to_register.end(); ++i) { |
58 DCHECK(patterns_.find((*i)->id()) == patterns_.end()); | 42 DCHECK(patterns_.find((*i)->id()) == patterns_.end()); |
59 patterns_[(*i)->id()] = *i; | 43 patterns_[(*i)->id()] = *i; |
60 } | 44 } |
61 | 45 |
62 // Unregister patterns | 46 // Unregister patterns |
63 for (std::vector<const SubstringPattern*>::const_iterator i = | 47 for (std::vector<const StringPattern*>::const_iterator i = |
64 to_unregister.begin(); i != to_unregister.end(); ++i) { | 48 to_unregister.begin(); i != to_unregister.end(); ++i) { |
65 patterns_.erase((*i)->id()); | 49 patterns_.erase((*i)->id()); |
66 } | 50 } |
67 | 51 |
68 RebuildAhoCorasickTree(); | 52 RebuildAhoCorasickTree(); |
69 } | 53 } |
70 | 54 |
71 bool SubstringSetMatcher::Match(const std::string& text, | 55 bool SubstringSetMatcher::Match(const std::string& text, |
72 std::set<SubstringPattern::ID>* matches) const { | 56 std::set<StringPattern::ID>* matches) const { |
73 size_t old_number_of_matches = matches->size(); | 57 size_t old_number_of_matches = matches->size(); |
74 | 58 |
75 // Handle patterns matching the empty string. | 59 // Handle patterns matching the empty string. |
76 matches->insert(tree_[0].matches().begin(), tree_[0].matches().end()); | 60 matches->insert(tree_[0].matches().begin(), tree_[0].matches().end()); |
77 | 61 |
78 int current_node = 0; | 62 int current_node = 0; |
79 size_t text_length = text.length(); | 63 size_t text_length = text.length(); |
80 for (size_t i = 0; i < text_length; ++i) { | 64 for (size_t i = 0; i < text_length; ++i) { |
81 while (!tree_[current_node].HasEdge(text[i]) && current_node != 0) | 65 while (!tree_[current_node].HasEdge(text[i]) && current_node != 0) |
82 current_node = tree_[current_node].failure(); | 66 current_node = tree_[current_node].failure(); |
(...skipping 25 matching lines...) Expand all Loading... |
108 // Insert all patterns. | 92 // Insert all patterns. |
109 for (SubstringPatternSet::const_iterator i = patterns_.begin(); | 93 for (SubstringPatternSet::const_iterator i = patterns_.begin(); |
110 i != patterns_.end(); ++i) { | 94 i != patterns_.end(); ++i) { |
111 InsertPatternIntoAhoCorasickTree(i->second); | 95 InsertPatternIntoAhoCorasickTree(i->second); |
112 } | 96 } |
113 | 97 |
114 CreateFailureEdges(); | 98 CreateFailureEdges(); |
115 } | 99 } |
116 | 100 |
117 void SubstringSetMatcher::InsertPatternIntoAhoCorasickTree( | 101 void SubstringSetMatcher::InsertPatternIntoAhoCorasickTree( |
118 const SubstringPattern* pattern) { | 102 const StringPattern* pattern) { |
119 const std::string& text = pattern->pattern(); | 103 const std::string& text = pattern->pattern(); |
120 size_t text_length = text.length(); | 104 size_t text_length = text.length(); |
121 | 105 |
122 // Iterators on the tree and the text. | 106 // Iterators on the tree and the text. |
123 int current_node = 0; | 107 int current_node = 0; |
124 size_t text_pos = 0; | 108 size_t text_pos = 0; |
125 | 109 |
126 // Follow existing paths for as long as possible. | 110 // Follow existing paths for as long as possible. |
127 while (text_pos < text_length && | 111 while (text_pos < text_length && |
128 tree_[current_node].HasEdge(text[text_pos])) { | 112 tree_[current_node].HasEdge(text[text_pos])) { |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
205 | 189 |
206 int SubstringSetMatcher::AhoCorasickNode::GetEdge(char c) const { | 190 int SubstringSetMatcher::AhoCorasickNode::GetEdge(char c) const { |
207 std::map<char, int>::const_iterator i = edges_.find(c); | 191 std::map<char, int>::const_iterator i = edges_.find(c); |
208 return i == edges_.end() ? -1 : i->second; | 192 return i == edges_.end() ? -1 : i->second; |
209 } | 193 } |
210 | 194 |
211 void SubstringSetMatcher::AhoCorasickNode::SetEdge(char c, int node) { | 195 void SubstringSetMatcher::AhoCorasickNode::SetEdge(char c, int node) { |
212 edges_[c] = node; | 196 edges_[c] = node; |
213 } | 197 } |
214 | 198 |
215 void SubstringSetMatcher::AhoCorasickNode::AddMatch(SubstringPattern::ID id) { | 199 void SubstringSetMatcher::AhoCorasickNode::AddMatch(StringPattern::ID id) { |
216 matches_.insert(id); | 200 matches_.insert(id); |
217 } | 201 } |
218 | 202 |
219 void SubstringSetMatcher::AhoCorasickNode::AddMatches( | 203 void SubstringSetMatcher::AhoCorasickNode::AddMatches( |
220 const SubstringSetMatcher::AhoCorasickNode::Matches& matches) { | 204 const SubstringSetMatcher::AhoCorasickNode::Matches& matches) { |
221 matches_.insert(matches.begin(), matches.end()); | 205 matches_.insert(matches.begin(), matches.end()); |
222 } | 206 } |
223 | 207 |
224 } // namespace extensions | 208 } // namespace extensions |
OLD | NEW |