| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/common/extensions/matcher/substring_set_matcher.h" | 5 #include "chrome/common/extensions/matcher/substring_set_matcher.h" |
| 6 | 6 |
| 7 #include <queue> | 7 #include <queue> |
| 8 | 8 |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/stl_util.h" | 10 #include "base/stl_util.h" |
| 11 | 11 |
| 12 namespace extensions { | 12 namespace extensions { |
| 13 | 13 |
| 14 // | 14 // |
| 15 // SubstringPattern | |
| 16 // | |
| 17 | |
| 18 SubstringPattern::SubstringPattern(const std::string& pattern, | |
| 19 SubstringPattern::ID id) | |
| 20 : pattern_(pattern), id_(id) {} | |
| 21 | |
| 22 SubstringPattern::~SubstringPattern() {} | |
| 23 | |
| 24 bool SubstringPattern::operator<(const SubstringPattern& rhs) const { | |
| 25 if (id_ < rhs.id_) return true; | |
| 26 if (id_ > rhs.id_) return false; | |
| 27 return pattern_ < rhs.pattern_; | |
| 28 } | |
| 29 | |
| 30 // | |
| 31 // SubstringSetMatcher | 15 // SubstringSetMatcher |
| 32 // | 16 // |
| 33 | 17 |
| 34 SubstringSetMatcher::SubstringSetMatcher() { | 18 SubstringSetMatcher::SubstringSetMatcher() { |
| 35 RebuildAhoCorasickTree(); | 19 RebuildAhoCorasickTree(); |
| 36 } | 20 } |
| 37 | 21 |
| 38 SubstringSetMatcher::~SubstringSetMatcher() {} | 22 SubstringSetMatcher::~SubstringSetMatcher() {} |
| 39 | 23 |
| 40 void SubstringSetMatcher::RegisterPatterns( | 24 void SubstringSetMatcher::RegisterPatterns( |
| 41 const std::vector<const SubstringPattern*>& patterns) { | 25 const std::vector<const StringPattern*>& patterns) { |
| 42 RegisterAndUnregisterPatterns(patterns, | 26 RegisterAndUnregisterPatterns(patterns, |
| 43 std::vector<const SubstringPattern*>()); | 27 std::vector<const StringPattern*>()); |
| 44 } | 28 } |
| 45 | 29 |
| 46 void SubstringSetMatcher::UnregisterPatterns( | 30 void SubstringSetMatcher::UnregisterPatterns( |
| 47 const std::vector<const SubstringPattern*>& patterns) { | 31 const std::vector<const StringPattern*>& patterns) { |
| 48 RegisterAndUnregisterPatterns(std::vector<const SubstringPattern*>(), | 32 RegisterAndUnregisterPatterns(std::vector<const StringPattern*>(), |
| 49 patterns); | 33 patterns); |
| 50 } | 34 } |
| 51 | 35 |
| 52 void SubstringSetMatcher::RegisterAndUnregisterPatterns( | 36 void SubstringSetMatcher::RegisterAndUnregisterPatterns( |
| 53 const std::vector<const SubstringPattern*>& to_register, | 37 const std::vector<const StringPattern*>& to_register, |
| 54 const std::vector<const SubstringPattern*>& to_unregister) { | 38 const std::vector<const StringPattern*>& to_unregister) { |
| 55 // Register patterns. | 39 // Register patterns. |
| 56 for (std::vector<const SubstringPattern*>::const_iterator i = | 40 for (std::vector<const StringPattern*>::const_iterator i = |
| 57 to_register.begin(); i != to_register.end(); ++i) { | 41 to_register.begin(); i != to_register.end(); ++i) { |
| 58 DCHECK(patterns_.find((*i)->id()) == patterns_.end()); | 42 DCHECK(patterns_.find((*i)->id()) == patterns_.end()); |
| 59 patterns_[(*i)->id()] = *i; | 43 patterns_[(*i)->id()] = *i; |
| 60 } | 44 } |
| 61 | 45 |
| 62 // Unregister patterns | 46 // Unregister patterns |
| 63 for (std::vector<const SubstringPattern*>::const_iterator i = | 47 for (std::vector<const StringPattern*>::const_iterator i = |
| 64 to_unregister.begin(); i != to_unregister.end(); ++i) { | 48 to_unregister.begin(); i != to_unregister.end(); ++i) { |
| 65 patterns_.erase((*i)->id()); | 49 patterns_.erase((*i)->id()); |
| 66 } | 50 } |
| 67 | 51 |
| 68 RebuildAhoCorasickTree(); | 52 RebuildAhoCorasickTree(); |
| 69 } | 53 } |
| 70 | 54 |
| 71 bool SubstringSetMatcher::Match(const std::string& text, | 55 bool SubstringSetMatcher::Match(const std::string& text, |
| 72 std::set<SubstringPattern::ID>* matches) const { | 56 std::set<StringPattern::ID>* matches) const { |
| 73 size_t old_number_of_matches = matches->size(); | 57 size_t old_number_of_matches = matches->size(); |
| 74 | 58 |
| 75 // Handle patterns matching the empty string. | 59 // Handle patterns matching the empty string. |
| 76 matches->insert(tree_[0].matches().begin(), tree_[0].matches().end()); | 60 matches->insert(tree_[0].matches().begin(), tree_[0].matches().end()); |
| 77 | 61 |
| 78 int current_node = 0; | 62 int current_node = 0; |
| 79 size_t text_length = text.length(); | 63 size_t text_length = text.length(); |
| 80 for (size_t i = 0; i < text_length; ++i) { | 64 for (size_t i = 0; i < text_length; ++i) { |
| 81 while (!tree_[current_node].HasEdge(text[i]) && current_node != 0) | 65 while (!tree_[current_node].HasEdge(text[i]) && current_node != 0) |
| 82 current_node = tree_[current_node].failure(); | 66 current_node = tree_[current_node].failure(); |
| (...skipping 25 matching lines...) Expand all Loading... |
| 108 // Insert all patterns. | 92 // Insert all patterns. |
| 109 for (SubstringPatternSet::const_iterator i = patterns_.begin(); | 93 for (SubstringPatternSet::const_iterator i = patterns_.begin(); |
| 110 i != patterns_.end(); ++i) { | 94 i != patterns_.end(); ++i) { |
| 111 InsertPatternIntoAhoCorasickTree(i->second); | 95 InsertPatternIntoAhoCorasickTree(i->second); |
| 112 } | 96 } |
| 113 | 97 |
| 114 CreateFailureEdges(); | 98 CreateFailureEdges(); |
| 115 } | 99 } |
| 116 | 100 |
| 117 void SubstringSetMatcher::InsertPatternIntoAhoCorasickTree( | 101 void SubstringSetMatcher::InsertPatternIntoAhoCorasickTree( |
| 118 const SubstringPattern* pattern) { | 102 const StringPattern* pattern) { |
| 119 const std::string& text = pattern->pattern(); | 103 const std::string& text = pattern->pattern(); |
| 120 size_t text_length = text.length(); | 104 size_t text_length = text.length(); |
| 121 | 105 |
| 122 // Iterators on the tree and the text. | 106 // Iterators on the tree and the text. |
| 123 int current_node = 0; | 107 int current_node = 0; |
| 124 size_t text_pos = 0; | 108 size_t text_pos = 0; |
| 125 | 109 |
| 126 // Follow existing paths for as long as possible. | 110 // Follow existing paths for as long as possible. |
| 127 while (text_pos < text_length && | 111 while (text_pos < text_length && |
| 128 tree_[current_node].HasEdge(text[text_pos])) { | 112 tree_[current_node].HasEdge(text[text_pos])) { |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 205 | 189 |
| 206 int SubstringSetMatcher::AhoCorasickNode::GetEdge(char c) const { | 190 int SubstringSetMatcher::AhoCorasickNode::GetEdge(char c) const { |
| 207 std::map<char, int>::const_iterator i = edges_.find(c); | 191 std::map<char, int>::const_iterator i = edges_.find(c); |
| 208 return i == edges_.end() ? -1 : i->second; | 192 return i == edges_.end() ? -1 : i->second; |
| 209 } | 193 } |
| 210 | 194 |
| 211 void SubstringSetMatcher::AhoCorasickNode::SetEdge(char c, int node) { | 195 void SubstringSetMatcher::AhoCorasickNode::SetEdge(char c, int node) { |
| 212 edges_[c] = node; | 196 edges_[c] = node; |
| 213 } | 197 } |
| 214 | 198 |
| 215 void SubstringSetMatcher::AhoCorasickNode::AddMatch(SubstringPattern::ID id) { | 199 void SubstringSetMatcher::AhoCorasickNode::AddMatch(StringPattern::ID id) { |
| 216 matches_.insert(id); | 200 matches_.insert(id); |
| 217 } | 201 } |
| 218 | 202 |
| 219 void SubstringSetMatcher::AhoCorasickNode::AddMatches( | 203 void SubstringSetMatcher::AhoCorasickNode::AddMatches( |
| 220 const SubstringSetMatcher::AhoCorasickNode::Matches& matches) { | 204 const SubstringSetMatcher::AhoCorasickNode::Matches& matches) { |
| 221 matches_.insert(matches.begin(), matches.end()); | 205 matches_.insert(matches.begin(), matches.end()); |
| 222 } | 206 } |
| 223 | 207 |
| 224 } // namespace extensions | 208 } // namespace extensions |
| OLD | NEW |