Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Unified Diff: trunk/src/extensions/common/matcher/substring_set_matcher.cc

Issue 15027007: Revert 198871 "Speed improvements to SubstringSetMatcher" (Closed) Base URL: svn://svn.chromium.org/chrome/
Patch Set: Created 7 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « trunk/src/extensions/common/matcher/substring_set_matcher.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: trunk/src/extensions/common/matcher/substring_set_matcher.cc
===================================================================
--- trunk/src/extensions/common/matcher/substring_set_matcher.cc (revision 198873)
+++ trunk/src/extensions/common/matcher/substring_set_matcher.cc (working copy)
@@ -4,7 +4,6 @@
#include "extensions/common/matcher/substring_set_matcher.h"
-#include <algorithm>
#include <queue>
#include "base/logging.h"
@@ -12,51 +11,12 @@
namespace extensions {
-namespace {
-
-// Compare StringPattern instances based on their string patterns.
-bool ComparePatterns(const StringPattern* a, const StringPattern* b) {
- return a->pattern() <= b->pattern();
-}
-
-// Given the set of patterns, compute how many nodes will the corresponding
-// Aho-Corasick tree have. Note that |patterns| need to be sorted.
-uint32 TreeSize(const std::vector<const StringPattern*>& patterns) {
- uint32 result = 1u; // 1 for the root node.
- if (patterns.empty())
- return result;
-
- std::vector<const StringPattern*>::const_iterator last = patterns.begin();
- std::vector<const StringPattern*>::const_iterator current = last + 1;
- // For the first pattern, each letter is a label of an edge to a new node.
- result += (*last)->pattern().size();
-
- // For the subsequent patterns, only count the edges which were not counted
- // yet. For this it suffices to test against the previous pattern, because the
- // patterns are sorted.
- for (; current != patterns.end(); ++last, ++current) {
- const std::string& last_pattern = (*last)->pattern();
- const std::string& current_pattern = (*current)->pattern();
- const uint32 prefix_bound =
- std::min(last_pattern.size(), current_pattern.size());
-
- uint32 common_prefix = 0;
- while (common_prefix < prefix_bound &&
- last_pattern[common_prefix] == current_pattern[common_prefix])
- ++common_prefix;
- result += current_pattern.size() - common_prefix;
- }
- return result;
-}
-
-} // namespace
-
//
// SubstringSetMatcher
//
SubstringSetMatcher::SubstringSetMatcher() {
- RebuildAhoCorasickTree(SubstringPatternVector());
+ RebuildAhoCorasickTree();
}
SubstringSetMatcher::~SubstringSetMatcher() {}
@@ -89,44 +49,27 @@
patterns_.erase((*i)->id());
}
- // Now we compute the total number of tree nodes needed.
- SubstringPatternVector sorted_patterns;
- sorted_patterns.resize(patterns_.size());
-
- size_t next = 0;
- for (SubstringPatternMap::const_iterator i = patterns_.begin();
- i != patterns_.end();
- ++i, ++next) {
- sorted_patterns[next] = i->second;
- }
-
- std::sort(sorted_patterns.begin(), sorted_patterns.end(), ComparePatterns);
- tree_.reserve(TreeSize(sorted_patterns));
-
- RebuildAhoCorasickTree(sorted_patterns);
+ RebuildAhoCorasickTree();
}
bool SubstringSetMatcher::Match(const std::string& text,
std::set<StringPattern::ID>* matches) const {
- const size_t old_number_of_matches = matches->size();
+ size_t old_number_of_matches = matches->size();
// Handle patterns matching the empty string.
matches->insert(tree_[0].matches().begin(), tree_[0].matches().end());
- uint32 current_node = 0;
- for (std::string::const_iterator i = text.begin(); i != text.end(); ++i) {
- uint32 edge_from_current = tree_[current_node].GetEdge(*i);
- while (edge_from_current == AhoCorasickNode::kNoSuchEdge &&
- current_node != 0) {
+ int current_node = 0;
+ size_t text_length = text.length();
+ for (size_t i = 0; i < text_length; ++i) {
+ while (!tree_[current_node].HasEdge(text[i]) && current_node != 0)
current_node = tree_[current_node].failure();
- edge_from_current = tree_[current_node].GetEdge(*i);
- }
- if (edge_from_current != AhoCorasickNode::kNoSuchEdge) {
- current_node = edge_from_current;
+ if (tree_[current_node].HasEdge(text[i])) {
+ current_node = tree_[current_node].GetEdge(text[i]);
matches->insert(tree_[current_node].matches().begin(),
tree_[current_node].matches().end());
} else {
- DCHECK_EQ(0u, current_node);
+ DCHECK_EQ(0, current_node);
}
}
@@ -138,8 +81,7 @@
return patterns_.empty() && tree_.size() == 1u;
}
-void SubstringSetMatcher::RebuildAhoCorasickTree(
- const SubstringPatternVector& sorted_patterns) {
+void SubstringSetMatcher::RebuildAhoCorasickTree() {
tree_.clear();
// Initialize root note of tree.
@@ -148,10 +90,9 @@
tree_.push_back(root);
// Insert all patterns.
- for (SubstringPatternVector::const_iterator i = sorted_patterns.begin();
- i != sorted_patterns.end();
- ++i) {
- InsertPatternIntoAhoCorasickTree(*i);
+ for (SubstringPatternSet::const_iterator i = patterns_.begin();
+ i != patterns_.end(); ++i) {
+ InsertPatternIntoAhoCorasickTree(i->second);
}
CreateFailureEdges();
@@ -160,27 +101,26 @@
void SubstringSetMatcher::InsertPatternIntoAhoCorasickTree(
const StringPattern* pattern) {
const std::string& text = pattern->pattern();
- const std::string::const_iterator text_end = text.end();
+ size_t text_length = text.length();
// Iterators on the tree and the text.
- uint32 current_node = 0;
- std::string::const_iterator i = text.begin();
+ int current_node = 0;
+ size_t text_pos = 0;
// Follow existing paths for as long as possible.
- uint32 edge_from_current = tree_[current_node].GetEdge(*i);
- while (i != text_end &&
- edge_from_current != AhoCorasickNode::kNoSuchEdge) {
- current_node = edge_from_current;
- ++i;
- edge_from_current = tree_[current_node].GetEdge(*i);
+ while (text_pos < text_length &&
+ tree_[current_node].HasEdge(text[text_pos])) {
+ current_node = tree_[current_node].GetEdge(text[text_pos]);
+ ++text_pos;
}
// Create new nodes if necessary.
- while (i != text_end) {
- tree_.push_back(AhoCorasickNode());
- tree_[current_node].SetEdge(*i, tree_.size() - 1);
+ while (text_pos < text_length) {
+ AhoCorasickNode new_node;
+ tree_.push_back(new_node);
+ tree_[current_node].SetEdge(text[text_pos], tree_.size() - 1);
current_node = tree_.size() - 1;
- ++i;
+ ++text_pos;
}
// Register match.
@@ -190,14 +130,14 @@
void SubstringSetMatcher::CreateFailureEdges() {
typedef AhoCorasickNode::Edges Edges;
- std::queue<uint32> queue;
+ std::queue<int> queue;
AhoCorasickNode& root = tree_[0];
root.set_failure(0);
- const Edges& root_edges = root.edges();
+ const AhoCorasickNode::Edges& root_edges = root.edges();
for (Edges::const_iterator e = root_edges.begin(); e != root_edges.end();
++e) {
- const uint32& leads_to = e->second;
+ int leads_to = e->second;
tree_[leads_to].set_failure(0);
queue.push(leads_to);
}
@@ -207,32 +147,24 @@
queue.pop();
for (Edges::const_iterator e = current_node.edges().begin();
e != current_node.edges().end(); ++e) {
- const char& edge_label = e->first;
- const uint32& leads_to = e->second;
+ char edge_label = e->first;
+ int leads_to = e->second;
queue.push(leads_to);
- uint32 failure = current_node.failure();
- uint32 edge_from_failure = tree_[failure].GetEdge(edge_label);
- while (edge_from_failure == AhoCorasickNode::kNoSuchEdge &&
- failure != 0) {
+ int failure = current_node.failure();
+ while (!tree_[failure].HasEdge(edge_label) && failure != 0)
failure = tree_[failure].failure();
- edge_from_failure = tree_[failure].GetEdge(edge_label);
- }
- const uint32 follow_in_case_of_failure =
- edge_from_failure != AhoCorasickNode::kNoSuchEdge
- ? edge_from_failure
- : 0;
+ int follow_in_case_of_failure = tree_[failure].HasEdge(e->first) ?
+ tree_[failure].GetEdge(edge_label) : 0;
tree_[leads_to].set_failure(follow_in_case_of_failure);
tree_[leads_to].AddMatches(tree_[follow_in_case_of_failure].matches());
}
}
}
-const uint32 SubstringSetMatcher::AhoCorasickNode::kNoSuchEdge = ~0;
-
SubstringSetMatcher::AhoCorasickNode::AhoCorasickNode()
- : failure_(kNoSuchEdge) {}
+ : failure_(-1) {}
SubstringSetMatcher::AhoCorasickNode::~AhoCorasickNode() {}
@@ -251,12 +183,16 @@
return *this;
}
-uint32 SubstringSetMatcher::AhoCorasickNode::GetEdge(char c) const {
- Edges::const_iterator i = edges_.find(c);
- return i == edges_.end() ? kNoSuchEdge : i->second;
+bool SubstringSetMatcher::AhoCorasickNode::HasEdge(char c) const {
+ return edges_.find(c) != edges_.end();
}
-void SubstringSetMatcher::AhoCorasickNode::SetEdge(char c, uint32 node) {
+int SubstringSetMatcher::AhoCorasickNode::GetEdge(char c) const {
+ std::map<char, int>::const_iterator i = edges_.find(c);
+ return i == edges_.end() ? -1 : i->second;
+}
+
+void SubstringSetMatcher::AhoCorasickNode::SetEdge(char c, int node) {
edges_[c] = node;
}
« no previous file with comments | « trunk/src/extensions/common/matcher/substring_set_matcher.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698