Index: chrome/common/extensions/matcher/regex_set_matcher.cc |
diff --git a/chrome/common/extensions/matcher/regex_set_matcher.cc b/chrome/common/extensions/matcher/regex_set_matcher.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..455bece0bd7c662ceb1a9683e3471f95fe362d36 |
--- /dev/null |
+++ b/chrome/common/extensions/matcher/regex_set_matcher.cc |
@@ -0,0 +1,109 @@ |
+// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "chrome/common/extensions/matcher/regex_set_matcher.h" |
+ |
+#include "base/logging.h" |
+#include "base/string_util.h" |
+#include "base/stl_util.h" |
+#include "chrome/common/extensions/matcher/substring_set_matcher.h" |
+#include "third_party/re2/re2/filtered_re2.h" |
+#include "third_party/re2/re2/re2.h" |
+ |
+namespace extensions { |
+ |
+// Creates a matcher with no patterns. Match() reports no matches until |
+// AddPatterns() has been called with at least one pattern. |
+RegexSetMatcher::RegexSetMatcher() { |
+} |
+ |
+// The StringPatterns in |substring_patterns_| are allocated with new in |
+// RebuildMatcher() and held as raw pointers, so release them explicitly. |
+RegexSetMatcher::~RegexSetMatcher() { |
+  DeleteSubstringPatterns(); |
+} |
+ |
+// Registers every pattern in |regex_list| under its ID and rebuilds the |
+// matcher. Only the pointers are stored. A pattern whose ID is already |
+// registered replaces the earlier entry. An empty |regex_list| is a no-op |
+// and does not trigger a rebuild. |
+void RegexSetMatcher::AddPatterns( |
+    const std::vector<const StringPattern*>& regex_list) { |
+  if (!regex_list.empty()) { |
+    typedef std::vector<const StringPattern*>::const_iterator PatternIterator; |
+    for (PatternIterator pattern = regex_list.begin(); |
+         pattern != regex_list.end(); ++pattern) { |
+      regexes_[(*pattern)->id()] = *pattern; |
+    } |
+ |
+    // The compiled state is derived from |regexes_|, so it must be rebuilt. |
+    RebuildMatcher(); |
+  } |
+} |
+ |
+// Forgets all registered patterns and rebuilds the (now empty) matcher. |
+void RegexSetMatcher::ClearPatterns() { |
+  regexes_.clear(); |
+ |
+  // Rebuilding with no patterns resets the compiled regex state. |
+  RebuildMatcher(); |
+} |
+ |
+// Runs the registered regexes against |text| and inserts the ID of every |
+// matching pattern into |matches|. IDs already present in |matches| are kept. |
+// |
+// Returns true iff this call added at least one new ID to |matches|; a match |
+// whose ID was already in the set does not count. Returns false without |
+// modifying |matches| when no patterns are registered or the matcher has not |
+// been built. |
+bool RegexSetMatcher::Match(const std::string& text, |
+                            std::set<StringPattern::ID>* matches) const { |
+  // Remembered so that newly inserted IDs can be detected at the end. |
+  const size_t old_number_of_matches = matches->size(); |
+  if (regexes_.empty()) |
+    return false; |
+  if (!filtered_re2_.get()) { |
+    LOG(ERROR) << "RegexSetMatcher was not initialized"; |
+    return false; |
+  } |
+ |
+  // FilteredRE2 expects lowercase for prefiltering, but we still |
+  // match case-sensitively. |
+  std::vector<RE2ID> atoms(FindSubstringMatches( |
+      StringToLowerASCII(text))); |
+ |
+  // The full regexes are run against the original, non-lowercased |text|. |
+  std::vector<RE2ID> re2_ids; |
+  filtered_re2_->AllMatches(text, atoms, &re2_ids); |
+ |
+  // Translate FilteredRE2's IDs back into the callers' pattern IDs. |
+  for (size_t i = 0; i < re2_ids.size(); ++i) |
+    matches->insert(re2_id_map_[re2_ids[i]]); |
+ |
+  return old_number_of_matches != matches->size(); |
+} |
+ |
+// Returns the IDs that |substring_matcher_| reports for |text|, in ascending |
+// order. Match() passes these to FilteredRE2 as the prefilter atoms. |
+std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches( |
+    const std::string& text) const { |
+  std::set<int> matched_atoms; |
+  substring_matcher_->Match(text, &matched_atoms); |
+ |
+  std::vector<RE2ID> atom_ids; |
+  atom_ids.assign(matched_atoms.begin(), matched_atoms.end()); |
+  return atom_ids; |
+} |
+ |
+// Rebuilds all compiled state from |regexes_|: |
+//  - |filtered_re2_|: a fresh FilteredRE2 holding every pattern that parses; |
+//  - |re2_id_map_|: maps the ID FilteredRE2 assigned to the pattern's own ID; |
+//  - |substring_matcher_| / |substring_patterns_|: the prefilter built from |
+//    the strings FilteredRE2::Compile() returns. |
+// Patterns that fail to parse are logged and skipped. |
+void RegexSetMatcher::RebuildMatcher() { |
+  re2_id_map_.clear(); |
+  filtered_re2_.reset(new re2::FilteredRE2()); |
+ |
+  // Drop the previous prefilter before the empty check, so that clearing all |
+  // patterns also releases the old substring patterns instead of keeping |
+  // them alive until the next non-empty rebuild. The matcher is replaced |
+  // before the patterns are deleted because SubstringSetMatcher doesn't own |
+  // its strings. |
+  substring_matcher_.reset(new SubstringSetMatcher); |
+  DeleteSubstringPatterns(); |
+ |
+  if (regexes_.empty()) |
+    return; |
+ |
+  for (RegexMap::iterator it = regexes_.begin(); it != regexes_.end(); ++it) { |
+    RE2ID re2_id; |
+    RE2::ErrorCode error = filtered_re2_->Add( |
+        it->second->pattern(), RE2::DefaultOptions, &re2_id); |
+    if (error == RE2::NoError) { |
+      // The assigned ID is expected to equal the next free index, which lets |
+      // |re2_id_map_| be indexed directly by RE2ID in Match(). |
+      DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id); |
+      re2_id_map_.push_back(it->first); |
+    } else { |
+      // TODO(yoz): Return an unparseable regex error as soon as possible. |
+      LOG(ERROR) << "Could not parse regex (id=" << it->first << ", " |
+                 << it->second->pattern() << ")"; |
+    } |
+  } |
+ |
+  // Compile() fills in the strings that have to be searched for in the input |
+  // before the regexes themselves are run. |
+  std::vector<std::string> strings_to_match; |
+  filtered_re2_->Compile(&strings_to_match); |
+ |
+  // Build SubstringSetMatcher from |strings_to_match|. Each pattern's ID is |
+  // its index in |strings_to_match|. |
+  for (size_t i = 0; i < strings_to_match.size(); ++i) { |
+    substring_patterns_.push_back( |
+        new StringPattern(strings_to_match[i], i)); |
+  } |
+  substring_matcher_->RegisterPatterns(substring_patterns_); |
+} |
+ |
+// Deletes every StringPattern held in |substring_patterns_| and leaves the |
+// container empty. |
+void RegexSetMatcher::DeleteSubstringPatterns() { |
+  STLDeleteElements(&substring_patterns_); |
+} |
+ |
+} // namespace extensions |