OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "chrome/common/extensions/matcher/regex_set_matcher.h" |
| 6 |
| 7 #include "base/logging.h" |
| 8 #include "base/string_util.h" |
| 9 #include "base/stl_util.h" |
| 10 #include "chrome/common/extensions/matcher/substring_set_matcher.h" |
| 11 #include "third_party/re2/re2/filtered_re2.h" |
| 12 #include "third_party/re2/re2/re2.h" |
| 13 |
| 14 namespace extensions { |
| 15 |
| 16 RegexSetMatcher::RegexSetMatcher() {} |
| 17 |
| 18 RegexSetMatcher::~RegexSetMatcher() { |
| 19 DeleteSubstringPatterns(); |
| 20 } |
| 21 |
| 22 void RegexSetMatcher::AddPatterns( |
| 23 const std::vector<const StringPattern*>& regex_list) { |
| 24 if (regex_list.empty()) |
| 25 return; |
| 26 for (size_t i = 0; i < regex_list.size(); ++i) { |
| 27 regexes_[regex_list[i]->id()] = regex_list[i]; |
| 28 } |
| 29 |
| 30 RebuildMatcher(); |
| 31 } |
| 32 |
| 33 void RegexSetMatcher::ClearPatterns() { |
| 34 regexes_.clear(); |
| 35 RebuildMatcher(); |
| 36 } |
| 37 |
| 38 bool RegexSetMatcher::Match(const std::string& text, |
| 39 std::set<StringPattern::ID>* matches) const { |
| 40 size_t old_number_of_matches = matches->size(); |
| 41 if (regexes_.empty()) |
| 42 return false; |
| 43 if (!filtered_re2_.get()) { |
| 44 LOG(ERROR) << "RegexSetMatcher was not initialized"; |
| 45 return false; |
| 46 } |
| 47 |
| 48 // FilteredRE2 expects lowercase for prefiltering, but we still |
| 49 // match case-sensitively. |
| 50 std::vector<RE2ID> atoms(FindSubstringMatches( |
| 51 StringToLowerASCII(text))); |
| 52 |
| 53 std::vector<RE2ID> re2_ids; |
| 54 filtered_re2_->AllMatches(text, atoms, &re2_ids); |
| 55 |
| 56 std::set<StringPattern::ID> matched_ids; |
| 57 for (size_t i = 0; i < re2_ids.size(); ++i) { |
| 58 StringPattern::ID id = re2_id_map_[re2_ids[i]]; |
| 59 matches->insert(id); |
| 60 } |
| 61 return old_number_of_matches != matches->size(); |
| 62 } |
| 63 |
| 64 std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches( |
| 65 const std::string& text) const { |
| 66 std::set<int> atoms_set; |
| 67 substring_matcher_->Match(text, &atoms_set); |
| 68 return std::vector<RE2ID>(atoms_set.begin(), atoms_set.end()); |
| 69 } |
| 70 |
| 71 void RegexSetMatcher::RebuildMatcher() { |
| 72 re2_id_map_.clear(); |
| 73 filtered_re2_.reset(new re2::FilteredRE2()); |
| 74 if (regexes_.empty()) |
| 75 return; |
| 76 |
| 77 for (RegexMap::iterator it = regexes_.begin(); it != regexes_.end(); ++it) { |
| 78 RE2ID re2_id; |
| 79 RE2::ErrorCode error = filtered_re2_->Add( |
| 80 it->second->pattern(), RE2::DefaultOptions, &re2_id); |
| 81 if (error == RE2::NoError) { |
| 82 DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id); |
| 83 re2_id_map_.push_back(it->first); |
| 84 } else { |
| 85 // TODO(yoz): Return an unparseable regex error as soon as possible. |
| 86 LOG(ERROR) << "Could not parse regex (id=" << it->first << ", " |
| 87 << it->second->pattern() << ")"; |
| 88 } |
| 89 } |
| 90 |
| 91 std::vector<std::string> strings_to_match; |
| 92 filtered_re2_->Compile(&strings_to_match); |
| 93 |
| 94 substring_matcher_.reset(new SubstringSetMatcher); |
| 95 DeleteSubstringPatterns(); |
| 96 // Build SubstringSetMatcher from |strings_to_match|. |
| 97 // SubstringSetMatcher doesn't own its strings. |
| 98 for (size_t i = 0; i < strings_to_match.size(); ++i) { |
| 99 substring_patterns_.push_back( |
| 100 new StringPattern(strings_to_match[i], i)); |
| 101 } |
| 102 substring_matcher_->RegisterPatterns(substring_patterns_); |
| 103 } |
| 104 |
| 105 void RegexSetMatcher::DeleteSubstringPatterns() { |
| 106 STLDeleteElements(&substring_patterns_); |
| 107 } |
| 108 |
| 109 } // namespace extensions |
OLD | NEW |