| OLD | NEW |
| (Empty) |
| 1 // Copyright 2009 The RE2 Authors. All Rights Reserved. | |
| 2 // Use of this source code is governed by a BSD-style | |
| 3 // license that can be found in the LICENSE file. | |
| 4 | |
| 5 // The class FilteredRE2 is used as a wrapper to multiple RE2 regexps. | |
| 6 // It provides a prefilter mechanism that helps in cutting down the | |
| 7 // number of regexps that need to be actually searched. | |
| 8 // | |
| 9 // By design, it does not include a string matching engine. This is to | |
| 10 // allow the user of the class to use their favorite string match | |
| 11 // engine. The overall flow is: Add all the regexps using Add, then | |
| 12 // Compile the FilteredRE2. The compile returns strings that need to | |
| 13 // be matched. Note that all returned strings are lowercase. For | |
| 14 // applying regexps to a search text, the caller does the string | |
| 15 // matching using the strings returned. When doing the string match, | |
| 16 // note that the caller has to do that on a lowercased version of the | |
| 17 // search text. Then call FirstMatch or AllMatches with a vector of | |
| 18 // indices of strings that were found in the text to get the actual | |
| 19 // regexp matches. | |
| 20 | |
| 21 #ifndef RE2_FILTERED_RE2_H_ | |
| 22 #define RE2_FILTERED_RE2_H_ | |
| 23 | |
| 24 #include <vector> | |
| 25 #include "re2/re2.h" | |
| 26 | |
| 27 namespace re2 { | |
| 28 using std::vector; | |
| 29 | |
| 30 class PrefilterTree; | |
| 31 | |
| 32 class FilteredRE2 { | |
| 33 public: | |
| 34 FilteredRE2(); | |
| 35 ~FilteredRE2(); | |
| 36 | |
| 37 // Uses RE2 constructor to create a RE2 object (re). Returns | |
| 38 // re->error_code(). If error_code is other than NoError, then re is | |
| 39 // deleted and not added to re2_vec_. NOTE(review): *id presumably receives the index assigned to the added regexp -- confirm against the implementation. | |
| 40 RE2::ErrorCode Add(const StringPiece& pattern, | |
| 41 const RE2::Options& options, | |
| 42 int *id); | |
| 43 | |
| 44 // Prepares the regexps added by Add for filtering. Returns a set | |
| 45 // of strings that the caller should check for in candidate texts. | |
| 46 // The returned strings are lowercased. When doing string matching, | |
| 47 // the search text should be lowercased first to find matching | |
| 48 // strings from the set of strings returned by Compile. Call after | |
| 49 // all Add calls are done. The strings are returned through strings_to_match. | |
| 50 void Compile(vector<string>* strings_to_match); | |
| 51 | |
| 52 // Returns the index of the first matching regexp. | |
| 53 // Returns -1 on no match. Can be called prior to Compile. | |
| 54 // Does not do any filtering: simply tries to Match the | |
| 55 // regexps in a loop. | |
| 56 int SlowFirstMatch(const StringPiece& text) const; | |
| 57 | |
| 58 // Returns the index of the first matching regexp. | |
| 59 // Returns -1 on no match. Compile has to be called before | |
| 60 // calling this. atoms holds the indices of the strings returned by Compile that were found in text. | |
| 61 int FirstMatch(const StringPiece& text, | |
| 62 const vector<int>& atoms) const; | |
| 63 | |
| 64 // Returns the indices of all matching regexps, after first clearing | |
| 65 // matching_regexps. NOTE(review): the bool result presumably reports whether any regexp matched -- confirm against the implementation. | |
| 66 bool AllMatches(const StringPiece& text, | |
| 67 const vector<int>& atoms, | |
| 68 vector<int>* matching_regexps) const; | |
| 69 | |
| 70 // Returns the indices of all potentially matching regexps after first | |
| 71 // clearing potential_regexps. | |
| 72 // A regexp is potentially matching if it passes the filter. | |
| 73 // If a regexp passes the filter it may still not match. | |
| 74 // A regexp that does not pass the filter is guaranteed to not match. | |
| 75 void AllPotentials(const vector<int>& atoms, | |
| 76 vector<int>* potential_regexps) const; | |
| 77 | |
| 78 // The number of regexps added. | |
| 79 int NumRegexps() const { return static_cast<int>(re2_vec_.size()); } | |
| 80 | |
| 81 private: | |
| 82 | |
| 83 // Get the individual RE2 objects. Useful for testing. regexpid indexes re2_vec_ directly, with no bounds check. | |
| 84 RE2* GetRE2(int regexpid) const { return re2_vec_[regexpid]; } | |
| 85 | |
| 86 // Print prefilter. NOTE(review): presumably the prefilter of the regexp identified by regexpid -- confirm. | |
| 87 void PrintPrefilter(int regexpid); | |
| 88 | |
| 89 // Useful for testing and debugging. | |
| 90 void RegexpsGivenStrings(const vector<int>& matched_atoms, | |
| 91 vector<int>* passed_regexps); | |
| 92 | |
| 93 // All the regexps in the FilteredRE2. | |
| 94 vector<RE2*> re2_vec_; | |
| 95 | |
| 96 // Whether the FilteredRE2 has been compiled using Compile(). | |
| 97 bool compiled_; | |
| 98 | |
| 99 // An AND-OR tree of string atoms used for filtering regexps. | |
| 100 PrefilterTree* prefilter_tree_; | |
| 101 | |
| 102 // Copy constructor and assignment are declared private to disallow copying (stands in for DISALLOW_COPY_AND_ASSIGN(FilteredRE2)). | |
| 103 FilteredRE2(const FilteredRE2&); | |
| 104 void operator=(const FilteredRE2&); | |
| 105 }; | |
| 106 | |
| 107 } // namespace re2 | |
| 108 | |
| 109 #endif // RE2_FILTERED_RE2_H_ | |
| OLD | NEW |