| OLD | NEW |
| (Empty) |
| 1 // Copyright 2009 The RE2 Authors. All Rights Reserved. | |
| 2 // Use of this source code is governed by a BSD-style | |
| 3 // license that can be found in the LICENSE file. | |
| 4 | |
| 5 #include "util/test.h" | |
| 6 #include "re2/filtered_re2.h" | |
| 7 #include "re2/re2.h" | |
| 8 | |
| 9 DECLARE_int32(filtered_re2_min_atom_len); // From prefilter_tree.cc | |
| 10 | |
| 11 namespace re2 { | |
| 12 | |
| 13 struct FilterTestVars { | |
| 14 vector<string> atoms; | |
| 15 vector<int> atom_indices; | |
| 16 vector<int> matches; | |
| 17 RE2::Options opts; | |
| 18 FilteredRE2 f; | |
| 19 }; | |
| 20 | |
| 21 TEST(FilteredRE2Test, EmptyTest) { | |
| 22 FilterTestVars v; | |
| 23 v.f.AllMatches("foo", v.atom_indices, &v.matches); | |
| 24 EXPECT_EQ(0, v.matches.size()); | |
| 25 } | |
| 26 | |
| 27 TEST(FilteredRE2Test, SmallOrTest) { | |
| 28 FLAGS_filtered_re2_min_atom_len = 4; | |
| 29 | |
| 30 FilterTestVars v; | |
| 31 int id; | |
| 32 v.f.Add("(foo|bar)", v.opts, &id); | |
| 33 | |
| 34 v.f.Compile(&v.atoms); | |
| 35 EXPECT_EQ(0, v.atoms.size()); | |
| 36 | |
| 37 v.f.AllMatches("lemurs bar", v.atom_indices, &v.matches); | |
| 38 EXPECT_EQ(1, v.matches.size()); | |
| 39 EXPECT_EQ(id, v.matches[0]); | |
| 40 } | |
| 41 | |
| 42 TEST(FilteredRE2Test, SmallLatinTest) { | |
| 43 FLAGS_filtered_re2_min_atom_len = 3; | |
| 44 FilterTestVars v; | |
| 45 int id; | |
| 46 | |
| 47 v.opts.set_encoding(RE2::Options::EncodingLatin1); | |
| 48 v.f.Add("\xde\xadQ\xbe\xef", v.opts, &id); | |
| 49 v.f.Compile(&v.atoms); | |
| 50 EXPECT_EQ(1, v.atoms.size()); | |
| 51 EXPECT_EQ(v.atoms[0], "\xde\xadq\xbe\xef"); | |
| 52 | |
| 53 v.atom_indices.push_back(0); | |
| 54 v.f.AllMatches("foo\xde\xadQ\xbe\xeflemur", v.atom_indices, &v.matches); | |
| 55 EXPECT_EQ(1, v.matches.size()); | |
| 56 EXPECT_EQ(id, v.matches[0]); | |
| 57 } | |
| 58 | |
// One table-driven case: a name, the regexps to add, and the atoms that
// are expected after compiling them.
struct AtomTest {
  const char* testname;
  // Both arrays have a fixed capacity; the tests that consume them stop
  // counting at the first NULL slot.  If a case ever needs more regexps
  // or atoms than fit, enlarge the corresponding array.
  const char* regexps[20];
  const char* atoms[20];
};
| 66 | |
| 67 AtomTest atom_tests[] = { | |
| 68 { | |
| 69 // This test checks to make sure empty patterns are allowed. | |
| 70 "CheckEmptyPattern", | |
| 71 {""}, | |
| 72 {} | |
| 73 }, { | |
| 74 // This test checks that all atoms of length greater than min length | |
| 75 // are found, and no atoms that are of smaller length are found. | |
| 76 "AllAtomsGtMinLengthFound", { | |
| 77 "(abc123|def456|ghi789).*mnop[x-z]+", | |
| 78 "abc..yyy..zz", | |
| 79 "mnmnpp[a-z]+PPP" | |
| 80 }, { | |
| 81 "abc123", | |
| 82 "def456", | |
| 83 "ghi789", | |
| 84 "mnop", | |
| 85 "abc", | |
| 86 "yyy", | |
| 87 "mnmnpp", | |
| 88 "ppp" | |
| 89 } | |
| 90 }, { | |
| 91 // Test to make sure that any atoms that have another atom as a | |
| 92 // substring in an OR are removed; that is, only the shortest | |
| 93 // substring is kept. | |
| 94 "SubstrAtomRemovesSuperStrInOr", { | |
| 95 "(abc123|abc|ghi789|abc1234).*[x-z]+", | |
| 96 "abcd..yyy..yyyzzz", | |
| 97 "mnmnpp[a-z]+PPP" | |
| 98 }, { | |
| 99 "abc", | |
| 100 "ghi789", | |
| 101 "abcd", | |
| 102 "yyy", | |
| 103 "yyyzzz", | |
| 104 "mnmnpp", | |
| 105 "ppp" | |
| 106 } | |
| 107 }, { | |
| 108 // Test character class expansion. | |
| 109 "CharClassExpansion", { | |
| 110 "m[a-c][d-f]n.*[x-z]+", | |
| 111 "[x-y]bcde[ab]" | |
| 112 }, { | |
| 113 "madn", "maen", "mafn", | |
| 114 "mbdn", "mben", "mbfn", | |
| 115 "mcdn", "mcen", "mcfn", | |
| 116 "xbcdea", "xbcdeb", | |
| 117 "ybcdea", "ybcdeb" | |
| 118 } | |
| 119 }, { | |
| 120 // Test upper/lower of non-ASCII. | |
| 121 "UnicodeLower", { | |
| 122 "(?i)ΔδΠϖπΣςσ", | |
| 123 "ΛΜΝΟΠ", | |
| 124 "ψρστυ", | |
| 125 }, { | |
| 126 "δδπππσσσ", | |
| 127 "λμνοπ", | |
| 128 "ψρστυ", | |
| 129 }, | |
| 130 }, | |
| 131 }; | |
| 132 | |
| 133 void AddRegexpsAndCompile(const char* regexps[], | |
| 134 int n, | |
| 135 struct FilterTestVars* v) { | |
| 136 for (int i = 0; i < n; i++) { | |
| 137 int id; | |
| 138 v->f.Add(regexps[i], v->opts, &id); | |
| 139 } | |
| 140 v->f.Compile(&v->atoms); | |
| 141 } | |
| 142 | |
| 143 bool CheckExpectedAtoms(const char* atoms[], | |
| 144 int n, | |
| 145 const char* testname, | |
| 146 struct FilterTestVars* v) { | |
| 147 vector<string> expected; | |
| 148 for (int i = 0; i < n; i++) | |
| 149 expected.push_back(atoms[i]); | |
| 150 | |
| 151 bool pass = expected.size() == v->atoms.size(); | |
| 152 | |
| 153 sort(v->atoms.begin(), v->atoms.end()); | |
| 154 sort(expected.begin(), expected.end()); | |
| 155 for (int i = 0; pass && i < n; i++) | |
| 156 pass = pass && expected[i] == v->atoms[i]; | |
| 157 | |
| 158 if (!pass) { | |
| 159 LOG(WARNING) << "Failed " << testname; | |
| 160 LOG(WARNING) << "Expected #atoms = " << expected.size(); | |
| 161 for (size_t i = 0; i < expected.size(); i++) | |
| 162 LOG(WARNING) << expected[i]; | |
| 163 LOG(WARNING) << "Found #atoms = " << v->atoms.size(); | |
| 164 for (size_t i = 0; i < v->atoms.size(); i++) | |
| 165 LOG(WARNING) << v->atoms[i]; | |
| 166 } | |
| 167 | |
| 168 return pass; | |
| 169 } | |
| 170 | |
| 171 TEST(FilteredRE2Test, AtomTests) { | |
| 172 FLAGS_filtered_re2_min_atom_len = 3; | |
| 173 | |
| 174 int nfail = 0; | |
| 175 for (int i = 0; i < arraysize(atom_tests); i++) { | |
| 176 FilterTestVars v; | |
| 177 AtomTest* t = &atom_tests[i]; | |
| 178 int natom, nregexp; | |
| 179 for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) | |
| 180 if (t->regexps[nregexp] == NULL) | |
| 181 break; | |
| 182 for (natom = 0; natom < arraysize(t->atoms); natom++) | |
| 183 if (t->atoms[natom] == NULL) | |
| 184 break; | |
| 185 AddRegexpsAndCompile(t->regexps, nregexp, &v); | |
| 186 if (!CheckExpectedAtoms(t->atoms, natom, t->testname, &v)) | |
| 187 nfail++; | |
| 188 } | |
| 189 EXPECT_EQ(0, nfail); | |
| 190 } | |
| 191 | |
// For each string in matched_atoms, in order, appends to *atom_indices
// the index of its first occurrence in atoms.  Strings that do not occur
// in atoms contribute nothing.  *atom_indices is cleared first.
void FindAtomIndices(const vector<string>& atoms,
                     const vector<string>& matched_atoms,
                     vector<int>* atom_indices) {
  atom_indices->clear();
  for (size_t m = 0; m < matched_atoms.size(); m++) {
    const string& wanted = matched_atoms[m];

    // Linear scan for the first atom equal to the wanted string.
    size_t pos = 0;
    while (pos < atoms.size() && !(wanted == atoms[pos]))
      pos++;

    if (pos < atoms.size())
      atom_indices->push_back(static_cast<int>(pos));
  }
}
| 205 | |
| 206 TEST(FilteredRE2Test, MatchEmptyPattern) { | |
| 207 FLAGS_filtered_re2_min_atom_len = 3; | |
| 208 | |
| 209 FilterTestVars v; | |
| 210 AtomTest* t = &atom_tests[0]; | |
| 211 // We are using the regexps used in one of the atom tests | |
| 212 // for this test. Adding the EXPECT here to make sure | |
| 213 // the index we use for the test is for the correct test. | |
| 214 EXPECT_EQ("CheckEmptyPattern", string(t->testname)); | |
| 215 int nregexp; | |
| 216 for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) | |
| 217 if (t->regexps[nregexp] == NULL) | |
| 218 break; | |
| 219 AddRegexpsAndCompile(t->regexps, nregexp, &v); | |
| 220 string text = "0123"; | |
| 221 vector<int> atom_ids; | |
| 222 vector<int> matching_regexps; | |
| 223 EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids)); | |
| 224 } | |
| 225 | |
| 226 TEST(FilteredRE2Test, MatchTests) { | |
| 227 FLAGS_filtered_re2_min_atom_len = 3; | |
| 228 | |
| 229 FilterTestVars v; | |
| 230 AtomTest* t = &atom_tests[2]; | |
| 231 // We are using the regexps used in one of the atom tests | |
| 232 // for this test. | |
| 233 EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", string(t->testname)); | |
| 234 int nregexp; | |
| 235 for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) | |
| 236 if (t->regexps[nregexp] == NULL) | |
| 237 break; | |
| 238 AddRegexpsAndCompile(t->regexps, nregexp, &v); | |
| 239 | |
| 240 string text = "abc121212xyz"; | |
| 241 // atoms = abc | |
| 242 vector<int> atom_ids; | |
| 243 vector<string> atoms; | |
| 244 atoms.push_back("abc"); | |
| 245 FindAtomIndices(v.atoms, atoms, &atom_ids); | |
| 246 vector<int> matching_regexps; | |
| 247 v.f.AllMatches(text, atom_ids, &matching_regexps); | |
| 248 EXPECT_EQ(1, matching_regexps.size()); | |
| 249 | |
| 250 text = "abc12312yyyzzz"; | |
| 251 atoms.clear(); | |
| 252 atoms.push_back("abc"); | |
| 253 atoms.push_back("yyy"); | |
| 254 atoms.push_back("yyyzzz"); | |
| 255 FindAtomIndices(v.atoms, atoms, &atom_ids); | |
| 256 v.f.AllMatches(text, atom_ids, &matching_regexps); | |
| 257 EXPECT_EQ(1, matching_regexps.size()); | |
| 258 | |
| 259 text = "abcd12yyy32yyyzzz"; | |
| 260 atoms.clear(); | |
| 261 atoms.push_back("abc"); | |
| 262 atoms.push_back("abcd"); | |
| 263 atoms.push_back("yyy"); | |
| 264 atoms.push_back("yyyzzz"); | |
| 265 FindAtomIndices(v.atoms, atoms, &atom_ids); | |
| 266 LOG(INFO) << "S: " << atom_ids.size(); | |
| 267 for (size_t i = 0; i < atom_ids.size(); i++) | |
| 268 LOG(INFO) << "i: " << i << " : " << atom_ids[i]; | |
| 269 v.f.AllMatches(text, atom_ids, &matching_regexps); | |
| 270 EXPECT_EQ(2, matching_regexps.size()); | |
| 271 } | |
| 272 | |
| 273 } // namespace re2 | |
| OLD | NEW |