OLD | NEW |
| (Empty) |
1 // Copyright 2009 The RE2 Authors. All Rights Reserved. | |
2 // Use of this source code is governed by a BSD-style | |
3 // license that can be found in the LICENSE file. | |
4 | |
5 #include "util/test.h" | |
6 #include "re2/filtered_re2.h" | |
7 #include "re2/re2.h" | |
8 | |
9 DECLARE_int32(filtered_re2_min_atom_len); // From prefilter_tree.cc | |
10 | |
11 namespace re2 { | |
12 | |
13 struct FilterTestVars { | |
14 vector<string> atoms; | |
15 vector<int> atom_indices; | |
16 vector<int> matches; | |
17 RE2::Options opts; | |
18 FilteredRE2 f; | |
19 }; | |
20 | |
21 TEST(FilteredRE2Test, EmptyTest) { | |
22 FilterTestVars v; | |
23 v.f.AllMatches("foo", v.atom_indices, &v.matches); | |
24 EXPECT_EQ(0, v.matches.size()); | |
25 } | |
26 | |
27 TEST(FilteredRE2Test, SmallOrTest) { | |
28 FLAGS_filtered_re2_min_atom_len = 4; | |
29 | |
30 FilterTestVars v; | |
31 int id; | |
32 v.f.Add("(foo|bar)", v.opts, &id); | |
33 | |
34 v.f.Compile(&v.atoms); | |
35 EXPECT_EQ(0, v.atoms.size()); | |
36 | |
37 v.f.AllMatches("lemurs bar", v.atom_indices, &v.matches); | |
38 EXPECT_EQ(1, v.matches.size()); | |
39 EXPECT_EQ(id, v.matches[0]); | |
40 } | |
41 | |
42 TEST(FilteredRE2Test, SmallLatinTest) { | |
43 FLAGS_filtered_re2_min_atom_len = 3; | |
44 FilterTestVars v; | |
45 int id; | |
46 | |
47 v.opts.set_encoding(RE2::Options::EncodingLatin1); | |
48 v.f.Add("\xde\xadQ\xbe\xef", v.opts, &id); | |
49 v.f.Compile(&v.atoms); | |
50 EXPECT_EQ(1, v.atoms.size()); | |
51 EXPECT_EQ(v.atoms[0], "\xde\xadq\xbe\xef"); | |
52 | |
53 v.atom_indices.push_back(0); | |
54 v.f.AllMatches("foo\xde\xadQ\xbe\xeflemur", v.atom_indices, &v.matches); | |
55 EXPECT_EQ(1, v.matches.size()); | |
56 EXPECT_EQ(id, v.matches[0]); | |
57 } | |
58 | |
59 struct AtomTest { | |
60 const char* testname; | |
61 // If any test needs more than this many regexps or atoms, increase | |
62 // the size of the corresponding array. | |
63 const char* regexps[20]; | |
64 const char* atoms[20]; | |
65 }; | |
66 | |
67 AtomTest atom_tests[] = { | |
68 { | |
69 // This test checks to make sure empty patterns are allowed. | |
70 "CheckEmptyPattern", | |
71 {""}, | |
72 {} | |
73 }, { | |
74 // This test checks that all atoms of length greater than min length | |
75 // are found, and no atoms that are of smaller length are found. | |
76 "AllAtomsGtMinLengthFound", { | |
77 "(abc123|def456|ghi789).*mnop[x-z]+", | |
78 "abc..yyy..zz", | |
79 "mnmnpp[a-z]+PPP" | |
80 }, { | |
81 "abc123", | |
82 "def456", | |
83 "ghi789", | |
84 "mnop", | |
85 "abc", | |
86 "yyy", | |
87 "mnmnpp", | |
88 "ppp" | |
89 } | |
90 }, { | |
91 // Test to make sure that any atoms that have another atom as a | |
92 // substring in an OR are removed; that is, only the shortest | |
93 // substring is kept. | |
94 "SubstrAtomRemovesSuperStrInOr", { | |
95 "(abc123|abc|ghi789|abc1234).*[x-z]+", | |
96 "abcd..yyy..yyyzzz", | |
97 "mnmnpp[a-z]+PPP" | |
98 }, { | |
99 "abc", | |
100 "ghi789", | |
101 "abcd", | |
102 "yyy", | |
103 "yyyzzz", | |
104 "mnmnpp", | |
105 "ppp" | |
106 } | |
107 }, { | |
108 // Test character class expansion. | |
109 "CharClassExpansion", { | |
110 "m[a-c][d-f]n.*[x-z]+", | |
111 "[x-y]bcde[ab]" | |
112 }, { | |
113 "madn", "maen", "mafn", | |
114 "mbdn", "mben", "mbfn", | |
115 "mcdn", "mcen", "mcfn", | |
116 "xbcdea", "xbcdeb", | |
117 "ybcdea", "ybcdeb" | |
118 } | |
119 }, { | |
120 // Test upper/lower of non-ASCII. | |
121 "UnicodeLower", { | |
122 "(?i)ΔδΠϖπΣςσ", | |
123 "ΛΜΝΟΠ", | |
124 "ψρστυ", | |
125 }, { | |
126 "δδπππσσσ", | |
127 "λμνοπ", | |
128 "ψρστυ", | |
129 }, | |
130 }, | |
131 }; | |
132 | |
133 void AddRegexpsAndCompile(const char* regexps[], | |
134 int n, | |
135 struct FilterTestVars* v) { | |
136 for (int i = 0; i < n; i++) { | |
137 int id; | |
138 v->f.Add(regexps[i], v->opts, &id); | |
139 } | |
140 v->f.Compile(&v->atoms); | |
141 } | |
142 | |
143 bool CheckExpectedAtoms(const char* atoms[], | |
144 int n, | |
145 const char* testname, | |
146 struct FilterTestVars* v) { | |
147 vector<string> expected; | |
148 for (int i = 0; i < n; i++) | |
149 expected.push_back(atoms[i]); | |
150 | |
151 bool pass = expected.size() == v->atoms.size(); | |
152 | |
153 sort(v->atoms.begin(), v->atoms.end()); | |
154 sort(expected.begin(), expected.end()); | |
155 for (int i = 0; pass && i < n; i++) | |
156 pass = pass && expected[i] == v->atoms[i]; | |
157 | |
158 if (!pass) { | |
159 LOG(WARNING) << "Failed " << testname; | |
160 LOG(WARNING) << "Expected #atoms = " << expected.size(); | |
161 for (size_t i = 0; i < expected.size(); i++) | |
162 LOG(WARNING) << expected[i]; | |
163 LOG(WARNING) << "Found #atoms = " << v->atoms.size(); | |
164 for (size_t i = 0; i < v->atoms.size(); i++) | |
165 LOG(WARNING) << v->atoms[i]; | |
166 } | |
167 | |
168 return pass; | |
169 } | |
170 | |
171 TEST(FilteredRE2Test, AtomTests) { | |
172 FLAGS_filtered_re2_min_atom_len = 3; | |
173 | |
174 int nfail = 0; | |
175 for (int i = 0; i < arraysize(atom_tests); i++) { | |
176 FilterTestVars v; | |
177 AtomTest* t = &atom_tests[i]; | |
178 int natom, nregexp; | |
179 for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) | |
180 if (t->regexps[nregexp] == NULL) | |
181 break; | |
182 for (natom = 0; natom < arraysize(t->atoms); natom++) | |
183 if (t->atoms[natom] == NULL) | |
184 break; | |
185 AddRegexpsAndCompile(t->regexps, nregexp, &v); | |
186 if (!CheckExpectedAtoms(t->atoms, natom, t->testname, &v)) | |
187 nfail++; | |
188 } | |
189 EXPECT_EQ(0, nfail); | |
190 } | |
191 | |
192 void FindAtomIndices(const vector<string>& atoms, | |
193 const vector<string>& matched_atoms, | |
194 vector<int>* atom_indices) { | |
195 atom_indices->clear(); | |
196 for (size_t i = 0; i < matched_atoms.size(); i++) { | |
197 for (size_t j = 0; j < atoms.size(); j++) { | |
198 if (matched_atoms[i] == atoms[j]) { | |
199 atom_indices->push_back(static_cast<int>(j)); | |
200 break; | |
201 } | |
202 } | |
203 } | |
204 } | |
205 | |
206 TEST(FilteredRE2Test, MatchEmptyPattern) { | |
207 FLAGS_filtered_re2_min_atom_len = 3; | |
208 | |
209 FilterTestVars v; | |
210 AtomTest* t = &atom_tests[0]; | |
211 // We are using the regexps used in one of the atom tests | |
212 // for this test. Adding the EXPECT here to make sure | |
213 // the index we use for the test is for the correct test. | |
214 EXPECT_EQ("CheckEmptyPattern", string(t->testname)); | |
215 int nregexp; | |
216 for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) | |
217 if (t->regexps[nregexp] == NULL) | |
218 break; | |
219 AddRegexpsAndCompile(t->regexps, nregexp, &v); | |
220 string text = "0123"; | |
221 vector<int> atom_ids; | |
222 vector<int> matching_regexps; | |
223 EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids)); | |
224 } | |
225 | |
226 TEST(FilteredRE2Test, MatchTests) { | |
227 FLAGS_filtered_re2_min_atom_len = 3; | |
228 | |
229 FilterTestVars v; | |
230 AtomTest* t = &atom_tests[2]; | |
231 // We are using the regexps used in one of the atom tests | |
232 // for this test. | |
233 EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", string(t->testname)); | |
234 int nregexp; | |
235 for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) | |
236 if (t->regexps[nregexp] == NULL) | |
237 break; | |
238 AddRegexpsAndCompile(t->regexps, nregexp, &v); | |
239 | |
240 string text = "abc121212xyz"; | |
241 // atoms = abc | |
242 vector<int> atom_ids; | |
243 vector<string> atoms; | |
244 atoms.push_back("abc"); | |
245 FindAtomIndices(v.atoms, atoms, &atom_ids); | |
246 vector<int> matching_regexps; | |
247 v.f.AllMatches(text, atom_ids, &matching_regexps); | |
248 EXPECT_EQ(1, matching_regexps.size()); | |
249 | |
250 text = "abc12312yyyzzz"; | |
251 atoms.clear(); | |
252 atoms.push_back("abc"); | |
253 atoms.push_back("yyy"); | |
254 atoms.push_back("yyyzzz"); | |
255 FindAtomIndices(v.atoms, atoms, &atom_ids); | |
256 v.f.AllMatches(text, atom_ids, &matching_regexps); | |
257 EXPECT_EQ(1, matching_regexps.size()); | |
258 | |
259 text = "abcd12yyy32yyyzzz"; | |
260 atoms.clear(); | |
261 atoms.push_back("abc"); | |
262 atoms.push_back("abcd"); | |
263 atoms.push_back("yyy"); | |
264 atoms.push_back("yyyzzz"); | |
265 FindAtomIndices(v.atoms, atoms, &atom_ids); | |
266 LOG(INFO) << "S: " << atom_ids.size(); | |
267 for (size_t i = 0; i < atom_ids.size(); i++) | |
268 LOG(INFO) << "i: " << i << " : " << atom_ids[i]; | |
269 v.f.AllMatches(text, atom_ids, &matching_regexps); | |
270 EXPECT_EQ(2, matching_regexps.size()); | |
271 } | |
272 | |
273 } // namespace re2 | |
OLD | NEW |