OLD | NEW |
| (Empty) |
1 // Copyright 2006 The RE2 Authors. All Rights Reserved. | |
2 // Use of this source code is governed by a BSD-style | |
3 // license that can be found in the LICENSE file. | |
4 | |
5 // Test parse.cc, dump.cc, and tostring.cc. | |
6 | |
7 #include <string> | |
8 #include <vector> | |
9 #include "util/test.h" | |
10 #include "re2/regexp.h" | |
11 | |
12 namespace re2 { | |
13 | |
// Marker bit for Test::flags.  A Test whose flags field is zero is parsed
// with the driver's default flags (see the `tests[i].flags != 0` checks), so
// an entry that must be parsed with no flags at all sets this bit instead.
// The bit is masked off with ~TestZeroFlags before calling Regexp::Parse.
14 static const Regexp::ParseFlags TestZeroFlags = Regexp::ParseFlags(1<<30); | |
15 | |
// One parser test case: a pattern, its expected dump, and optional flags.
16 struct Test { | |
// Pattern text handed to Regexp::Parse.
17 const char* regexp; | |
// Expected result of calling Dump() on the parsed pattern.
18 const char* parse; | |
// Optional per-case parse flags.  Zero (what aggregate initialization leaves
// here when the field is omitted) means "use the driver's default flags".
19 Regexp::ParseFlags flags; | |
20 }; | |
21 | |
22 static Regexp::ParseFlags kTestFlags = Regexp::MatchNL | | |
23 Regexp::PerlX | | |
24 Regexp::PerlClasses | | |
25 Regexp::UnicodeGroups; | |
26 | |
// Main table of patterns and their expected Dump() output.
// Entries without a third field are parsed with kTestFlags; entries with one
// are parsed with exactly those flags, minus the TestZeroFlags marker bit.
// The table is used by TEST(TestParse, SimpleRegexps) and by
// TEST(TestToString, EquivalentParse).  TestParse also cross-compares every
// pair of entries, so two entries with the same expected dump must parse to
// regexps that compare equal, and entries with different dumps must not.
27 static Test tests[] = { | |
28 // Base cases | |
29 { "a", "lit{a}" }, | |
30 { "a.", "cat{lit{a}dot{}}" }, | |
31 { "a.b", "cat{lit{a}dot{}lit{b}}" }, | |
32 { "ab", "str{ab}" }, | |
33 { "a.b.c", "cat{lit{a}dot{}lit{b}dot{}lit{c}}" }, | |
34 { "abc", "str{abc}" }, | |
35 { "a|^", "alt{lit{a}bol{}}" }, | |
36 { "a|b", "cc{0x61-0x62}" }, | |
37 { "(a)", "cap{lit{a}}" }, | |
38 { "(a)|b", "alt{cap{lit{a}}lit{b}}" }, | |
39 { "a*", "star{lit{a}}" }, | |
40 { "a+", "plus{lit{a}}" }, | |
41 { "a?", "que{lit{a}}" }, | |
42 { "a{2}", "rep{2,2 lit{a}}" }, | |
43 { "a{2,3}", "rep{2,3 lit{a}}" }, | |
44 { "a{2,}", "rep{2,-1 lit{a}}" }, | |
45 { "a*?", "nstar{lit{a}}" }, | |
46 { "a+?", "nplus{lit{a}}" }, | |
47 { "a??", "nque{lit{a}}" }, | |
48 { "a{2}?", "nrep{2,2 lit{a}}" }, | |
49 { "a{2,3}?", "nrep{2,3 lit{a}}" }, | |
50 { "a{2,}?", "nrep{2,-1 lit{a}}" }, | |
51 { "", "emp{}" }, | |
52 { "|", "emp{}" }, // alt{emp{}emp{}} but got factored | |
53 { "|x|", "alt{emp{}lit{x}emp{}}" }, | |
54 { ".", "dot{}" }, | |
55 { "^", "bol{}" }, | |
56 { "$", "eol{}" }, | |
57 { "\\|", "lit{|}" }, | |
58 { "\\(", "lit{(}" }, | |
59 { "\\)", "lit{)}" }, | |
60 { "\\*", "lit{*}" }, | |
61 { "\\+", "lit{+}" }, | |
62 { "\\?", "lit{?}" }, | |
63 { "{", "lit{{}" }, | |
64 { "}", "lit{}}" }, | |
65 { "\\.", "lit{.}" }, | |
66 { "\\^", "lit{^}" }, | |
67 { "\\$", "lit{$}" }, | |
68 { "\\\\", "lit{\\}" }, | |
69 { "[ace]", "cc{0x61 0x63 0x65}" }, | |
70 { "[abc]", "cc{0x61-0x63}" }, | |
71 { "[a-z]", "cc{0x61-0x7a}" }, | |
72 { "[a]", "lit{a}" }, | |
73 { "\\-", "lit{-}" }, | |
74 { "-", "lit{-}" }, | |
75 { "\\_", "lit{_}" }, | |
76 | |
77 // Posix and Perl extensions | |
78 { "[[:lower:]]", "cc{0x61-0x7a}" }, | |
79 { "[a-z]", "cc{0x61-0x7a}" }, | |
80 { "[^[:lower:]]", "cc{0-0x60 0x7b-0x10ffff}" }, | |
81 { "[[:^lower:]]", "cc{0-0x60 0x7b-0x10ffff}" }, | |
82 { "(?i)[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, | |
83 { "(?i)[a-z]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, | |
84 { "(?i)[^[:lower:]]", "cc{0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10
ffff}" }, | |
85 { "(?i)[[:^lower:]]", "cc{0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10
ffff}" }, | |
86 { "\\d", "cc{0x30-0x39}" }, | |
87 { "\\D", "cc{0-0x2f 0x3a-0x10ffff}" }, | |
88 { "\\s", "cc{0x9-0xa 0xc-0xd 0x20}" }, | |
89 { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}" }, | |
90 { "\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}" }, | |
91 { "\\W", "cc{0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}" }, | |
92 { "(?i)\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}" }, | |
93 { "(?i)\\W", "cc{0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212
b-0x10ffff}" }, | |
94 { "[^\\\\]", "cc{0-0x5b 0x5d-0x10ffff}" }, | |
95 { "\\C", "byte{}" }, | |
96 | |
97 // Unicode, negatives, and a double negative. | |
98 { "\\p{Braille}", "cc{0x2800-0x28ff}" }, | |
99 { "\\P{Braille}", "cc{0-0x27ff 0x2900-0x10ffff}" }, | |
100 { "\\p{^Braille}", "cc{0-0x27ff 0x2900-0x10ffff}" }, | |
101 { "\\P{^Braille}", "cc{0x2800-0x28ff}" }, | |
102 | |
103 // More interesting regular expressions. | |
104 { "a{,2}", "str{a{,2}}" }, | |
105 { "\\.\\^\\$\\\\", "str{.^$\\}" }, | |
106 { "[a-zABC]", "cc{0x41-0x43 0x61-0x7a}" }, | |
107 { "[^a]", "cc{0-0x60 0x62-0x10ffff}" }, | |
108 { "[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}" }, // utf-8 | |
109 { "a*{", "cat{star{lit{a}}lit{{}}" }, | |
110 | |
111 // Test precedences | |
112 { "(?:ab)*", "star{str{ab}}" }, | |
113 { "(ab)*", "star{cap{str{ab}}}" }, | |
114 { "ab|cd", "alt{str{ab}str{cd}}" }, | |
115 { "a(b|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" }, | |
116 | |
117 // Test flattening. | |
118 { "(?:a)", "lit{a}" }, | |
119 { "(?:ab)(?:cd)", "str{abcd}" }, | |
120 { "(?:a|b)|(?:c|d)", "cc{0x61-0x64}" }, | |
121 { "a|c", "cc{0x61 0x63}" }, | |
122 { "a|[cd]", "cc{0x61 0x63-0x64}" }, | |
123 { "a|.", "dot{}" }, | |
124 { "[ab]|c", "cc{0x61-0x63}" }, | |
125 { "[ab]|[cd]", "cc{0x61-0x64}" }, | |
126 { "[ab]|.", "dot{}" }, | |
127 { ".|c", "dot{}" }, | |
128 { ".|[cd]", "dot{}" }, | |
129 { ".|.", "dot{}" }, | |
130 | |
131 // Test Perl quoted literals | |
132 { "\\Q+|*?{[\\E", "str{+|*?{[}" }, | |
133 { "\\Q+\\E+", "plus{lit{+}}" }, | |
134 { "\\Q\\\\E", "lit{\\}" }, | |
135 { "\\Q\\\\\\E", "str{\\\\}" }, | |
136 { "\\Qa\\E*", "star{lit{a}}" }, | |
137 { "\\Qab\\E*", "cat{lit{a}star{lit{b}}}" }, | |
138 { "\\Qabc\\E*", "cat{str{ab}star{lit{c}}}" }, | |
139 | |
140 // Test Perl \A and \z | |
141 { "(?m)^", "bol{}" }, | |
142 { "(?m)$", "eol{}" }, | |
143 { "(?-m)^", "bot{}" }, | |
144 { "(?-m)$", "eot{}" }, | |
145 { "(?m)\\A", "bot{}" }, | |
146 { "(?m)\\z", "eot{\\z}" }, | |
147 { "(?-m)\\A", "bot{}" }, | |
148 { "(?-m)\\z", "eot{\\z}" }, | |
149 | |
150 // Test named captures | |
151 { "(?P<name>a)", "cap{name:lit{a}}" }, | |
152 | |
153 // Case-folded literals | |
154 { "[Aa]", "litfold{a}" }, | |
155 | |
156 // Strings | |
157 { "abcde", "str{abcde}" }, | |
158 { "[Aa][Bb]cd", "cat{strfold{ab}str{cd}}" }, | |
159 | |
160 // Reported bug involving \n leaking in despite use of NeverNL. | |
161 { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", TestZeroFlags }, | |
162 { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, | |
163 { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
164 { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCa
se }, | |
165 { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", TestZeroFlags }, | |
166 { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, | |
167 { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
168 { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::
FoldCase }, | |
169 { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", TestZeroFlags }, | |
170 { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, | |
171 { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
172 { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Rege
xp::FoldCase }, | |
173 { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", TestZeroFlags }, | |
174 { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, | |
175 { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
176 { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::Fold
Case }, | |
177 { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", TestZeroFlags }, | |
178 { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, | |
179 { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
180 { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::Fold
Case }, | |
181 { "[^ \r\f\v]", "cc{0-0x9 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
182 { "[^ \r\f\v]", "cc{0-0x9 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::
FoldCase }, | |
183 { "[^ \r\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
184 { "[^ \r\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp
::FoldCase }, | |
185 { "[^ \r\n\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
186 { "[^ \r\n\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Rege
xp::FoldCase }, | |
187 { "[^ \r\n\f\t]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
188 { "[^ \r\n\f\t]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Re
gexp::FoldCase }, | |
189 { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
190 Regexp::PerlClasses }, | |
191 { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
192 Regexp::PerlClasses | Regexp::FoldCase }, | |
193 { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
194 Regexp::PerlClasses | Regexp::NeverNL }, | |
195 { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
196 Regexp::PerlClasses | Regexp::NeverNL | Regexp::FoldCase }, | |
197 { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
198 Regexp::PerlClasses }, | |
199 { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
200 Regexp::PerlClasses | Regexp::FoldCase }, | |
201 { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
202 Regexp::PerlClasses | Regexp::NeverNL }, | |
203 { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
204 Regexp::PerlClasses | Regexp::NeverNL | Regexp::FoldCase }, | |
205 }; | |
206 | |
// Returns Regexp::Equal(a, b).  Free-function wrapper that TestParse uses to
// compare the regexps it has parsed.
207 bool RegexpEqualTestingOnly(Regexp* a, Regexp* b) { | |
208 return Regexp::Equal(a, b); | |
209 } | |
210 | |
211 void TestParse(const Test* tests, int ntests, Regexp::ParseFlags flags, | |
212 const string& title) { | |
213 Regexp** re = new Regexp*[ntests]; | |
214 for (int i = 0; i < ntests; i++) { | |
215 RegexpStatus status; | |
216 Regexp::ParseFlags f = flags; | |
217 if (tests[i].flags != 0) { | |
218 f = tests[i].flags & ~TestZeroFlags; | |
219 } | |
220 re[i] = Regexp::Parse(tests[i].regexp, f, &status); | |
221 CHECK(re[i] != NULL) << " " << tests[i].regexp << " " | |
222 << status.Text(); | |
223 string s = re[i]->Dump(); | |
224 EXPECT_EQ(string(tests[i].parse), s) << "Regexp: " << tests[i].regexp | |
225 << "\nparse: " << string(tests[i].parse) << " s: " << s << " flag=" << f; | |
226 } | |
227 | |
228 for (int i = 0; i < ntests; i++) { | |
229 for (int j = 0; j < ntests; j++) { | |
230 EXPECT_EQ(string(tests[i].parse) == string(tests[j].parse), | |
231 RegexpEqualTestingOnly(re[i], re[j])) | |
232 << "Regexp: " << tests[i].regexp << " " << tests[j].regexp; | |
233 } | |
234 } | |
235 | |
236 for (int i = 0; i < ntests; i++) | |
237 re[i]->Decref(); | |
238 delete[] re; | |
239 } | |
240 | |
241 // Test that regexps parse to expected structures. | |
// Runs TestParse over the `tests` table with kTestFlags as the default flags.
242 TEST(TestParse, SimpleRegexps) { | |
243 TestParse(tests, arraysize(tests), kTestFlags, "simple"); | |
244 } | |
245 | |
// Cases run by TEST(TestParse, FoldCase), i.e. parsed with Regexp::FoldCase.
// The expected dumps use the folded node kinds (litfold/strfold) and classes
// that contain both cases of each letter.
246 Test foldcase_tests[] = { | |
247 { "AbCdE", "strfold{abcde}" }, | |
248 { "[Aa]", "litfold{a}" }, | |
249 { "a", "litfold{a}" }, | |
250 | |
251 // 0x17F is an old English long s (looks like an f) and folds to s. | |
252 // 0x212A is the Kelvin symbol and folds to k. | |
253 { "A[F-g]", "cat{litfold{a}cc{0x41-0x7a 0x17f 0x212a}}" }, // [Aa][A-z...] | |
254 { "[[:upper:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, | |
255 { "[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, | |
256 }; | |
257 | |
258 // Test that parsing with FoldCase works. | |
// Runs TestParse over foldcase_tests with Regexp::FoldCase as the only flag.
259 TEST(TestParse, FoldCase) { | |
260 TestParse(foldcase_tests, arraysize(foldcase_tests), Regexp::FoldCase, "foldca
se"); | |
261 } | |
262 | |
// Case run by TEST(TestParse, Literal), i.e. parsed with Regexp::Literal.
// The pattern is full of characters that are operators elsewhere in this
// file; the expected dump is a single str{} holding the text unchanged.
263 Test literal_tests[] = { | |
264 { "(|)^$.[*+?]{5,10},\\", "str{(|)^$.[*+?]{5,10},\\}" }, | |
265 }; | |
266 | |
267 // Test that parsing with Literal works. | |
// Runs TestParse over literal_tests with Regexp::Literal as the only flag.
268 TEST(TestParse, Literal) { | |
269 TestParse(literal_tests, arraysize(literal_tests), Regexp::Literal, "literal")
; | |
270 } | |
271 | |
// Cases run by TEST(TestParse, MatchNL), i.e. parsed with Regexp::MatchNL.
// The same four patterns appear in nomatchnl_tests with different expected
// dumps for "." and "[^a]".
272 Test matchnl_tests[] = { | |
273 { ".", "dot{}" }, | |
274 { "\n", "lit{\n}" }, | |
275 { "[^a]", "cc{0-0x60 0x62-0x10ffff}" }, | |
276 { "[a\\n]", "cc{0xa 0x61}" }, | |
277 }; | |
278 | |
279 // Test that parsing with MatchNL works. | |
280 // (Also tested above during simple cases.) | |
// Runs TestParse over matchnl_tests with Regexp::MatchNL as the only flag.
281 TEST(TestParse, MatchNL) { | |
282 TestParse(matchnl_tests, arraysize(matchnl_tests), Regexp::MatchNL, "with Matc
hNL"); | |
283 } | |
284 | |
// Cases run by TEST(TestParse, NoMatchNL), i.e. parsed with
// Regexp::NoParseFlags.  Same patterns as matchnl_tests; here the expected
// classes for "." and "[^a]" leave out 0xa, while an explicit "\n" literal
// and "[a\\n]" still include it.
285 Test nomatchnl_tests[] = { | |
286 { ".", "cc{0-0x9 0xb-0x10ffff}" }, | |
287 { "\n", "lit{\n}" }, | |
288 { "[^a]", "cc{0-0x9 0xb-0x60 0x62-0x10ffff}" }, | |
289 { "[a\\n]", "cc{0xa 0x61}" }, | |
290 }; | |
291 | |
292 // Test that parsing without MatchNL works. | |
// Runs TestParse over nomatchnl_tests with Regexp::NoParseFlags.
293 TEST(TestParse, NoMatchNL) { | |
294 TestParse(nomatchnl_tests, arraysize(nomatchnl_tests), Regexp::NoParseFlags, "
without MatchNL"); | |
295 } | |
296 | |
// Cases run by TEST(TestParse, Prefix), i.e. parsed with Regexp::PerlX.
// Each pattern is an alternation whose branches share leading text; the
// expected dumps show the shared prefix pulled out into a cat{} in front of
// the remaining alternatives.  Note the "abc|x|abd" entry, whose expected
// dump is left unfactored (presumably because the branches that share a
// prefix are not adjacent -- confirm against the parser).
297 Test prefix_tests[] = { | |
298 { "abc|abd", "cat{str{ab}cc{0x63-0x64}}" }, | |
299 { "a(?:b)c|abd", "cat{str{ab}cc{0x63-0x64}}" }, | |
300 { "abc|abd|aef|bcx|bcy", | |
301 "alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}" | |
302 "cat{str{bc}cc{0x78-0x79}}}" }, | |
303 { "abc|x|abd", "alt{str{abc}lit{x}str{abd}}" }, | |
304 { "(?i)abc|ABD", "cat{strfold{ab}cc{0x43-0x44 0x63-0x64}}" }, | |
305 { "[ab]c|[ab]d", "cat{cc{0x61-0x62}cc{0x63-0x64}}" }, | |
306 { "(?:xx|yy)c|(?:xx|yy)d", | |
307 "cat{alt{str{xx}str{yy}}cc{0x63-0x64}}" }, | |
308 { "x{2}|x{2}[0-9]", | |
309 "cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}" }, | |
310 { "x{2}y|x{2}[0-9]y", | |
311 "cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}" }, | |
312 { "n|r|rs", | |
313 "alt{lit{n}cat{lit{r}alt{emp{}lit{s}}}}" }, | |
314 { "n|rs|r", | |
315 "alt{lit{n}cat{lit{r}alt{lit{s}emp{}}}}" }, | |
316 { "r|rs|n", | |
317 "alt{cat{lit{r}alt{emp{}lit{s}}}lit{n}}" }, | |
318 { "rs|r|n", | |
319 "alt{cat{lit{r}alt{lit{s}emp{}}}lit{n}}" }, | |
320 }; | |
321 | |
322 // Test that prefix factoring works. | |
// Runs TestParse over prefix_tests with Regexp::PerlX as the only flag.
323 TEST(TestParse, Prefix) { | |
324 TestParse(prefix_tests, arraysize(prefix_tests), Regexp::PerlX, "prefix"); | |
325 } | |
326 | |
// Cases run by TEST(TestParse, Nested), i.e. parsed with Regexp::PerlX.
// These deeply nested repetitions must parse successfully.  Compare with the
// nested entries in badtests, which differ only in their repeat counts and
// must be rejected.
327 Test nested_tests[] = { | |
328 { "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))", | |
329 "cap{cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2
cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 lit{x}}}}}}}}}}}}}}}}}}}}" }, | |
330 { "((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})", | |
331 "cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap
{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{1,1 lit{x}}}}}}}}}}}}}}}}}}}}}" }, | |
332 { "((((((((((x{0}){2}){2}){2}){2}){2}){2}){2}){2}){2})", | |
333 "cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap
{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{0,0 lit{x}}}}}}}}}}}}}}}}}}}}}" }, | |
334 { "((((((x{2}){2}){2}){5}){5}){5})", | |
335 "cap{rep{5,5 cap{rep{5,5 cap{rep{5,5 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 lit
{x}}}}}}}}}}}}}" }, | |
336 }; | |
337 | |
338 // Test that nested repetition works. | |
// Runs TestParse over nested_tests with Regexp::PerlX as the only flag.
339 TEST(TestParse, Nested) { | |
340 TestParse(nested_tests, arraysize(nested_tests), Regexp::PerlX, "nested"); | |
341 } | |
342 | |
343 // Invalid regular expressions | |
// TEST(TestParse, InvalidRegexps) requires every entry to be rejected both
// with Regexp::PerlX and with Regexp::NoParseFlags.  The repeat-count
// entries (x{1001}, a{100000}, and the nested groups) presumably exceed a
// parser limit on repetition -- confirm the limit against parse.cc.
344 const char* badtests[] = { | |
345 "(", | |
346 ")", | |
347 "(a", | |
348 "(a|b|", | |
349 "(a|b", | |
350 "[a-z", | |
351 "([a-z)", | |
352 "x{1001}", | |
353 "\xff", // Invalid UTF-8 | |
354 "[\xff]", | |
355 "[\\\xff]", | |
356 "\\\xff", | |
357 "(?P<name>a", | |
358 "(?P<name>", | |
359 "(?P<name", | |
360 "(?P<x y>a)", | |
361 "(?P<>a)", | |
362 "[a-Z]", | |
363 "(?i)[a-Z]", | |
364 "a{100000}", | |
365 "a{100000,}", | |
366 "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})", | |
367 "(((x{7}){11}){13})", | |
368 "\\Q\\E*", | |
369 }; | |
370 | |
371 // Valid in Perl, bad in POSIX | |
// In TEST(TestParse, InvalidRegexps) "Perl" means parsing with Regexp::PerlX
// and "POSIX" means parsing with Regexp::NoParseFlags: each entry must be
// accepted by the former and rejected by the latter.
372 const char* only_perl[] = { | |
373 "[a-b-c]", | |
374 "\\Qabc\\E", | |
375 "\\Q*+?{[\\E", | |
376 "\\Q\\\\E", | |
377 "\\Q\\\\\\E", | |
378 "\\Q\\\\\\\\E", | |
379 "\\Q\\\\\\\\\\E", | |
380 "(?:a)", | |
381 "(?P<name>a)", | |
382 }; | |
383 | |
384 // Valid in POSIX, bad in Perl. | |
// In TEST(TestParse, InvalidRegexps) each entry must be accepted with
// Regexp::NoParseFlags and rejected with Regexp::PerlX.  Every entry applies
// a repetition operator directly to another repetition.
385 const char* only_posix[] = { | |
386 "a++", | |
387 "a**", | |
388 "a?*", | |
389 "a+*", | |
390 "a{1}*", | |
391 }; | |
392 | |
393 // Test that parser rejects bad regexps. | |
// Three passes, all passing NULL for the status argument because only
// success or failure is inspected:
//   1. badtests:   must fail with both PerlX and NoParseFlags.
//   2. only_posix: must fail with PerlX and succeed with NoParseFlags.
//   3. only_perl:  must fail with NoParseFlags and succeed with PerlX.
394 TEST(TestParse, InvalidRegexps) { | |
395 for (int i = 0; i < arraysize(badtests); i++) { | |
396 CHECK(Regexp::Parse(badtests[i], Regexp::PerlX, NULL) == NULL) | |
397 << " " << badtests[i]; | |
398 CHECK(Regexp::Parse(badtests[i], Regexp::NoParseFlags, NULL) == NULL) | |
399 << " " << badtests[i]; | |
400 } | |
401 for (int i = 0; i < arraysize(only_posix); i++) { | |
402 CHECK(Regexp::Parse(only_posix[i], Regexp::PerlX, NULL) == NULL) | |
403 << " " << only_posix[i]; | |
404 Regexp* re = Regexp::Parse(only_posix[i], Regexp::NoParseFlags, NULL); | |
405 CHECK(re) << " " << only_posix[i]; | |
// Release the reference returned by the successful parse.
406 re->Decref(); | |
407 } | |
408 for (int i = 0; i < arraysize(only_perl); i++) { | |
409 CHECK(Regexp::Parse(only_perl[i], Regexp::NoParseFlags, NULL) == NULL) | |
410 << " " << only_perl[i]; | |
411 Regexp* re = Regexp::Parse(only_perl[i], Regexp::PerlX, NULL); | |
412 CHECK(re) << " " << only_perl[i]; | |
// Release the reference returned by the successful parse.
413 re->Decref(); | |
414 } | |
415 } | |
416 | |
417 // Test that ToString produces original regexp or equivalent one. | |
// For each entry of `tests`: parse it (kTestFlags, or the entry's own flags
// with the TestZeroFlags marker removed), check the dump, then call
// ToString().  When the string differs from the original pattern, it is
// parsed again and must give the same dump and the same ToString() output.
418 TEST(TestToString, EquivalentParse) { | |
419 for (int i = 0; i < arraysize(tests); i++) { | |
420 RegexpStatus status; | |
421 Regexp::ParseFlags f = kTestFlags; | |
422 if (tests[i].flags != 0) { | |
423 f = tests[i].flags & ~TestZeroFlags; | |
424 } | |
425 Regexp* re = Regexp::Parse(tests[i].regexp, f, &status); | |
426 CHECK(re != NULL) << " " << tests[i].regexp << " " << status.Text(); | |
427 string s = re->Dump(); | |
428 EXPECT_EQ(string(tests[i].parse), s) << " " << tests[i].regexp << " " << str
ing(tests[i].parse) << " " << s; | |
429 string t = re->ToString(); | |
430 if (t != tests[i].regexp) { | |
431 // If ToString didn't return the original regexp, | |
432 // it must have found one with fewer parens. | |
433 // Unfortunately we can't check the length here, because | |
434 // ToString produces "\\{" for a literal brace, | |
435 // but "{" is a shorter equivalent. | |
436 // CHECK_LT(t.size(), strlen(tests[i].regexp)) | |
437 // << " t=" << t << " regexp=" << tests[i].regexp; | |
438 | |
439 // Test that if we parse the new regexp we get the same structure. | |
// NOTE(review): the reparse always uses MatchNL | PerlX rather than `f`,
// even for entries that were parsed with their own flags -- confirm that
// this is intended.
440 Regexp* nre = Regexp::Parse(t, Regexp::MatchNL | Regexp::PerlX, &status); | |
441 CHECK(nre != NULL) << " reparse " << t << " " << status.Text(); | |
442 string ss = nre->Dump(); | |
443 string tt = nre->ToString(); | |
// Log the ToString() result before the expectations below report a mismatch.
444 if (s != ss || t != tt) | |
445 LOG(INFO) << "ToString(" << tests[i].regexp << ") = " << t; | |
446 EXPECT_EQ(s, ss); | |
447 EXPECT_EQ(t, tt); | |
448 nre->Decref(); | |
449 } | |
450 re->Decref(); | |
451 } | |
452 } | |
453 | |
454 // Test that capture error args are correct. | |
// Parses two patterns with malformed named groups using Regexp::LikePerl.
// Each parse must return NULL, set the status code to
// kRegexpBadNamedCapture, and set error_arg() to the group fragment shown
// in the expectation, without the leading "test" text.
455 TEST(NamedCaptures, ErrorArgs) { | |
456 RegexpStatus status; | |
457 Regexp* re; | |
458 | |
// Group name that is never closed with '>'.
459 re = Regexp::Parse("test(?P<name", Regexp::LikePerl, &status); | |
460 EXPECT_TRUE(re == NULL); | |
461 EXPECT_EQ(status.code(), kRegexpBadNamedCapture); | |
462 EXPECT_EQ(status.error_arg(), "(?P<name"); | |
463 | |
// Group name containing a space; error_arg() stops after the closing '>'.
464 re = Regexp::Parse("test(?P<space bar>z)", Regexp::LikePerl, &status); | |
465 EXPECT_TRUE(re == NULL); | |
466 EXPECT_EQ(status.code(), kRegexpBadNamedCapture); | |
467 EXPECT_EQ(status.error_arg(), "(?P<space bar>"); | |
468 } | |
469 | |
470 } // namespace re2 | |
OLD | NEW |