OLD | NEW |
| (Empty) |
1 // -*- coding: utf-8 -*- | |
2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved. | |
3 // Use of this source code is governed by a BSD-style | |
4 // license that can be found in the LICENSE file. | |
5 | |
6 // TODO: Test extractions for PartialMatch/Consume | |
7 | |
8 #include <errno.h> | |
9 #ifndef _MSC_VER | |
10 #include <unistd.h> /* for sysconf */ | |
11 #include <sys/mman.h> | |
12 #endif | |
13 #include <sys/stat.h> | |
14 #include <sys/types.h> | |
15 #include <vector> | |
16 #include "util/test.h" | |
17 #include "re2/re2.h" | |
18 #include "re2/regexp.h" | |
19 | |
20 DECLARE_bool(logtostderr); | |
21 | |
22 namespace re2 { | |
23 | |
24 TEST(RE2, HexTests) { | |
25 | |
26 VLOG(1) << "hex tests"; | |
27 | |
28 #define CHECK_HEX(type, value) \ | |
29 do { \ | |
30 type v; \ | |
31 CHECK(RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \ | |
32 CHECK_EQ(v, 0x ## value); \ | |
33 CHECK(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v)
)); \ | |
34 CHECK_EQ(v, 0x ## value); \ | |
35 } while(0) | |
36 | |
37 CHECK_HEX(short, 2bad); | |
38 CHECK_HEX(unsigned short, 2badU); | |
39 CHECK_HEX(int, dead); | |
40 CHECK_HEX(unsigned int, deadU); | |
41 CHECK_HEX(long, 7eadbeefL); | |
42 CHECK_HEX(unsigned long, deadbeefUL); | |
43 CHECK_HEX(long long, 12345678deadbeefLL); | |
44 CHECK_HEX(unsigned long long, cafebabedeadbeefULL); | |
45 | |
46 #undef CHECK_HEX | |
47 } | |
48 | |
49 TEST(RE2, OctalTests) { | |
50 VLOG(1) << "octal tests"; | |
51 | |
52 #define CHECK_OCTAL(type, value) \ | |
53 do { \ | |
54 type v; \ | |
55 CHECK(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \ | |
56 CHECK_EQ(v, 0 ## value); \ | |
57 CHECK(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))
); \ | |
58 CHECK_EQ(v, 0 ## value); \ | |
59 } while(0) | |
60 | |
61 CHECK_OCTAL(short, 77777); | |
62 CHECK_OCTAL(unsigned short, 177777U); | |
63 CHECK_OCTAL(int, 17777777777); | |
64 CHECK_OCTAL(unsigned int, 37777777777U); | |
65 CHECK_OCTAL(long, 17777777777L); | |
66 CHECK_OCTAL(unsigned long, 37777777777UL); | |
67 CHECK_OCTAL(long long, 777777777777777777777LL); | |
68 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL); | |
69 | |
70 #undef CHECK_OCTAL | |
71 } | |
72 | |
73 TEST(RE2, DecimalTests) { | |
74 VLOG(1) << "decimal tests"; | |
75 | |
76 #define CHECK_DECIMAL(type, value) \ | |
77 do { \ | |
78 type v; \ | |
79 CHECK(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \ | |
80 CHECK_EQ(v, value); \ | |
81 CHECK(RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v)));
\ | |
82 CHECK_EQ(v, value); \ | |
83 } while(0) | |
84 | |
85 CHECK_DECIMAL(short, -1); | |
86 CHECK_DECIMAL(unsigned short, 9999); | |
87 CHECK_DECIMAL(int, -1000); | |
88 CHECK_DECIMAL(unsigned int, 12345U); | |
89 CHECK_DECIMAL(long, -10000000L); | |
90 CHECK_DECIMAL(unsigned long, 3083324652U); | |
91 CHECK_DECIMAL(long long, -100000000000000LL); | |
92 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL); | |
93 | |
94 #undef CHECK_DECIMAL | |
95 } | |
96 | |
97 TEST(RE2, Replace) { | |
98 VLOG(1) << "TestReplace"; | |
99 | |
100 struct ReplaceTest { | |
101 const char *regexp; | |
102 const char *rewrite; | |
103 const char *original; | |
104 const char *single; | |
105 const char *global; | |
106 int greplace_count; | |
107 }; | |
108 static const ReplaceTest tests[] = { | |
109 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", | |
110 "\\2\\1ay", | |
111 "the quick brown fox jumps over the lazy dogs.", | |
112 "ethay quick brown fox jumps over the lazy dogs.", | |
113 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.", | |
114 9 }, | |
115 { "\\w+", | |
116 "\\0-NOSPAM", | |
117 "abcd.efghi@google.com", | |
118 "abcd-NOSPAM.efghi@google.com", | |
119 "abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM", | |
120 4 }, | |
121 { "^", | |
122 "(START)", | |
123 "foo", | |
124 "(START)foo", | |
125 "(START)foo", | |
126 1 }, | |
127 { "^", | |
128 "(START)", | |
129 "", | |
130 "(START)", | |
131 "(START)", | |
132 1 }, | |
133 { "$", | |
134 "(END)", | |
135 "", | |
136 "(END)", | |
137 "(END)", | |
138 1 }, | |
139 { "b", | |
140 "bb", | |
141 "ababababab", | |
142 "abbabababab", | |
143 "abbabbabbabbabb", | |
144 5 }, | |
145 { "b", | |
146 "bb", | |
147 "bbbbbb", | |
148 "bbbbbbb", | |
149 "bbbbbbbbbbbb", | |
150 6 }, | |
151 { "b+", | |
152 "bb", | |
153 "bbbbbb", | |
154 "bb", | |
155 "bb", | |
156 1 }, | |
157 { "b*", | |
158 "bb", | |
159 "bbbbbb", | |
160 "bb", | |
161 "bb", | |
162 1 }, | |
163 { "b*", | |
164 "bb", | |
165 "aaaaa", | |
166 "bbaaaaa", | |
167 "bbabbabbabbabbabb", | |
168 6 }, | |
169 // Check newline handling | |
170 { "a.*a", | |
171 "(\\0)", | |
172 "aba\naba", | |
173 "(aba)\naba", | |
174 "(aba)\n(aba)", | |
175 2 }, | |
176 { "", NULL, NULL, NULL, NULL, 0 } | |
177 }; | |
178 | |
179 for (const ReplaceTest* t = tests; t->original != NULL; t++) { | |
180 VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->re
write); | |
181 string one(t->original); | |
182 CHECK(RE2::Replace(&one, t->regexp, t->rewrite)); | |
183 CHECK_EQ(one, t->single); | |
184 string all(t->original); | |
185 CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count) | |
186 << "Got: " << all; | |
187 CHECK_EQ(all, t->global); | |
188 } | |
189 } | |
190 | |
191 static void TestCheckRewriteString(const char* regexp, const char* rewrite, | |
192 bool expect_ok) { | |
193 string error; | |
194 RE2 exp(regexp); | |
195 bool actual_ok = exp.CheckRewriteString(rewrite, &error); | |
196 EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error; | |
197 } | |
198 | |
199 TEST(CheckRewriteString, all) { | |
200 TestCheckRewriteString("abc", "foo", true); | |
201 TestCheckRewriteString("abc", "foo\\", false); | |
202 TestCheckRewriteString("abc", "foo\\0bar", true); | |
203 | |
204 TestCheckRewriteString("a(b)c", "foo", true); | |
205 TestCheckRewriteString("a(b)c", "foo\\0bar", true); | |
206 TestCheckRewriteString("a(b)c", "foo\\1bar", true); | |
207 TestCheckRewriteString("a(b)c", "foo\\2bar", false); | |
208 TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true); | |
209 | |
210 TestCheckRewriteString("a(b)(c)", "foo\\12", true); | |
211 TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true); | |
212 TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false); | |
213 } | |
214 | |
215 TEST(RE2, Extract) { | |
216 VLOG(1) << "TestExtract"; | |
217 | |
218 string s; | |
219 | |
220 CHECK(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s)); | |
221 CHECK_EQ(s, "kremvax!boris"); | |
222 | |
223 CHECK(RE2::Extract("foo", ".*", "'\\0'", &s)); | |
224 CHECK_EQ(s, "'foo'"); | |
225 // check that false match doesn't overwrite | |
226 CHECK(!RE2::Extract("baz", "bar", "'\\0'", &s)); | |
227 CHECK_EQ(s, "'foo'"); | |
228 } | |
229 | |
230 TEST(RE2, Consume) { | |
231 VLOG(1) << "TestConsume"; | |
232 | |
233 RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace | |
234 string word; | |
235 | |
236 string s(" aaa b!@#$@#$cccc"); | |
237 StringPiece input(s); | |
238 | |
239 CHECK(RE2::Consume(&input, r, &word)); | |
240 CHECK_EQ(word, "aaa") << " input: " << input; | |
241 CHECK(RE2::Consume(&input, r, &word)); | |
242 CHECK_EQ(word, "b") << " input: " << input; | |
243 CHECK(! RE2::Consume(&input, r, &word)) << " input: " << input; | |
244 } | |
245 | |
246 TEST(RE2, ConsumeN) { | |
247 const string s(" one two three 4"); | |
248 StringPiece input(s); | |
249 | |
250 RE2::Arg argv[2]; | |
251 const RE2::Arg* const args[2] = { &argv[0], &argv[1] }; | |
252 | |
253 // 0 arg | |
254 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0)); // Skips "one". | |
255 | |
256 // 1 arg | |
257 string word; | |
258 argv[0] = &word; | |
259 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1)); | |
260 EXPECT_EQ("two", word); | |
261 | |
262 // Multi-args | |
263 int n; | |
264 argv[1] = &n; | |
265 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2)); | |
266 EXPECT_EQ("three", word); | |
267 EXPECT_EQ(4, n); | |
268 } | |
269 | |
270 TEST(RE2, FindAndConsume) { | |
271 VLOG(1) << "TestFindAndConsume"; | |
272 | |
273 RE2 r("(\\w+)"); // matches a word | |
274 string word; | |
275 | |
276 string s(" aaa b!@#$@#$cccc"); | |
277 StringPiece input(s); | |
278 | |
279 CHECK(RE2::FindAndConsume(&input, r, &word)); | |
280 CHECK_EQ(word, "aaa"); | |
281 CHECK(RE2::FindAndConsume(&input, r, &word)); | |
282 CHECK_EQ(word, "b"); | |
283 CHECK(RE2::FindAndConsume(&input, r, &word)); | |
284 CHECK_EQ(word, "cccc"); | |
285 CHECK(! RE2::FindAndConsume(&input, r, &word)); | |
286 | |
287 // Check that FindAndConsume works without any submatches. | |
288 // Earlier version used uninitialized data for | |
289 // length to consume. | |
290 input = "aaa"; | |
291 CHECK(RE2::FindAndConsume(&input, "aaa")); | |
292 CHECK_EQ(input, ""); | |
293 } | |
294 | |
295 TEST(RE2, FindAndConsumeN) { | |
296 const string s(" one two three 4"); | |
297 StringPiece input(s); | |
298 | |
299 RE2::Arg argv[2]; | |
300 const RE2::Arg* const args[2] = { &argv[0], &argv[1] }; | |
301 | |
302 // 0 arg | |
303 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0)); // Skips "one". | |
304 | |
305 // 1 arg | |
306 string word; | |
307 argv[0] = &word; | |
308 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1)); | |
309 EXPECT_EQ("two", word); | |
310 | |
311 // Multi-args | |
312 int n; | |
313 argv[1] = &n; | |
314 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2)); | |
315 EXPECT_EQ("three", word); | |
316 EXPECT_EQ(4, n); | |
317 } | |
318 | |
319 TEST(RE2, MatchNumberPeculiarity) { | |
320 VLOG(1) << "TestMatchNumberPeculiarity"; | |
321 | |
322 RE2 r("(foo)|(bar)|(baz)"); | |
323 string word1; | |
324 string word2; | |
325 string word3; | |
326 | |
327 CHECK(RE2::PartialMatch("foo", r, &word1, &word2, &word3)); | |
328 CHECK_EQ(word1, "foo"); | |
329 CHECK_EQ(word2, ""); | |
330 CHECK_EQ(word3, ""); | |
331 CHECK(RE2::PartialMatch("bar", r, &word1, &word2, &word3)); | |
332 CHECK_EQ(word1, ""); | |
333 CHECK_EQ(word2, "bar"); | |
334 CHECK_EQ(word3, ""); | |
335 CHECK(RE2::PartialMatch("baz", r, &word1, &word2, &word3)); | |
336 CHECK_EQ(word1, ""); | |
337 CHECK_EQ(word2, ""); | |
338 CHECK_EQ(word3, "baz"); | |
339 CHECK(!RE2::PartialMatch("f", r, &word1, &word2, &word3)); | |
340 | |
341 string a; | |
342 CHECK(RE2::FullMatch("hello", "(foo)|hello", &a)); | |
343 CHECK_EQ(a, ""); | |
344 } | |
345 | |
346 TEST(RE2, Match) { | |
347 RE2 re("((\\w+):([0-9]+))"); // extracts host and port | |
348 StringPiece group[4]; | |
349 | |
350 // No match. | |
351 StringPiece s = "zyzzyva"; | |
352 CHECK(!re.Match(s, 0, s.size(), RE2::UNANCHORED, | |
353 group, arraysize(group))); | |
354 | |
355 // Matches and extracts. | |
356 s = "a chrisr:9000 here"; | |
357 CHECK(re.Match(s, 0, s.size(), RE2::UNANCHORED, | |
358 group, arraysize(group))); | |
359 CHECK_EQ(group[0], "chrisr:9000"); | |
360 CHECK_EQ(group[1], "chrisr:9000"); | |
361 CHECK_EQ(group[2], "chrisr"); | |
362 CHECK_EQ(group[3], "9000"); | |
363 | |
364 string all, host; | |
365 int port; | |
366 CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port)); | |
367 CHECK_EQ(all, "chrisr:9000"); | |
368 CHECK_EQ(host, "chrisr"); | |
369 CHECK_EQ(port, 9000); | |
370 } | |
371 | |
372 static void TestRecursion(int size, const char* pattern) { | |
373 // Fill up a string repeating the pattern given | |
374 string domain; | |
375 domain.resize(size); | |
376 size_t patlen = strlen(pattern); | |
377 for (int i = 0; i < size; i++) { | |
378 domain[i] = pattern[i % patlen]; | |
379 } | |
380 // Just make sure it doesn't crash due to too much recursion. | |
381 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet); | |
382 RE2::FullMatch(domain, re); | |
383 } | |
384 | |
385 // A meta-quoted string, interpreted as a pattern, should always match | |
386 // the original unquoted string. | |
387 static void TestQuoteMeta(string unquoted, | |
388 const RE2::Options& options = RE2::DefaultOptions) { | |
389 string quoted = RE2::QuoteMeta(unquoted); | |
390 RE2 re(quoted, options); | |
391 EXPECT_TRUE(RE2::FullMatch(unquoted, re)) | |
392 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'."; | |
393 } | |
394 | |
395 // A meta-quoted string, interpreted as a pattern, should always match | |
396 // the original unquoted string. | |
397 static void NegativeTestQuoteMeta(string unquoted, string should_not_match, | |
398 const RE2::Options& options = RE2::DefaultOpti
ons) { | |
399 string quoted = RE2::QuoteMeta(unquoted); | |
400 RE2 re(quoted, options); | |
401 EXPECT_FALSE(RE2::FullMatch(should_not_match, re)) | |
402 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'."; | |
403 } | |
404 | |
405 // Tests that quoted meta characters match their original strings, | |
406 // and that a few things that shouldn't match indeed do not. | |
407 TEST(QuoteMeta, Simple) { | |
408 TestQuoteMeta("foo"); | |
409 TestQuoteMeta("foo.bar"); | |
410 TestQuoteMeta("foo\\.bar"); | |
411 TestQuoteMeta("[1-9]"); | |
412 TestQuoteMeta("1.5-2.0?"); | |
413 TestQuoteMeta("\\d"); | |
414 TestQuoteMeta("Who doesn't like ice cream?"); | |
415 TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); | |
416 TestQuoteMeta("((?!)xxx).*yyy"); | |
417 TestQuoteMeta("(["); | |
418 } | |
419 TEST(QuoteMeta, SimpleNegative) { | |
420 NegativeTestQuoteMeta("foo", "bar"); | |
421 NegativeTestQuoteMeta("...", "bar"); | |
422 NegativeTestQuoteMeta("\\.", "."); | |
423 NegativeTestQuoteMeta("\\.", ".."); | |
424 NegativeTestQuoteMeta("(a)", "a"); | |
425 NegativeTestQuoteMeta("(a|b)", "a"); | |
426 NegativeTestQuoteMeta("(a|b)", "(a)"); | |
427 NegativeTestQuoteMeta("(a|b)", "a|b"); | |
428 NegativeTestQuoteMeta("[0-9]", "0"); | |
429 NegativeTestQuoteMeta("[0-9]", "0-9"); | |
430 NegativeTestQuoteMeta("[0-9]", "[9]"); | |
431 NegativeTestQuoteMeta("((?!)xxx)", "xxx"); | |
432 } | |
433 | |
434 TEST(QuoteMeta, Latin1) { | |
435 TestQuoteMeta("3\xb2 = 9", RE2::Latin1); | |
436 } | |
437 | |
438 TEST(QuoteMeta, UTF8) { | |
439 TestQuoteMeta("Plácido Domingo"); | |
440 TestQuoteMeta("xyz"); // No fancy utf8. | |
441 TestQuoteMeta("\xc2\xb0"); // 2-byte utf8 -- a degree symbol. | |
442 TestQuoteMeta("27\xc2\xb0 degrees"); // As a middle character. | |
443 TestQuoteMeta("\xe2\x80\xb3"); // 3-byte utf8 -- a double prime. | |
444 TestQuoteMeta("\xf0\x9d\x85\x9f"); // 4-byte utf8 -- a music note. | |
445 TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, this should | |
446 // still work. | |
447 NegativeTestQuoteMeta("27\xc2\xb0", | |
448 "27\\\xc2\\\xb0"); // 2-byte utf8 -- a degree symbol. | |
449 } | |
450 | |
451 TEST(QuoteMeta, HasNull) { | |
452 string has_null; | |
453 | |
454 // string with one null character | |
455 has_null += '\0'; | |
456 TestQuoteMeta(has_null); | |
457 NegativeTestQuoteMeta(has_null, ""); | |
458 | |
459 // Don't want null-followed-by-'1' to be interpreted as '\01'. | |
460 has_null += '1'; | |
461 TestQuoteMeta(has_null); | |
462 NegativeTestQuoteMeta(has_null, "\1"); | |
463 } | |
464 | |
465 TEST(ProgramSize, BigProgram) { | |
466 RE2 re_simple("simple regexp"); | |
467 RE2 re_medium("medium.*regexp"); | |
468 RE2 re_complex("complex.{1,128}regexp"); | |
469 | |
470 CHECK_GT(re_simple.ProgramSize(), 0); | |
471 CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize()); | |
472 CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize()); | |
473 } | |
474 | |
475 TEST(ProgramFanout, BigProgram) { | |
476 RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)"); | |
477 RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)"); | |
478 RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)"); | |
479 RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)"); | |
480 | |
481 map<int, int> histogram; | |
482 | |
483 // 3 is the largest non-empty bucket and has 1 element. | |
484 CHECK_EQ(3, re1.ProgramFanout(&histogram)); | |
485 CHECK_EQ(1, histogram[3]); | |
486 | |
487 // 7 is the largest non-empty bucket and has 10 elements. | |
488 CHECK_EQ(7, re10.ProgramFanout(&histogram)); | |
489 CHECK_EQ(10, histogram[7]); | |
490 | |
491 // 10 is the largest non-empty bucket and has 100 elements. | |
492 CHECK_EQ(10, re100.ProgramFanout(&histogram)); | |
493 CHECK_EQ(100, histogram[10]); | |
494 | |
495 // 13 is the largest non-empty bucket and has 1000 elements. | |
496 CHECK_EQ(13, re1000.ProgramFanout(&histogram)); | |
497 CHECK_EQ(1000, histogram[13]); | |
498 } | |
499 | |
500 // Issue 956519: handling empty character sets was | |
501 // causing NULL dereference. This tests a few empty character sets. | |
502 // (The way to get an empty character set is to negate a full one.) | |
503 TEST(EmptyCharset, Fuzz) { | |
504 static const char *empties[] = { | |
505 "[^\\S\\s]", | |
506 "[^\\S[:space:]]", | |
507 "[^\\D\\d]", | |
508 "[^\\D[:digit:]]" | |
509 }; | |
510 for (int i = 0; i < arraysize(empties); i++) | |
511 CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0)); | |
512 } | |
513 | |
514 // Bitstate assumes that kInstFail instructions in | |
515 // alternations or capture groups have been "compiled away". | |
516 TEST(EmptyCharset, BitstateAssumptions) { | |
517 // Captures trigger use of Bitstate. | |
518 static const char *nop_empties[] = { | |
519 "((((()))))" "[^\\S\\s]?", | |
520 "((((()))))" "([^\\S\\s])?", | |
521 "((((()))))" "([^\\S\\s]|[^\\S\\s])?", | |
522 "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)" | |
523 }; | |
524 StringPiece group[6]; | |
525 for (int i = 0; i < arraysize(nop_empties); i++) | |
526 CHECK(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6)); | |
527 } | |
528 | |
529 // Test that named groups work correctly. | |
530 TEST(Capture, NamedGroups) { | |
531 { | |
532 RE2 re("(hello world)"); | |
533 CHECK_EQ(re.NumberOfCapturingGroups(), 1); | |
534 const map<string, int>& m = re.NamedCapturingGroups(); | |
535 CHECK_EQ(m.size(), 0); | |
536 } | |
537 | |
538 { | |
539 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))"); | |
540 CHECK_EQ(re.NumberOfCapturingGroups(), 6); | |
541 const map<string, int>& m = re.NamedCapturingGroups(); | |
542 CHECK_EQ(m.size(), 4); | |
543 CHECK_EQ(m.find("A")->second, 1); | |
544 CHECK_EQ(m.find("B")->second, 2); | |
545 CHECK_EQ(m.find("C")->second, 3); | |
546 CHECK_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous | |
547 } | |
548 } | |
549 | |
550 TEST(RE2, CapturedGroupTest) { | |
551 RE2 re("directions from (?P<S>.*) to (?P<D>.*)"); | |
552 int num_groups = re.NumberOfCapturingGroups(); | |
553 EXPECT_EQ(2, num_groups); | |
554 string args[4]; | |
555 RE2::Arg arg0(&args[0]); | |
556 RE2::Arg arg1(&args[1]); | |
557 RE2::Arg arg2(&args[2]); | |
558 RE2::Arg arg3(&args[3]); | |
559 | |
560 const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3}; | |
561 EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose", | |
562 re, matches, num_groups)); | |
563 const map<string, int>& named_groups = re.NamedCapturingGroups(); | |
564 EXPECT_TRUE(named_groups.find("S") != named_groups.end()); | |
565 EXPECT_TRUE(named_groups.find("D") != named_groups.end()); | |
566 | |
567 // The named group index is 1-based. | |
568 int source_group_index = named_groups.find("S")->second; | |
569 int destination_group_index = named_groups.find("D")->second; | |
570 EXPECT_EQ(1, source_group_index); | |
571 EXPECT_EQ(2, destination_group_index); | |
572 | |
573 // The args is zero-based. | |
574 EXPECT_EQ("mountain view", args[source_group_index - 1]); | |
575 EXPECT_EQ("san jose", args[destination_group_index - 1]); | |
576 } | |
577 | |
578 TEST(RE2, FullMatchWithNoArgs) { | |
579 CHECK(RE2::FullMatch("h", "h")); | |
580 CHECK(RE2::FullMatch("hello", "hello")); | |
581 CHECK(RE2::FullMatch("hello", "h.*o")); | |
582 CHECK(!RE2::FullMatch("othello", "h.*o")); // Must be anchored at front | |
583 CHECK(!RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end | |
584 } | |
585 | |
586 TEST(RE2, PartialMatch) { | |
587 CHECK(RE2::PartialMatch("x", "x")); | |
588 CHECK(RE2::PartialMatch("hello", "h.*o")); | |
589 CHECK(RE2::PartialMatch("othello", "h.*o")); | |
590 CHECK(RE2::PartialMatch("hello!", "h.*o")); | |
591 CHECK(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))")); | |
592 } | |
593 | |
594 TEST(RE2, PartialMatchN) { | |
595 RE2::Arg argv[2]; | |
596 const RE2::Arg* const args[2] = { &argv[0], &argv[1] }; | |
597 | |
598 // 0 arg | |
599 EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0)); | |
600 EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0)); | |
601 | |
602 // 1 arg | |
603 int i; | |
604 argv[0] = &i; | |
605 EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1)); | |
606 EXPECT_EQ(1001, i); | |
607 EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1)); | |
608 | |
609 // Multi-arg | |
610 string s; | |
611 argv[1] = &s; | |
612 EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2)); | |
613 EXPECT_EQ(42, i); | |
614 EXPECT_EQ("life", s); | |
615 EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2)); | |
616 } | |
617 | |
618 TEST(RE2, FullMatchZeroArg) { | |
619 // Zero-arg | |
620 CHECK(RE2::FullMatch("1001", "\\d+")); | |
621 } | |
622 | |
623 TEST(RE2, FullMatchOneArg) { | |
624 int i; | |
625 | |
626 // Single-arg | |
627 CHECK(RE2::FullMatch("1001", "(\\d+)", &i)); | |
628 CHECK_EQ(i, 1001); | |
629 CHECK(RE2::FullMatch("-123", "(-?\\d+)", &i)); | |
630 CHECK_EQ(i, -123); | |
631 CHECK(!RE2::FullMatch("10", "()\\d+", &i)); | |
632 CHECK(!RE2::FullMatch("1234567890123456789012345678901234567890", | |
633 "(\\d+)", &i)); | |
634 } | |
635 | |
636 TEST(RE2, FullMatchIntegerArg) { | |
637 int i; | |
638 | |
639 // Digits surrounding integer-arg | |
640 CHECK(RE2::FullMatch("1234", "1(\\d*)4", &i)); | |
641 CHECK_EQ(i, 23); | |
642 CHECK(RE2::FullMatch("1234", "(\\d)\\d+", &i)); | |
643 CHECK_EQ(i, 1); | |
644 CHECK(RE2::FullMatch("-1234", "(-\\d)\\d+", &i)); | |
645 CHECK_EQ(i, -1); | |
646 CHECK(RE2::PartialMatch("1234", "(\\d)", &i)); | |
647 CHECK_EQ(i, 1); | |
648 CHECK(RE2::PartialMatch("-1234", "(-\\d)", &i)); | |
649 CHECK_EQ(i, -1); | |
650 } | |
651 | |
652 TEST(RE2, FullMatchStringArg) { | |
653 string s; | |
654 // String-arg | |
655 CHECK(RE2::FullMatch("hello", "h(.*)o", &s)); | |
656 CHECK_EQ(s, string("ell")); | |
657 } | |
658 | |
659 TEST(RE2, FullMatchStringPieceArg) { | |
660 int i; | |
661 // StringPiece-arg | |
662 StringPiece sp; | |
663 CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i)); | |
664 CHECK_EQ(sp.size(), 4); | |
665 CHECK(memcmp(sp.data(), "ruby", 4) == 0); | |
666 CHECK_EQ(i, 1234); | |
667 } | |
668 | |
669 TEST(RE2, FullMatchMultiArg) { | |
670 int i; | |
671 string s; | |
672 // Multi-arg | |
673 CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); | |
674 CHECK_EQ(s, string("ruby")); | |
675 CHECK_EQ(i, 1234); | |
676 } | |
677 | |
678 TEST(RE2, FullMatchN) { | |
679 RE2::Arg argv[2]; | |
680 const RE2::Arg* const args[2] = { &argv[0], &argv[1] }; | |
681 | |
682 // 0 arg | |
683 EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0)); | |
684 EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0)); | |
685 | |
686 // 1 arg | |
687 int i; | |
688 argv[0] = &i; | |
689 EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1)); | |
690 EXPECT_EQ(1001, i); | |
691 EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1)); | |
692 | |
693 // Multi-arg | |
694 string s; | |
695 argv[1] = &s; | |
696 EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2)); | |
697 EXPECT_EQ(42, i); | |
698 EXPECT_EQ("life", s); | |
699 EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2)); | |
700 } | |
701 | |
702 TEST(RE2, FullMatchIgnoredArg) { | |
703 int i; | |
704 string s; | |
705 // Ignored arg | |
706 CHECK(RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i)); | |
707 CHECK_EQ(s, string("ruby")); | |
708 CHECK_EQ(i, 1234); | |
709 } | |
710 | |
711 TEST(RE2, FullMatchTypedNullArg) { | |
712 string s; | |
713 | |
714 // Ignore non-void* NULL arg | |
715 CHECK(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL)); | |
716 CHECK(RE2::FullMatch("hello", "h(.*)o", (string*)NULL)); | |
717 CHECK(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL)); | |
718 CHECK(RE2::FullMatch("1234", "(.*)", (int*)NULL)); | |
719 CHECK(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL)); | |
720 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL)); | |
721 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL)); | |
722 | |
723 // Fail on non-void* NULL arg if the match doesn't parse for the given type. | |
724 CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL)); | |
725 CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL)); | |
726 CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL)); | |
727 CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL)); | |
728 CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL)); | |
729 } | |
730 | |
731 // Check that numeric parsing code does not read past the end of | |
732 // the number being parsed. | |
733 // This implementation requires mmap(2) et al. and thus cannot | |
734 // be used unless they are available. | |
735 TEST(RE2, NULTerminated) { | |
736 #if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0 | |
737 char *v; | |
738 int x; | |
739 long pagesize = sysconf(_SC_PAGE_SIZE); | |
740 | |
741 #ifndef MAP_ANONYMOUS | |
742 #define MAP_ANONYMOUS MAP_ANON | |
743 #endif | |
744 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE, | |
745 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)); | |
746 CHECK(v != reinterpret_cast<char*>(-1)); | |
747 LOG(INFO) << "Memory at " << (void*)v; | |
748 CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno; | |
749 v[pagesize - 1] = '1'; | |
750 | |
751 x = 0; | |
752 CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x)); | |
753 CHECK_EQ(x, 1); | |
754 #endif | |
755 } | |
756 | |
757 TEST(RE2, FullMatchTypeTests) { | |
758 // Type tests | |
759 string zeros(1000, '0'); | |
760 { | |
761 char c; | |
762 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); | |
763 CHECK_EQ(c, 'H'); | |
764 } | |
765 { | |
766 unsigned char c; | |
767 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); | |
768 CHECK_EQ(c, static_cast<unsigned char>('H')); | |
769 } | |
770 { | |
771 int16 v; | |
772 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); | |
773 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100); | |
774 CHECK(RE2::FullMatch("32767", "(-?\\d+)", &v)); CHECK_EQ(v, 32767); | |
775 CHECK(RE2::FullMatch("-32768", "(-?\\d+)", &v)); CHECK_EQ(v, -32768); | |
776 CHECK(!RE2::FullMatch("-32769", "(-?\\d+)", &v)); | |
777 CHECK(!RE2::FullMatch("32768", "(-?\\d+)", &v)); | |
778 } | |
779 { | |
780 uint16 v; | |
781 CHECK(RE2::FullMatch("100", "(\\d+)", &v)); CHECK_EQ(v, 100); | |
782 CHECK(RE2::FullMatch("32767", "(\\d+)", &v)); CHECK_EQ(v, 32767); | |
783 CHECK(RE2::FullMatch("65535", "(\\d+)", &v)); CHECK_EQ(v, 65535); | |
784 CHECK(!RE2::FullMatch("65536", "(\\d+)", &v)); | |
785 } | |
786 { | |
787 int32 v; | |
788 static const int32 max = 0x7fffffff; | |
789 static const int32 min = -max - 1; | |
790 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); | |
791 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100); | |
792 CHECK(RE2::FullMatch("2147483647", "(-?\\d+)", &v)); CHECK_EQ(v, max); | |
793 CHECK(RE2::FullMatch("-2147483648", "(-?\\d+)", &v)); CHECK_EQ(v, min); | |
794 CHECK(!RE2::FullMatch("-2147483649", "(-?\\d+)", &v)); | |
795 CHECK(!RE2::FullMatch("2147483648", "(-?\\d+)", &v)); | |
796 | |
797 CHECK(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v)); | |
798 CHECK_EQ(v, max); | |
799 CHECK(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v)); | |
800 CHECK_EQ(v, min); | |
801 | |
802 CHECK(!RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v)); | |
803 CHECK(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v))); | |
804 CHECK_EQ(v, max); | |
805 CHECK(!RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v))); | |
806 } | |
807 { | |
808 uint32 v; | |
809 static const uint32 max = 0xfffffffful; | |
810 CHECK(RE2::FullMatch("100", "(\\d+)", &v)); CHECK_EQ(v, 100); | |
811 CHECK(RE2::FullMatch("4294967295", "(\\d+)", &v)); CHECK_EQ(v, max); | |
812 CHECK(!RE2::FullMatch("4294967296", "(\\d+)", &v)); | |
813 CHECK(!RE2::FullMatch("-1", "(\\d+)", &v)); | |
814 | |
815 CHECK(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); CHECK_EQ(v, max); | |
816 } | |
817 { | |
818 int64 v; | |
819 static const int64 max = 0x7fffffffffffffffull; | |
820 static const int64 min = -max - 1; | |
821 char buf[32]; | |
822 | |
823 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); | |
824 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100); | |
825 | |
826 snprintf(buf, sizeof(buf), "%lld", (long long int)max); | |
827 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); | |
828 | |
829 snprintf(buf, sizeof(buf), "%lld", (long long int)min); | |
830 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, min); | |
831 | |
832 snprintf(buf, sizeof(buf), "%lld", (long long int)max); | |
833 assert(buf[strlen(buf)-1] != '9'); | |
834 buf[strlen(buf)-1]++; | |
835 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); | |
836 | |
837 snprintf(buf, sizeof(buf), "%lld", (long long int)min); | |
838 assert(buf[strlen(buf)-1] != '9'); | |
839 buf[strlen(buf)-1]++; | |
840 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); | |
841 } | |
842 { | |
843 uint64 v; | |
844 int64 v2; | |
845 static const uint64 max = 0xffffffffffffffffull; | |
846 char buf[32]; | |
847 | |
848 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); | |
849 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v2)); CHECK_EQ(v2, -100); | |
850 | |
851 snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max); | |
852 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); | |
853 | |
854 assert(buf[strlen(buf)-1] != '9'); | |
855 buf[strlen(buf)-1]++; | |
856 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); | |
857 } | |
858 } | |
859 | |
860 TEST(RE2, FloatingPointFullMatchTypes) { | |
861 string zeros(1000, '0'); | |
862 { | |
863 float v; | |
864 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100); | |
865 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100); | |
866 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23)); | |
867 CHECK(RE2::FullMatch(" 100", "(.*)", &v)); CHECK_EQ(v, 100); | |
868 | |
869 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v)); | |
870 CHECK_EQ(v, float(1e23)); | |
871 | |
872 // 6700000000081920.1 is an edge case. | |
873 // 6700000000081920 is exactly halfway between | |
874 // two float32s, so the .1 should make it round up. | |
875 // However, the .1 is outside the precision possible with | |
876 // a float64: the nearest float64 is 6700000000081920. | |
877 // So if the code uses strtod and then converts to float32, | |
878 // round-to-even will make it round down instead of up. | |
879 // To pass the test, the parser must call strtof directly. | |
880 // This test case is carefully chosen to use only a 17-digit | |
881 // number, since C does not guarantee to get the correctly | |
882 // rounded answer for strtod and strtof unless the input is | |
883 // short. | |
884 CHECK(RE2::FullMatch("0.1", "(.*)", &v)); | |
885 CHECK_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f); | |
886 CHECK(RE2::FullMatch("6700000000081920.1", "(.*)", &v)); | |
887 CHECK_EQ(v, 6700000000081920.1f) | |
888 << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f); | |
889 } | |
890 { | |
891 double v; | |
892 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100); | |
893 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100); | |
894 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, 1e23); | |
895 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v)); | |
896 CHECK_EQ(v, double(1e23)); | |
897 | |
898 CHECK(RE2::FullMatch("0.1", "(.*)", &v)); | |
899 CHECK_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1); | |
900 CHECK(RE2::FullMatch("1.00000005960464485", "(.*)", &v)); | |
901 CHECK_EQ(v, 1.0000000596046448) | |
902 << StringPrintf("%.17g != %.17g", v, 1.0000000596046448); | |
903 } | |
904 } | |
905 | |
906 TEST(RE2, FullMatchAnchored) { | |
907 int i; | |
908 // Check that matching is fully anchored | |
909 CHECK(!RE2::FullMatch("x1001", "(\\d+)", &i)); | |
910 CHECK(!RE2::FullMatch("1001x", "(\\d+)", &i)); | |
911 CHECK(RE2::FullMatch("x1001", "x(\\d+)", &i)); CHECK_EQ(i, 1001); | |
912 CHECK(RE2::FullMatch("1001x", "(\\d+)x", &i)); CHECK_EQ(i, 1001); | |
913 } | |
914 | |
915 TEST(RE2, FullMatchBraces) { | |
916 // Braces | |
917 CHECK(RE2::FullMatch("0abcd", "[0-9a-f+.-]{5,}")); | |
918 CHECK(RE2::FullMatch("0abcde", "[0-9a-f+.-]{5,}")); | |
919 CHECK(!RE2::FullMatch("0abc", "[0-9a-f+.-]{5,}")); | |
920 } | |
921 | |
922 TEST(RE2, Complicated) { | |
923 // Complicated RE2 | |
924 CHECK(RE2::FullMatch("foo", "foo|bar|[A-Z]")); | |
925 CHECK(RE2::FullMatch("bar", "foo|bar|[A-Z]")); | |
926 CHECK(RE2::FullMatch("X", "foo|bar|[A-Z]")); | |
927 CHECK(!RE2::FullMatch("XY", "foo|bar|[A-Z]")); | |
928 } | |
929 | |
930 TEST(RE2, FullMatchEnd) { | |
931 // Check full-match handling (needs '$' tacked on internally) | |
932 CHECK(RE2::FullMatch("fo", "fo|foo")); | |
933 CHECK(RE2::FullMatch("foo", "fo|foo")); | |
934 CHECK(RE2::FullMatch("fo", "fo|foo$")); | |
935 CHECK(RE2::FullMatch("foo", "fo|foo$")); | |
936 CHECK(RE2::FullMatch("foo", "foo$")); | |
937 CHECK(!RE2::FullMatch("foo$bar", "foo\\$")); | |
938 CHECK(!RE2::FullMatch("fox", "fo|bar")); | |
939 | |
940 // Uncomment the following if we change the handling of '$' to | |
941 // prevent it from matching a trailing newline | |
942 if (false) { | |
943 // Check that we don't get bitten by pcre's special handling of a | |
944 // '\n' at the end of the string matching '$' | |
945 CHECK(!RE2::PartialMatch("foo\n", "foo$")); | |
946 } | |
947 } | |
948 | |
949 TEST(RE2, FullMatchArgCount) { | |
950 // Number of args | |
951 int a[16]; | |
952 CHECK(RE2::FullMatch("", "")); | |
953 | |
954 memset(a, 0, sizeof(0)); | |
955 CHECK(RE2::FullMatch("1", | |
956 "(\\d){1}", | |
957 &a[0])); | |
958 CHECK_EQ(a[0], 1); | |
959 | |
960 memset(a, 0, sizeof(0)); | |
961 CHECK(RE2::FullMatch("12", | |
962 "(\\d)(\\d)", | |
963 &a[0], &a[1])); | |
964 CHECK_EQ(a[0], 1); | |
965 CHECK_EQ(a[1], 2); | |
966 | |
967 memset(a, 0, sizeof(0)); | |
968 CHECK(RE2::FullMatch("123", | |
969 "(\\d)(\\d)(\\d)", | |
970 &a[0], &a[1], &a[2])); | |
971 CHECK_EQ(a[0], 1); | |
972 CHECK_EQ(a[1], 2); | |
973 CHECK_EQ(a[2], 3); | |
974 | |
975 memset(a, 0, sizeof(0)); | |
976 CHECK(RE2::FullMatch("1234", | |
977 "(\\d)(\\d)(\\d)(\\d)", | |
978 &a[0], &a[1], &a[2], &a[3])); | |
979 CHECK_EQ(a[0], 1); | |
980 CHECK_EQ(a[1], 2); | |
981 CHECK_EQ(a[2], 3); | |
982 CHECK_EQ(a[3], 4); | |
983 | |
984 memset(a, 0, sizeof(0)); | |
985 CHECK(RE2::FullMatch("12345", | |
986 "(\\d)(\\d)(\\d)(\\d)(\\d)", | |
987 &a[0], &a[1], &a[2], &a[3], | |
988 &a[4])); | |
989 CHECK_EQ(a[0], 1); | |
990 CHECK_EQ(a[1], 2); | |
991 CHECK_EQ(a[2], 3); | |
992 CHECK_EQ(a[3], 4); | |
993 CHECK_EQ(a[4], 5); | |
994 | |
995 memset(a, 0, sizeof(0)); | |
996 CHECK(RE2::FullMatch("123456", | |
997 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", | |
998 &a[0], &a[1], &a[2], &a[3], | |
999 &a[4], &a[5])); | |
1000 CHECK_EQ(a[0], 1); | |
1001 CHECK_EQ(a[1], 2); | |
1002 CHECK_EQ(a[2], 3); | |
1003 CHECK_EQ(a[3], 4); | |
1004 CHECK_EQ(a[4], 5); | |
1005 CHECK_EQ(a[5], 6); | |
1006 | |
1007 memset(a, 0, sizeof(0)); | |
1008 CHECK(RE2::FullMatch("1234567", | |
1009 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", | |
1010 &a[0], &a[1], &a[2], &a[3], | |
1011 &a[4], &a[5], &a[6])); | |
1012 CHECK_EQ(a[0], 1); | |
1013 CHECK_EQ(a[1], 2); | |
1014 CHECK_EQ(a[2], 3); | |
1015 CHECK_EQ(a[3], 4); | |
1016 CHECK_EQ(a[4], 5); | |
1017 CHECK_EQ(a[5], 6); | |
1018 CHECK_EQ(a[6], 7); | |
1019 | |
1020 memset(a, 0, sizeof(0)); | |
1021 CHECK(RE2::FullMatch("1234567890123456", | |
1022 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)" | |
1023 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", | |
1024 &a[0], &a[1], &a[2], &a[3], | |
1025 &a[4], &a[5], &a[6], &a[7], | |
1026 &a[8], &a[9], &a[10], &a[11], | |
1027 &a[12], &a[13], &a[14], &a[15])); | |
1028 CHECK_EQ(a[0], 1); | |
1029 CHECK_EQ(a[1], 2); | |
1030 CHECK_EQ(a[2], 3); | |
1031 CHECK_EQ(a[3], 4); | |
1032 CHECK_EQ(a[4], 5); | |
1033 CHECK_EQ(a[5], 6); | |
1034 CHECK_EQ(a[6], 7); | |
1035 CHECK_EQ(a[7], 8); | |
1036 CHECK_EQ(a[8], 9); | |
1037 CHECK_EQ(a[9], 0); | |
1038 CHECK_EQ(a[10], 1); | |
1039 CHECK_EQ(a[11], 2); | |
1040 CHECK_EQ(a[12], 3); | |
1041 CHECK_EQ(a[13], 4); | |
1042 CHECK_EQ(a[14], 5); | |
1043 CHECK_EQ(a[15], 6); | |
1044 } | |
1045 | |
1046 TEST(RE2, Accessors) { | |
1047 // Check the pattern() accessor | |
1048 { | |
1049 const string kPattern = "http://([^/]+)/.*"; | |
1050 const RE2 re(kPattern); | |
1051 CHECK_EQ(kPattern, re.pattern()); | |
1052 } | |
1053 | |
1054 // Check RE2 error field. | |
1055 { | |
1056 RE2 re("foo"); | |
1057 CHECK(re.error().empty()); // Must have no error | |
1058 CHECK(re.ok()); | |
1059 CHECK(re.error_code() == RE2::NoError); | |
1060 } | |
1061 } | |
1062 | |
1063 TEST(RE2, UTF8) { | |
1064 // Check UTF-8 handling | |
1065 // Three Japanese characters (nihongo) | |
1066 const char utf8_string[] = { | |
1067 (char)0xe6, (char)0x97, (char)0xa5, // 65e5 | |
1068 (char)0xe6, (char)0x9c, (char)0xac, // 627c | |
1069 (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e | |
1070 0 | |
1071 }; | |
1072 const char utf8_pattern[] = { | |
1073 '.', | |
1074 (char)0xe6, (char)0x9c, (char)0xac, // 627c | |
1075 '.', | |
1076 0 | |
1077 }; | |
1078 | |
1079 // Both should match in either mode, bytes or UTF-8 | |
1080 RE2 re_test1(".........", RE2::Latin1); | |
1081 CHECK(RE2::FullMatch(utf8_string, re_test1)); | |
1082 RE2 re_test2("..."); | |
1083 CHECK(RE2::FullMatch(utf8_string, re_test2)); | |
1084 | |
1085 // Check that '.' matches one byte or UTF-8 character | |
1086 // according to the mode. | |
1087 string s; | |
1088 RE2 re_test3("(.)", RE2::Latin1); | |
1089 CHECK(RE2::PartialMatch(utf8_string, re_test3, &s)); | |
1090 CHECK_EQ(s, string("\xe6")); | |
1091 RE2 re_test4("(.)"); | |
1092 CHECK(RE2::PartialMatch(utf8_string, re_test4, &s)); | |
1093 CHECK_EQ(s, string("\xe6\x97\xa5")); | |
1094 | |
1095 // Check that string matches itself in either mode | |
1096 RE2 re_test5(utf8_string, RE2::Latin1); | |
1097 CHECK(RE2::FullMatch(utf8_string, re_test5)); | |
1098 RE2 re_test6(utf8_string); | |
1099 CHECK(RE2::FullMatch(utf8_string, re_test6)); | |
1100 | |
1101 // Check that pattern matches string only in UTF8 mode | |
1102 RE2 re_test7(utf8_pattern, RE2::Latin1); | |
1103 CHECK(!RE2::FullMatch(utf8_string, re_test7)); | |
1104 RE2 re_test8(utf8_pattern); | |
1105 CHECK(RE2::FullMatch(utf8_string, re_test8)); | |
1106 } | |
1107 | |
1108 TEST(RE2, UngreedyUTF8) { | |
1109 // Check that ungreedy, UTF8 regular expressions don't match when they | |
1110 // oughtn't -- see bug 82246. | |
1111 { | |
1112 // This code always worked. | |
1113 const char* pattern = "\\w+X"; | |
1114 const string target = "a aX"; | |
1115 RE2 match_sentence(pattern, RE2::Latin1); | |
1116 RE2 match_sentence_re(pattern); | |
1117 | |
1118 CHECK(!RE2::FullMatch(target, match_sentence)); | |
1119 CHECK(!RE2::FullMatch(target, match_sentence_re)); | |
1120 } | |
1121 { | |
1122 const char* pattern = "(?U)\\w+X"; | |
1123 const string target = "a aX"; | |
1124 RE2 match_sentence(pattern, RE2::Latin1); | |
1125 CHECK_EQ(match_sentence.error(), ""); | |
1126 RE2 match_sentence_re(pattern); | |
1127 | |
1128 CHECK(!RE2::FullMatch(target, match_sentence)); | |
1129 CHECK(!RE2::FullMatch(target, match_sentence_re)); | |
1130 } | |
1131 } | |
1132 | |
1133 TEST(RE2, Rejects) { | |
1134 { RE2 re("a\\1", RE2::Quiet); CHECK(!re.ok()); } | |
1135 { | |
1136 RE2 re("a[x", RE2::Quiet); | |
1137 CHECK(!re.ok()); | |
1138 } | |
1139 { | |
1140 RE2 re("a[z-a]", RE2::Quiet); | |
1141 CHECK(!re.ok()); | |
1142 } | |
1143 { | |
1144 RE2 re("a[[:foobar:]]", RE2::Quiet); | |
1145 CHECK(!re.ok()); | |
1146 } | |
1147 { | |
1148 RE2 re("a(b", RE2::Quiet); | |
1149 CHECK(!re.ok()); | |
1150 } | |
1151 { | |
1152 RE2 re("a\\", RE2::Quiet); | |
1153 CHECK(!re.ok()); | |
1154 } | |
1155 } | |
1156 | |
1157 TEST(RE2, NoCrash) { | |
1158 // Test that using a bad regexp doesn't crash. | |
1159 { | |
1160 RE2 re("a\\", RE2::Quiet); | |
1161 CHECK(!re.ok()); | |
1162 CHECK(!RE2::PartialMatch("a\\b", re)); | |
1163 } | |
1164 | |
1165 // Test that using an enormous regexp doesn't crash | |
1166 { | |
1167 RE2 re("(((.{100}){100}){100}){100}", RE2::Quiet); | |
1168 CHECK(!re.ok()); | |
1169 CHECK(!RE2::PartialMatch("aaa", re)); | |
1170 } | |
1171 | |
1172 // Test that a crazy regexp still compiles and runs. | |
1173 { | |
1174 RE2 re(".{512}x", RE2::Quiet); | |
1175 CHECK(re.ok()); | |
1176 string s; | |
1177 s.append(515, 'c'); | |
1178 s.append("x"); | |
1179 CHECK(RE2::PartialMatch(s, re)); | |
1180 } | |
1181 } | |
1182 | |
1183 TEST(RE2, Recursion) { | |
1184 // Test that recursion is stopped. | |
1185 // This test is PCRE-legacy -- there's no recursion in RE2. | |
1186 int bytes = 15 * 1024; // enough to crash PCRE | |
1187 TestRecursion(bytes, "."); | |
1188 TestRecursion(bytes, "a"); | |
1189 TestRecursion(bytes, "a."); | |
1190 TestRecursion(bytes, "ab."); | |
1191 TestRecursion(bytes, "abc."); | |
1192 } | |
1193 | |
1194 TEST(RE2, BigCountedRepetition) { | |
1195 // Test that counted repetition works, given tons of memory. | |
1196 RE2::Options opt; | |
1197 opt.set_max_mem(256<<20); | |
1198 | |
1199 RE2 re(".{512}x", opt); | |
1200 CHECK(re.ok()); | |
1201 string s; | |
1202 s.append(515, 'c'); | |
1203 s.append("x"); | |
1204 CHECK(RE2::PartialMatch(s, re)); | |
1205 } | |
1206 | |
1207 TEST(RE2, DeepRecursion) { | |
1208 // Test for deep stack recursion. This would fail with a | |
1209 // segmentation violation due to stack overflow before pcre was | |
1210 // patched. | |
1211 // Again, a PCRE legacy test. RE2 doesn't recurse. | |
1212 string comment("x*"); | |
1213 string a(131072, 'a'); | |
1214 comment += a; | |
1215 comment += "*x"; | |
1216 RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)"); | |
1217 CHECK(RE2::FullMatch(comment, re)); | |
1218 } | |
1219 | |
1220 // Suggested by Josh Hyman. Failed when SearchOnePass was | |
1221 // not implementing case-folding. | |
1222 TEST(CaseInsensitive, MatchAndConsume) { | |
1223 string result; | |
1224 string text = "A fish named *Wanda*"; | |
1225 StringPiece sp(text); | |
1226 | |
1227 EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result)); | |
1228 EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result)); | |
1229 } | |
1230 | |
1231 // RE2 should permit implicit conversions from string, StringPiece, const char*, | |
1232 // and C string literals. | |
1233 TEST(RE2, ImplicitConversions) { | |
1234 string re_string("."); | |
1235 StringPiece re_stringpiece("."); | |
1236 const char* re_cstring = "."; | |
1237 EXPECT_TRUE(RE2::PartialMatch("e", re_string)); | |
1238 EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece)); | |
1239 EXPECT_TRUE(RE2::PartialMatch("e", re_cstring)); | |
1240 EXPECT_TRUE(RE2::PartialMatch("e", ".")); | |
1241 } | |
1242 | |
1243 // Bugs introduced by 8622304 | |
1244 TEST(RE2, CL8622304) { | |
1245 // reported by ingow | |
1246 string dir; | |
1247 EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])")); // ok | |
1248 EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir)); // fails | |
1249 | |
1250 // reported by jacobsa | |
1251 string key, val; | |
1252 EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true", | |
1253 "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?", | |
1254 &key, | |
1255 &val)); | |
1256 EXPECT_EQ(key, "bar"); | |
1257 EXPECT_EQ(val, "1,0x2F,030,4,5"); | |
1258 } | |
1259 | |
1260 | |
1261 // Check that RE2 returns correct regexp pieces on error. | |
1262 // In particular, make sure it returns whole runes | |
1263 // and that it always reports invalid UTF-8. | |
1264 // Also check that Perl error flag piece is big enough. | |
1265 static struct ErrorTest { | |
1266 const char *regexp; | |
1267 const char *error; | |
1268 } error_tests[] = { | |
1269 { "ab\\αcd", "\\α" }, | |
1270 { "ef\\x☺01", "\\x☺0" }, | |
1271 { "gh\\x1☺01", "\\x1☺" }, | |
1272 { "ij\\x1", "\\x1" }, | |
1273 { "kl\\x", "\\x" }, | |
1274 { "uv\\x{0000☺}", "\\x{0000☺" }, | |
1275 { "wx\\p{ABC", "\\p{ABC" }, | |
1276 { "yz(?smiUX:abc)", "(?smiUX" }, // used to return (?s but the error is X | |
1277 { "aa(?sm☺i", "(?sm☺" }, | |
1278 { "bb[abc", "[abc" }, | |
1279 | |
1280 { "mn\\x1\377", "" }, // no argument string returned for invalid UTF-8 | |
1281 { "op\377qr", "" }, | |
1282 { "st\\x{00000\377", "" }, | |
1283 { "zz\\p{\377}", "" }, | |
1284 { "zz\\x{00\377}", "" }, | |
1285 { "zz(?P<name\377>abc)", "" }, | |
1286 }; | |
1287 TEST(RE2, ErrorArgs) { | |
1288 for (int i = 0; i < arraysize(error_tests); i++) { | |
1289 RE2 re(error_tests[i].regexp, RE2::Quiet); | |
1290 EXPECT_FALSE(re.ok()); | |
1291 EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error(); | |
1292 } | |
1293 } | |
1294 | |
1295 // Check that "never match \n" mode never matches \n. | |
1296 static struct NeverTest { | |
1297 const char* regexp; | |
1298 const char* text; | |
1299 const char* match; | |
1300 } never_tests[] = { | |
1301 { "(.*)", "abc\ndef\nghi\n", "abc" }, | |
1302 { "(?s)(abc.*def)", "abc\ndef\n", NULL }, | |
1303 { "(abc(.|\n)*def)", "abc\ndef\n", NULL }, | |
1304 { "(abc[^x]*def)", "abc\ndef\n", NULL }, | |
1305 { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" }, | |
1306 }; | |
1307 TEST(RE2, NeverNewline) { | |
1308 RE2::Options opt; | |
1309 opt.set_never_nl(true); | |
1310 for (int i = 0; i < arraysize(never_tests); i++) { | |
1311 const NeverTest& t = never_tests[i]; | |
1312 RE2 re(t.regexp, opt); | |
1313 if (t.match == NULL) { | |
1314 EXPECT_FALSE(re.PartialMatch(t.text, re)); | |
1315 } else { | |
1316 StringPiece m; | |
1317 EXPECT_TRUE(re.PartialMatch(t.text, re, &m)); | |
1318 EXPECT_EQ(m, t.match); | |
1319 } | |
1320 } | |
1321 } | |
1322 | |
1323 // Check that dot_nl option works. | |
1324 TEST(RE2, DotNL) { | |
1325 RE2::Options opt; | |
1326 opt.set_dot_nl(true); | |
1327 EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt))); | |
1328 EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt))); | |
1329 opt.set_never_nl(true); | |
1330 EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt))); | |
1331 } | |
1332 | |
1333 // Check that there are no capturing groups in "never capture" mode. | |
1334 TEST(RE2, NeverCapture) { | |
1335 RE2::Options opt; | |
1336 opt.set_never_capture(true); | |
1337 RE2 re("(r)(e)", opt); | |
1338 EXPECT_EQ(0, re.NumberOfCapturingGroups()); | |
1339 } | |
1340 | |
1341 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0. | |
1342 // Triggered by a failed DFA search falling back to Bitstate when | |
1343 // using Match with a NULL submatch set. Bitstate tried to read | |
1344 // the submatch[0] entry even if nsubmatch was 0. | |
1345 TEST(RE2, BitstateCaptureBug) { | |
1346 RE2::Options opt; | |
1347 opt.set_max_mem(20000); | |
1348 RE2 re("(_________$)", opt); | |
1349 StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x"; | |
1350 EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0)); | |
1351 } | |
1352 | |
1353 // C++ version of bug 609710. | |
1354 TEST(RE2, UnicodeClasses) { | |
1355 const string str = "ABCDEFGHI譚永鋒"; | |
1356 string a, b, c; | |
1357 | |
1358 EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}")); | |
1359 EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}")); | |
1360 EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}")); | |
1361 EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}")); | |
1362 EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}")); | |
1363 EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}")); | |
1364 | |
1365 EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}")); | |
1366 EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}")); | |
1367 EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}")); | |
1368 EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}")); | |
1369 EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}")); | |
1370 EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}")); | |
1371 | |
1372 EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}")); | |
1373 EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}")); | |
1374 EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}")); | |
1375 EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}")); | |
1376 EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}")); | |
1377 EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}")); | |
1378 | |
1379 EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}")); | |
1380 EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}")); | |
1381 EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}")); | |
1382 EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}")); | |
1383 EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}")); | |
1384 EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}")); | |
1385 | |
1386 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c)); | |
1387 EXPECT_EQ("A", a); | |
1388 EXPECT_EQ("B", b); | |
1389 EXPECT_EQ("C", c); | |
1390 | |
1391 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c)); | |
1392 EXPECT_EQ("A", a); | |
1393 EXPECT_EQ("B", b); | |
1394 EXPECT_EQ("C", c); | |
1395 | |
1396 EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}")); | |
1397 | |
1398 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c)); | |
1399 EXPECT_EQ("A", a); | |
1400 EXPECT_EQ("B", b); | |
1401 EXPECT_EQ("C", c); | |
1402 | |
1403 EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]")); | |
1404 | |
1405 EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b,
&c)); | |
1406 EXPECT_EQ("譚", a); | |
1407 EXPECT_EQ("永", b); | |
1408 EXPECT_EQ("鋒", c); | |
1409 } | |
1410 | |
1411 // Bug reported by saito. 2009/02/17 | |
1412 TEST(RE2, NullVsEmptyString) { | |
1413 RE2 re2(".*"); | |
1414 StringPiece v1(""); | |
1415 EXPECT_TRUE(RE2::FullMatch(v1, re2)); | |
1416 | |
1417 StringPiece v2; | |
1418 EXPECT_TRUE(RE2::FullMatch(v2, re2)); | |
1419 } | |
1420 | |
1421 // Issue 1816809 | |
1422 TEST(RE2, Bug1816809) { | |
1423 RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))"); | |
1424 StringPiece piece("llx-3;llx4"); | |
1425 string x; | |
1426 EXPECT_TRUE(RE2::Consume(&piece, re, &x)); | |
1427 } | |
1428 | |
1429 // Issue 3061120 | |
1430 TEST(RE2, Bug3061120) { | |
1431 RE2 re("(?i)\\W"); | |
1432 EXPECT_FALSE(RE2::PartialMatch("x", re)); // always worked | |
1433 EXPECT_FALSE(RE2::PartialMatch("k", re)); // broke because of kelvin | |
1434 EXPECT_FALSE(RE2::PartialMatch("s", re)); // broke because of latin long s | |
1435 } | |
1436 | |
1437 TEST(RE2, CapturingGroupNames) { | |
1438 // Opening parentheses annotated with group IDs: | |
1439 // 12 3 45 6 7 | |
1440 RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))"); | |
1441 EXPECT_TRUE(re.ok()); | |
1442 const map<int, string>& have = re.CapturingGroupNames(); | |
1443 map<int, string> want; | |
1444 want[3] = "G2"; | |
1445 want[6] = "G2"; | |
1446 want[7] = "G1"; | |
1447 EXPECT_EQ(want, have); | |
1448 } | |
1449 | |
1450 TEST(RE2, RegexpToStringLossOfAnchor) { | |
1451 EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at"); | |
1452 EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at"); | |
1453 EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$"); | |
1454 EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)"); | |
1455 } | |
1456 | |
1457 // Issue 10131674 | |
1458 TEST(RE2, Bug10131674) { | |
1459 // Some of these escapes describe values that do not fit in a byte. | |
1460 RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1); | |
1461 EXPECT_FALSE(re.ok()); | |
1462 EXPECT_FALSE(RE2::FullMatch("hello world", re)); | |
1463 } | |
1464 | |
1465 TEST(RE2, Bug18391750) { | |
1466 // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanit
izer. | |
1467 const char t[] = { | |
1468 (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08, | |
1469 (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5, | |
1470 (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69, | |
1471 (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31, | |
1472 (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29, | |
1473 (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00, | |
1474 }; | |
1475 RE2::Options opt; | |
1476 opt.set_encoding(RE2::Options::EncodingLatin1); | |
1477 opt.set_longest_match(true); | |
1478 opt.set_dot_nl(true); | |
1479 opt.set_case_sensitive(false); | |
1480 RE2 re(t, opt); | |
1481 CHECK(re.ok()); | |
1482 RE2::PartialMatch(t, re); | |
1483 } | |
1484 | |
1485 TEST(RE2, Bug18458852) { | |
1486 // Bug in parser accepting invalid (too large) rune, | |
1487 // causing compiler to fail in DCHECK in UTF-8 | |
1488 // character class code. | |
1489 const char b[] = { | |
1490 (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28, | |
1491 (char)0x24, (char)0x5b, (char)0x5e, (char)0xf5, (char)0x87, (char)0x87, | |
1492 (char)0x90, (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00, | |
1493 }; | |
1494 RE2 re(b); | |
1495 CHECK(!re.ok()); | |
1496 } | |
1497 | |
1498 TEST(RE2, Bug18523943) { | |
1499 // Bug in bitstate: case kFailInst was merged into the default with LOG(DFATAL
). | |
1500 | |
1501 RE2::Options opt; | |
1502 const char a[] = { | |
1503 (char)0x29, (char)0x29, (char)0x24, (char)0x00, | |
1504 }; | |
1505 const char b[] = { | |
1506 (char)0x28, (char)0x0a, (char)0x2a, (char)0x2a, (char)0x29, (char)0x00, | |
1507 }; | |
1508 opt.set_log_errors(false); | |
1509 opt.set_encoding(RE2::Options::EncodingLatin1); | |
1510 opt.set_posix_syntax(true); | |
1511 opt.set_longest_match(true); | |
1512 opt.set_literal(false); | |
1513 opt.set_never_nl(true); | |
1514 | |
1515 RE2 re((const char*)b, opt); | |
1516 CHECK(re.ok()); | |
1517 string s1; | |
1518 CHECK(!RE2::PartialMatch((const char*)a, re, &s1)); | |
1519 } | |
1520 | |
1521 TEST(RE2, Bug21371806) { | |
1522 // Bug in parser accepting Unicode groups in Latin-1 mode, | |
1523 // causing compiler to fail in DCHECK in prog.cc. | |
1524 | |
1525 RE2::Options opt; | |
1526 opt.set_encoding(RE2::Options::EncodingLatin1); | |
1527 | |
1528 RE2 re("g\\p{Zl}]", opt); | |
1529 CHECK(re.ok()); | |
1530 } | |
1531 | |
1532 } // namespace re2 | |
OLD | NEW |