Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(96)

Side by Side Diff: third_party/re2/re2/testing/re2_test.cc

Issue 10575037: Include RE2 library (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Removed valgrind specific code that is in Chromium already Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // -*- coding: utf-8 -*-
2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved.
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
5
6 // TODO: Test extractions for PartialMatch/Consume
7
8 #include <sys/types.h>
9 #ifndef WIN32
10 #include <sys/mman.h>
11 #endif
12 #include <sys/stat.h>
13 #include <errno.h>
14 #include <vector>
15 #include "util/test.h"
16 #include "re2/re2.h"
17 #include "re2/regexp.h"
18
19 #ifdef WIN32
20 #include <stdio.h>
21 #define snprintf _snprintf
22 #endif
23
24 DECLARE_bool(logtostderr);
25
26 namespace re2 {
27
28 TEST(RE2, HexTests) {
29
30 VLOG(1) << "hex tests";
31
32 #define CHECK_HEX(type, value) \
33 do { \
34 type v; \
35 CHECK(RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \
36 CHECK_EQ(v, 0x ## value); \
37 CHECK(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v) )); \
38 CHECK_EQ(v, 0x ## value); \
39 } while(0)
40
41 CHECK_HEX(short, 2bad);
42 CHECK_HEX(unsigned short, 2badU);
43 CHECK_HEX(int, dead);
44 CHECK_HEX(unsigned int, deadU);
45 CHECK_HEX(long, 7eadbeefL);
46 CHECK_HEX(unsigned long, deadbeefUL);
47 CHECK_HEX(long long, 12345678deadbeefLL);
48 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
49
50 #undef CHECK_HEX
51 }
52
53 TEST(RE2, OctalTests) {
54 VLOG(1) << "octal tests";
55
56 #define CHECK_OCTAL(type, value) \
57 do { \
58 type v; \
59 CHECK(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \
60 CHECK_EQ(v, 0 ## value); \
61 CHECK(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v)) ); \
62 CHECK_EQ(v, 0 ## value); \
63 } while(0)
64
65 CHECK_OCTAL(short, 77777);
66 CHECK_OCTAL(unsigned short, 177777U);
67 CHECK_OCTAL(int, 17777777777);
68 CHECK_OCTAL(unsigned int, 37777777777U);
69 CHECK_OCTAL(long, 17777777777L);
70 CHECK_OCTAL(unsigned long, 37777777777UL);
71 CHECK_OCTAL(long long, 777777777777777777777LL);
72 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
73
74 #undef CHECK_OCTAL
75 }
76
77 TEST(RE2, DecimalTests) {
78 VLOG(1) << "decimal tests";
79
80 #define CHECK_DECIMAL(type, value) \
81 do { \
82 type v; \
83 CHECK(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \
84 CHECK_EQ(v, value); \
85 CHECK(RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
86 CHECK_EQ(v, value); \
87 } while(0)
88
89 CHECK_DECIMAL(short, -1);
90 CHECK_DECIMAL(unsigned short, 9999);
91 CHECK_DECIMAL(int, -1000);
92 CHECK_DECIMAL(unsigned int, 12345U);
93 CHECK_DECIMAL(long, -10000000L);
94 CHECK_DECIMAL(unsigned long, 3083324652U);
95 CHECK_DECIMAL(long long, -100000000000000LL);
96 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
97
98 #undef CHECK_DECIMAL
99 }
100
101 TEST(RE2, Replace) {
102 VLOG(1) << "TestReplace";
103
104 struct ReplaceTest {
105 const char *regexp;
106 const char *rewrite;
107 const char *original;
108 const char *single;
109 const char *global;
110 int greplace_count;
111 };
112 static const ReplaceTest tests[] = {
113 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
114 "\\2\\1ay",
115 "the quick brown fox jumps over the lazy dogs.",
116 "ethay quick brown fox jumps over the lazy dogs.",
117 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
118 9 },
119 { "\\w+",
120 "\\0-NOSPAM",
121 "abcd.efghi@google.com",
122 "abcd-NOSPAM.efghi@google.com",
123 "abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM",
124 4 },
125 { "^",
126 "(START)",
127 "foo",
128 "(START)foo",
129 "(START)foo",
130 1 },
131 { "^",
132 "(START)",
133 "",
134 "(START)",
135 "(START)",
136 1 },
137 { "$",
138 "(END)",
139 "",
140 "(END)",
141 "(END)",
142 1 },
143 { "b",
144 "bb",
145 "ababababab",
146 "abbabababab",
147 "abbabbabbabbabb",
148 5 },
149 { "b",
150 "bb",
151 "bbbbbb",
152 "bbbbbbb",
153 "bbbbbbbbbbbb",
154 6 },
155 { "b+",
156 "bb",
157 "bbbbbb",
158 "bb",
159 "bb",
160 1 },
161 { "b*",
162 "bb",
163 "bbbbbb",
164 "bb",
165 "bb",
166 1 },
167 { "b*",
168 "bb",
169 "aaaaa",
170 "bbaaaaa",
171 "bbabbabbabbabbabb",
172 6 },
173 // Check newline handling
174 { "a.*a",
175 "(\\0)",
176 "aba\naba",
177 "(aba)\naba",
178 "(aba)\n(aba)",
179 2 },
180 { "", NULL, NULL, NULL, NULL, 0 }
181 };
182
183 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
184 VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->re write);
185 string one(t->original);
186 CHECK(RE2::Replace(&one, t->regexp, t->rewrite));
187 CHECK_EQ(one, t->single);
188 string all(t->original);
189 CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
190 << "Got: " << all;
191 CHECK_EQ(all, t->global);
192 }
193 }
194
195 static void TestCheckRewriteString(const char* regexp, const char* rewrite,
196 bool expect_ok) {
197 string error;
198 RE2 exp(regexp);
199 bool actual_ok = exp.CheckRewriteString(rewrite, &error);
200 EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
201 }
202
203 TEST(CheckRewriteString, all) {
204 TestCheckRewriteString("abc", "foo", true);
205 TestCheckRewriteString("abc", "foo\\", false);
206 TestCheckRewriteString("abc", "foo\\0bar", true);
207
208 TestCheckRewriteString("a(b)c", "foo", true);
209 TestCheckRewriteString("a(b)c", "foo\\0bar", true);
210 TestCheckRewriteString("a(b)c", "foo\\1bar", true);
211 TestCheckRewriteString("a(b)c", "foo\\2bar", false);
212 TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true);
213
214 TestCheckRewriteString("a(b)(c)", "foo\\12", true);
215 TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true);
216 TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false);
217 }
218
219 TEST(RE2, Extract) {
220 VLOG(1) << "TestExtract";
221
222 string s;
223
224 CHECK(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
225 CHECK_EQ(s, "kremvax!boris");
226
227 CHECK(RE2::Extract("foo", ".*", "'\\0'", &s));
228 CHECK_EQ(s, "'foo'");
229 // check that false match doesn't overwrite
230 CHECK(!RE2::Extract("baz", "bar", "'\\0'", &s));
231 CHECK_EQ(s, "'foo'");
232 }
233
234 TEST(RE2, Consume) {
235 VLOG(1) << "TestConsume";
236
237 RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
238 string word;
239
240 string s(" aaa b!@#$@#$cccc");
241 StringPiece input(s);
242
243 CHECK(RE2::Consume(&input, r, &word));
244 CHECK_EQ(word, "aaa") << " input: " << input;
245 CHECK(RE2::Consume(&input, r, &word));
246 CHECK_EQ(word, "b") << " input: " << input;
247 CHECK(! RE2::Consume(&input, r, &word)) << " input: " << input;
248 }
249
250 TEST(RE2, ConsumeN) {
251 const string s(" one two three 4");
252 StringPiece input(s);
253
254 RE2::Arg argv[2];
255 const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
256
257 // 0 arg
258 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0)); // Skips "one".
259
260 // 1 arg
261 string word;
262 argv[0] = &word;
263 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1));
264 EXPECT_EQ("two", word);
265
266 // Multi-args
267 int n;
268 argv[1] = &n;
269 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2));
270 EXPECT_EQ("three", word);
271 EXPECT_EQ(4, n);
272 }
273
274 TEST(RE2, FindAndConsume) {
275 VLOG(1) << "TestFindAndConsume";
276
277 RE2 r("(\\w+)"); // matches a word
278 string word;
279
280 string s(" aaa b!@#$@#$cccc");
281 StringPiece input(s);
282
283 CHECK(RE2::FindAndConsume(&input, r, &word));
284 CHECK_EQ(word, "aaa");
285 CHECK(RE2::FindAndConsume(&input, r, &word));
286 CHECK_EQ(word, "b");
287 CHECK(RE2::FindAndConsume(&input, r, &word));
288 CHECK_EQ(word, "cccc");
289 CHECK(! RE2::FindAndConsume(&input, r, &word));
290
291 // Check that FindAndConsume works without any submatches.
292 // Earlier version used uninitialized data for
293 // length to consume.
294 input = "aaa";
295 CHECK(RE2::FindAndConsume(&input, "aaa"));
296 CHECK_EQ(input, "");
297 }
298
299 TEST(RE2, FindAndConsumeN) {
300 const string s(" one two three 4");
301 StringPiece input(s);
302
303 RE2::Arg argv[2];
304 const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
305
306 // 0 arg
307 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0)); // Skips "one".
308
309 // 1 arg
310 string word;
311 argv[0] = &word;
312 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1));
313 EXPECT_EQ("two", word);
314
315 // Multi-args
316 int n;
317 argv[1] = &n;
318 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2));
319 EXPECT_EQ("three", word);
320 EXPECT_EQ(4, n);
321 }
322
323 TEST(RE2, MatchNumberPeculiarity) {
324 VLOG(1) << "TestMatchNumberPeculiarity";
325
326 RE2 r("(foo)|(bar)|(baz)");
327 string word1;
328 string word2;
329 string word3;
330
331 CHECK(RE2::PartialMatch("foo", r, &word1, &word2, &word3));
332 CHECK_EQ(word1, "foo");
333 CHECK_EQ(word2, "");
334 CHECK_EQ(word3, "");
335 CHECK(RE2::PartialMatch("bar", r, &word1, &word2, &word3));
336 CHECK_EQ(word1, "");
337 CHECK_EQ(word2, "bar");
338 CHECK_EQ(word3, "");
339 CHECK(RE2::PartialMatch("baz", r, &word1, &word2, &word3));
340 CHECK_EQ(word1, "");
341 CHECK_EQ(word2, "");
342 CHECK_EQ(word3, "baz");
343 CHECK(!RE2::PartialMatch("f", r, &word1, &word2, &word3));
344
345 string a;
346 CHECK(RE2::FullMatch("hello", "(foo)|hello", &a));
347 CHECK_EQ(a, "");
348 }
349
350 TEST(RE2, Match) {
351 RE2 re("((\\w+):([0-9]+))"); // extracts host and port
352 StringPiece group[4];
353
354 // No match.
355 StringPiece s = "zyzzyva";
356 CHECK(!re.Match(s, 0, s.size(), RE2::UNANCHORED,
357 group, arraysize(group)));
358
359 // Matches and extracts.
360 s = "a chrisr:9000 here";
361 CHECK(re.Match(s, 0, s.size(), RE2::UNANCHORED,
362 group, arraysize(group)));
363 CHECK_EQ(group[0], "chrisr:9000");
364 CHECK_EQ(group[1], "chrisr:9000");
365 CHECK_EQ(group[2], "chrisr");
366 CHECK_EQ(group[3], "9000");
367
368 string all, host;
369 int port;
370 CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
371 CHECK_EQ(all, "chrisr:9000");
372 CHECK_EQ(host, "chrisr");
373 CHECK_EQ(port, 9000);
374 }
375
376 static void TestRecursion(int size, const char *pattern) {
377 // Fill up a string repeating the pattern given
378 string domain;
379 domain.resize(size);
380 int patlen = strlen(pattern);
381 for (int i = 0; i < size; ++i) {
382 domain[i] = pattern[i % patlen];
383 }
384 // Just make sure it doesn't crash due to too much recursion.
385 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
386 RE2::FullMatch(domain, re);
387 }
388
389 // A meta-quoted string, interpreted as a pattern, should always match
390 // the original unquoted string.
391 static void TestQuoteMeta(string unquoted,
392 const RE2::Options& options = RE2::DefaultOptions) {
393 string quoted = RE2::QuoteMeta(unquoted);
394 RE2 re(quoted, options);
395 EXPECT_TRUE_M(RE2::FullMatch(unquoted, re),
396 "Unquoted='" + unquoted + "', quoted='" + quoted + "'.");
397 }
398
399 // A meta-quoted string, interpreted as a pattern, should always match
400 // the original unquoted string.
401 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
402 const RE2::Options& options = RE2::DefaultOpti ons) {
403 string quoted = RE2::QuoteMeta(unquoted);
404 RE2 re(quoted, options);
405 EXPECT_FALSE_M(RE2::FullMatch(should_not_match, re),
406 "Unquoted='" + unquoted + "', quoted='" + quoted + "'.");
407 }
408
409 // Tests that quoted meta characters match their original strings,
410 // and that a few things that shouldn't match indeed do not.
411 TEST(QuoteMeta, Simple) {
412 TestQuoteMeta("foo");
413 TestQuoteMeta("foo.bar");
414 TestQuoteMeta("foo\\.bar");
415 TestQuoteMeta("[1-9]");
416 TestQuoteMeta("1.5-2.0?");
417 TestQuoteMeta("\\d");
418 TestQuoteMeta("Who doesn't like ice cream?");
419 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
420 TestQuoteMeta("((?!)xxx).*yyy");
421 TestQuoteMeta("([");
422 }
423 TEST(QuoteMeta, SimpleNegative) {
424 NegativeTestQuoteMeta("foo", "bar");
425 NegativeTestQuoteMeta("...", "bar");
426 NegativeTestQuoteMeta("\\.", ".");
427 NegativeTestQuoteMeta("\\.", "..");
428 NegativeTestQuoteMeta("(a)", "a");
429 NegativeTestQuoteMeta("(a|b)", "a");
430 NegativeTestQuoteMeta("(a|b)", "(a)");
431 NegativeTestQuoteMeta("(a|b)", "a|b");
432 NegativeTestQuoteMeta("[0-9]", "0");
433 NegativeTestQuoteMeta("[0-9]", "0-9");
434 NegativeTestQuoteMeta("[0-9]", "[9]");
435 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
436 }
437
438 TEST(QuoteMeta, Latin1) {
439 TestQuoteMeta("3\xb2 = 9", RE2::Latin1);
440 }
441
442 TEST(QuoteMeta, UTF8) {
443 TestQuoteMeta("Plácido Domingo");
444 TestQuoteMeta("xyz"); // No fancy utf8.
445 TestQuoteMeta("\xc2\xb0"); // 2-byte utf8 -- a degree symbol.
446 TestQuoteMeta("27\xc2\xb0 degrees"); // As a middle character.
447 TestQuoteMeta("\xe2\x80\xb3"); // 3-byte utf8 -- a double prime.
448 TestQuoteMeta("\xf0\x9d\x85\x9f"); // 4-byte utf8 -- a music note.
449 TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, this should
450 // still work.
451 NegativeTestQuoteMeta("27\xc2\xb0",
452 "27\\\xc2\\\xb0"); // 2-byte utf8 -- a degree symbol.
453 }
454
455 TEST(QuoteMeta, HasNull) {
456 string has_null;
457
458 // string with one null character
459 has_null += '\0';
460 TestQuoteMeta(has_null);
461 NegativeTestQuoteMeta(has_null, "");
462
463 // Don't want null-followed-by-'1' to be interpreted as '\01'.
464 has_null += '1';
465 TestQuoteMeta(has_null);
466 NegativeTestQuoteMeta(has_null, "\1");
467 }
468
469 TEST(ProgramSize, BigProgram) {
470 RE2 re_simple("simple regexp");
471 RE2 re_medium("medium.*regexp");
472 RE2 re_complex("hard.{1,128}regexp");
473
474 CHECK_GT(re_simple.ProgramSize(), 0);
475 CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
476 CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
477 }
478
479 // Issue 956519: handling empty character sets was
480 // causing NULL dereference. This tests a few empty character sets.
481 // (The way to get an empty character set is to negate a full one.)
482 TEST(EmptyCharset, Fuzz) {
483 static const char *empties[] = {
484 "[^\\S\\s]",
485 "[^\\S[:space:]]",
486 "[^\\D\\d]",
487 "[^\\D[:digit:]]"
488 };
489 for (int i = 0; i < arraysize(empties); i++)
490 CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
491 }
492
493 // Test that named groups work correctly.
494 TEST(Capture, NamedGroups) {
495 {
496 RE2 re("(hello world)");
497 CHECK_EQ(re.NumberOfCapturingGroups(), 1);
498 const map<string, int>& m = re.NamedCapturingGroups();
499 CHECK_EQ(m.size(), 0);
500 }
501
502 {
503 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
504 CHECK_EQ(re.NumberOfCapturingGroups(), 6);
505 const map<string, int>& m = re.NamedCapturingGroups();
506 CHECK_EQ(m.size(), 4);
507 CHECK_EQ(m.find("A")->second, 1);
508 CHECK_EQ(m.find("B")->second, 2);
509 CHECK_EQ(m.find("C")->second, 3);
510 CHECK_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous
511 }
512 }
513
514 TEST(RE2, FullMatchWithNoArgs) {
515 CHECK(RE2::FullMatch("h", "h"));
516 CHECK(RE2::FullMatch("hello", "hello"));
517 CHECK(RE2::FullMatch("hello", "h.*o"));
518 CHECK(!RE2::FullMatch("othello", "h.*o")); // Must be anchored at front
519 CHECK(!RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end
520 }
521
522 TEST(RE2, PartialMatch) {
523 CHECK(RE2::PartialMatch("x", "x"));
524 CHECK(RE2::PartialMatch("hello", "h.*o"));
525 CHECK(RE2::PartialMatch("othello", "h.*o"));
526 CHECK(RE2::PartialMatch("hello!", "h.*o"));
527 CHECK(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))"));
528 }
529
530 TEST(RE2, PartialMatchN) {
531 RE2::Arg argv[2];
532 const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
533
534 // 0 arg
535 EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0));
536 EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0));
537
538 // 1 arg
539 int i;
540 argv[0] = &i;
541 EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1));
542 EXPECT_EQ(1001, i);
543 EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1));
544
545 // Multi-arg
546 string s;
547 argv[1] = &s;
548 EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2));
549 EXPECT_EQ(42, i);
550 EXPECT_EQ("life", s);
551 EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2));
552 }
553
554 TEST(RE2, FullMatchZeroArg) {
555 // Zero-arg
556 CHECK(RE2::FullMatch("1001", "\\d+"));
557 }
558
559 TEST(RE2, FullMatchOneArg) {
560 int i;
561
562 // Single-arg
563 CHECK(RE2::FullMatch("1001", "(\\d+)", &i));
564 CHECK_EQ(i, 1001);
565 CHECK(RE2::FullMatch("-123", "(-?\\d+)", &i));
566 CHECK_EQ(i, -123);
567 CHECK(!RE2::FullMatch("10", "()\\d+", &i));
568 CHECK(!RE2::FullMatch("1234567890123456789012345678901234567890",
569 "(\\d+)", &i));
570 }
571
572 TEST(RE2, FullMatchIntegerArg) {
573 int i;
574
575 // Digits surrounding integer-arg
576 CHECK(RE2::FullMatch("1234", "1(\\d*)4", &i));
577 CHECK_EQ(i, 23);
578 CHECK(RE2::FullMatch("1234", "(\\d)\\d+", &i));
579 CHECK_EQ(i, 1);
580 CHECK(RE2::FullMatch("-1234", "(-\\d)\\d+", &i));
581 CHECK_EQ(i, -1);
582 CHECK(RE2::PartialMatch("1234", "(\\d)", &i));
583 CHECK_EQ(i, 1);
584 CHECK(RE2::PartialMatch("-1234", "(-\\d)", &i));
585 CHECK_EQ(i, -1);
586 }
587
588 TEST(RE2, FullMatchStringArg) {
589 string s;
590 // String-arg
591 CHECK(RE2::FullMatch("hello", "h(.*)o", &s));
592 CHECK_EQ(s, string("ell"));
593 }
594
595 TEST(RE2, FullMatchStringPieceArg) {
596 int i;
597 // StringPiece-arg
598 StringPiece sp;
599 CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i));
600 CHECK_EQ(sp.size(), 4);
601 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
602 CHECK_EQ(i, 1234);
603 }
604
605 TEST(RE2, FullMatchMultiArg) {
606 int i;
607 string s;
608 // Multi-arg
609 CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
610 CHECK_EQ(s, string("ruby"));
611 CHECK_EQ(i, 1234);
612 }
613
614 TEST(RE2, FullMatchN) {
615 RE2::Arg argv[2];
616 const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
617
618 // 0 arg
619 EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0));
620 EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0));
621
622 // 1 arg
623 int i;
624 argv[0] = &i;
625 EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1));
626 EXPECT_EQ(1001, i);
627 EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1));
628
629 // Multi-arg
630 string s;
631 argv[1] = &s;
632 EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2));
633 EXPECT_EQ(42, i);
634 EXPECT_EQ("life", s);
635 EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2));
636 }
637
638 TEST(RE2, FullMatchIgnoredArg) {
639 int i;
640 string s;
641 // Ignored arg
642 CHECK(RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i));
643 CHECK_EQ(s, string("ruby"));
644 CHECK_EQ(i, 1234);
645 }
646
647 TEST(RE2, FullMatchTypedNullArg) {
648 string s;
649
650 // Ignore non-void* NULL arg
651 CHECK(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL));
652 CHECK(RE2::FullMatch("hello", "h(.*)o", (string*)NULL));
653 CHECK(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL));
654 CHECK(RE2::FullMatch("1234", "(.*)", (int*)NULL));
655 CHECK(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL));
656 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL));
657 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL));
658
659 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
660 CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL));
661 CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL));
662 CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL));
663 CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL));
664 CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL));
665 }
666
667 #ifndef WIN32
668 // Check that numeric parsing code does not read past the end of
669 // the number being parsed.
670 TEST(RE2, NULTerminated) {
671 char *v;
672 int x;
673 long pagesize = sysconf(_SC_PAGE_SIZE);
674
675 #ifndef MAP_ANONYMOUS
676 #define MAP_ANONYMOUS MAP_ANON
677 #endif
678 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
679 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
680 CHECK(v != reinterpret_cast<char*>(-1));
681 LOG(INFO) << "Memory at " << (void*)v;
682 CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
683 v[pagesize - 1] = '1';
684
685 x = 0;
686 CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
687 CHECK_EQ(x, 1);
688 }
689 #endif
690
691 TEST(RE2, FullMatchTypeTests) {
692 // Type tests
693 string zeros(100, '0');
694 {
695 char c;
696 CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
697 CHECK_EQ(c, 'H');
698 }
699 {
700 unsigned char c;
701 CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
702 CHECK_EQ(c, static_cast<unsigned char>('H'));
703 }
704 {
705 int16 v;
706 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100);
707 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100);
708 CHECK(RE2::FullMatch("32767", "(-?\\d+)", &v)); CHECK_EQ(v, 32767);
709 CHECK(RE2::FullMatch("-32768", "(-?\\d+)", &v)); CHECK_EQ(v, -32768);
710 CHECK(!RE2::FullMatch("-32769", "(-?\\d+)", &v));
711 CHECK(!RE2::FullMatch("32768", "(-?\\d+)", &v));
712 }
713 {
714 uint16 v;
715 CHECK(RE2::FullMatch("100", "(\\d+)", &v)); CHECK_EQ(v, 100);
716 CHECK(RE2::FullMatch("32767", "(\\d+)", &v)); CHECK_EQ(v, 32767);
717 CHECK(RE2::FullMatch("65535", "(\\d+)", &v)); CHECK_EQ(v, 65535);
718 CHECK(!RE2::FullMatch("65536", "(\\d+)", &v));
719 }
720 {
721 int32 v;
722 static const int32 max = 0x7fffffff;
723 static const int32 min = -max - 1;
724 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100);
725 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100);
726 CHECK(RE2::FullMatch("2147483647", "(-?\\d+)", &v)); CHECK_EQ(v, max);
727 CHECK(RE2::FullMatch("-2147483648", "(-?\\d+)", &v)); CHECK_EQ(v, min);
728 CHECK(!RE2::FullMatch("-2147483649", "(-?\\d+)", &v));
729 CHECK(!RE2::FullMatch("2147483648", "(-?\\d+)", &v));
730
731 CHECK(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v));
732 CHECK_EQ(v, max);
733 CHECK(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v));
734 CHECK_EQ(v, min);
735
736 CHECK(!RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v));
737 CHECK(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v)));
738 CHECK_EQ(v, max);
739 CHECK(!RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v)));
740 }
741 {
742 uint32 v;
743 static const uint32 max = 0xfffffffful;
744 CHECK(RE2::FullMatch("100", "(\\d+)", &v)); CHECK_EQ(v, 100);
745 CHECK(RE2::FullMatch("4294967295", "(\\d+)", &v)); CHECK_EQ(v, max);
746 CHECK(!RE2::FullMatch("4294967296", "(\\d+)", &v));
747 CHECK(!RE2::FullMatch("-1", "(\\d+)", &v));
748
749 CHECK(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); CHECK_EQ(v, max);
750 }
751 {
752 int64 v;
753 static const int64 max = 0x7fffffffffffffffull;
754 static const int64 min = -max - 1;
755 char buf[32];
756
757 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100);
758 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100);
759
760 snprintf(buf, sizeof(buf), "%lld", max);
761 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max);
762
763 snprintf(buf, sizeof(buf), "%lld", min);
764 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, min);
765
766 snprintf(buf, sizeof(buf), "%lld", max);
767 assert(buf[strlen(buf)-1] != '9');
768 buf[strlen(buf)-1]++;
769 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v));
770
771 snprintf(buf, sizeof(buf), "%lld", min);
772 assert(buf[strlen(buf)-1] != '9');
773 buf[strlen(buf)-1]++;
774 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v));
775 }
776 {
777 uint64 v;
778 int64 v2;
779 static const uint64 max = 0xffffffffffffffffull;
780 char buf[32];
781
782 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100);
783 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v2)); CHECK_EQ(v2, -100);
784
785 snprintf(buf, sizeof(buf), "%llu", max);
786 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max);
787
788 assert(buf[strlen(buf)-1] != '9');
789 buf[strlen(buf)-1]++;
790 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v));
791 }
792 }
793
794 TEST(RE2, FloatingPointFullMatchTypes) {
795 string zeros(100, '0');
796 {
797 float v;
798 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100);
799 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100);
800 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23));
801
802 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
803 CHECK_EQ(v, float(1e23));
804
805 // 6700000000081920.1 is an edge case.
806 // 6700000000081920 is exactly halfway between
807 // two float32s, so the .1 should make it round up.
808 // However, the .1 is outside the precision possible with
809 // a float64: the nearest float64 is 6700000000081920.
810 // So if the code uses strtod and then converts to float32,
811 // round-to-even will make it round down instead of up.
812 // To pass the test, the parser must call strtof directly.
813 // This test case is carefully chosen to use only a 17-digit
814 // number, since C does not guarantee to get the correctly
815 // rounded answer for strtod and strtof unless the input is
816 // short.
817 CHECK(RE2::FullMatch("0.1", "(.*)", &v));
818 CHECK_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
819 CHECK(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
820 CHECK_EQ(v, 6700000000081920.1f)
821 << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
822 }
823 {
824 double v;
825 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100);
826 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100);
827 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, 1e23);
828 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
829 CHECK_EQ(v, double(1e23));
830
831 CHECK(RE2::FullMatch("0.1", "(.*)", &v));
832 CHECK_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1);
833 CHECK(RE2::FullMatch("1.00000005960464485", "(.*)", &v));
834 CHECK_EQ(v, 1.0000000596046448)
835 << StringPrintf("%.17g != %.17g", v, 1.0000000596046448);
836 }
837 }
838
839 TEST(RE2, FullMatchAnchored) {
840 int i;
841 // Check that matching is fully anchored
842 CHECK(!RE2::FullMatch("x1001", "(\\d+)", &i));
843 CHECK(!RE2::FullMatch("1001x", "(\\d+)", &i));
844 CHECK(RE2::FullMatch("x1001", "x(\\d+)", &i)); CHECK_EQ(i, 1001);
845 CHECK(RE2::FullMatch("1001x", "(\\d+)x", &i)); CHECK_EQ(i, 1001);
846 }
847
848 TEST(RE2, FullMatchBraces) {
849 // Braces
850 CHECK(RE2::FullMatch("0abcd", "[0-9a-f+.-]{5,}"));
851 CHECK(RE2::FullMatch("0abcde", "[0-9a-f+.-]{5,}"));
852 CHECK(!RE2::FullMatch("0abc", "[0-9a-f+.-]{5,}"));
853 }
854
855 TEST(RE2, Complicated) {
856 // Complicated RE2
857 CHECK(RE2::FullMatch("foo", "foo|bar|[A-Z]"));
858 CHECK(RE2::FullMatch("bar", "foo|bar|[A-Z]"));
859 CHECK(RE2::FullMatch("X", "foo|bar|[A-Z]"));
860 CHECK(!RE2::FullMatch("XY", "foo|bar|[A-Z]"));
861 }
862
863 TEST(RE2, FullMatchEnd) {
864 // Check full-match handling (needs '$' tacked on internally)
865 CHECK(RE2::FullMatch("fo", "fo|foo"));
866 CHECK(RE2::FullMatch("foo", "fo|foo"));
867 CHECK(RE2::FullMatch("fo", "fo|foo$"));
868 CHECK(RE2::FullMatch("foo", "fo|foo$"));
869 CHECK(RE2::FullMatch("foo", "foo$"));
870 CHECK(!RE2::FullMatch("foo$bar", "foo\\$"));
871 CHECK(!RE2::FullMatch("fox", "fo|bar"));
872
873 // Uncomment the following if we change the handling of '$' to
874 // prevent it from matching a trailing newline
875 if (false) {
876 // Check that we don't get bitten by pcre's special handling of a
877 // '\n' at the end of the string matching '$'
878 CHECK(!RE2::PartialMatch("foo\n", "foo$"));
879 }
880 }
881
882 TEST(RE2, FullMatchArgCount) {
883 // Number of args
884 int a[16];
885 CHECK(RE2::FullMatch("", ""));
886
887 memset(a, 0, sizeof(0));
888 CHECK(RE2::FullMatch("1",
889 "(\\d){1}",
890 &a[0]));
891 CHECK_EQ(a[0], 1);
892
893 memset(a, 0, sizeof(0));
894 CHECK(RE2::FullMatch("12",
895 "(\\d)(\\d)",
896 &a[0], &a[1]));
897 CHECK_EQ(a[0], 1);
898 CHECK_EQ(a[1], 2);
899
900 memset(a, 0, sizeof(0));
901 CHECK(RE2::FullMatch("123",
902 "(\\d)(\\d)(\\d)",
903 &a[0], &a[1], &a[2]));
904 CHECK_EQ(a[0], 1);
905 CHECK_EQ(a[1], 2);
906 CHECK_EQ(a[2], 3);
907
908 memset(a, 0, sizeof(0));
909 CHECK(RE2::FullMatch("1234",
910 "(\\d)(\\d)(\\d)(\\d)",
911 &a[0], &a[1], &a[2], &a[3]));
912 CHECK_EQ(a[0], 1);
913 CHECK_EQ(a[1], 2);
914 CHECK_EQ(a[2], 3);
915 CHECK_EQ(a[3], 4);
916
917 memset(a, 0, sizeof(0));
918 CHECK(RE2::FullMatch("12345",
919 "(\\d)(\\d)(\\d)(\\d)(\\d)",
920 &a[0], &a[1], &a[2], &a[3],
921 &a[4]));
922 CHECK_EQ(a[0], 1);
923 CHECK_EQ(a[1], 2);
924 CHECK_EQ(a[2], 3);
925 CHECK_EQ(a[3], 4);
926 CHECK_EQ(a[4], 5);
927
928 memset(a, 0, sizeof(0));
929 CHECK(RE2::FullMatch("123456",
930 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
931 &a[0], &a[1], &a[2], &a[3],
932 &a[4], &a[5]));
933 CHECK_EQ(a[0], 1);
934 CHECK_EQ(a[1], 2);
935 CHECK_EQ(a[2], 3);
936 CHECK_EQ(a[3], 4);
937 CHECK_EQ(a[4], 5);
938 CHECK_EQ(a[5], 6);
939
940 memset(a, 0, sizeof(0));
941 CHECK(RE2::FullMatch("1234567",
942 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
943 &a[0], &a[1], &a[2], &a[3],
944 &a[4], &a[5], &a[6]));
945 CHECK_EQ(a[0], 1);
946 CHECK_EQ(a[1], 2);
947 CHECK_EQ(a[2], 3);
948 CHECK_EQ(a[3], 4);
949 CHECK_EQ(a[4], 5);
950 CHECK_EQ(a[5], 6);
951 CHECK_EQ(a[6], 7);
952
953 memset(a, 0, sizeof(0));
954 CHECK(RE2::FullMatch("1234567890123456",
955 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
956 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
957 &a[0], &a[1], &a[2], &a[3],
958 &a[4], &a[5], &a[6], &a[7],
959 &a[8], &a[9], &a[10], &a[11],
960 &a[12], &a[13], &a[14], &a[15]));
961 CHECK_EQ(a[0], 1);
962 CHECK_EQ(a[1], 2);
963 CHECK_EQ(a[2], 3);
964 CHECK_EQ(a[3], 4);
965 CHECK_EQ(a[4], 5);
966 CHECK_EQ(a[5], 6);
967 CHECK_EQ(a[6], 7);
968 CHECK_EQ(a[7], 8);
969 CHECK_EQ(a[8], 9);
970 CHECK_EQ(a[9], 0);
971 CHECK_EQ(a[10], 1);
972 CHECK_EQ(a[11], 2);
973 CHECK_EQ(a[12], 3);
974 CHECK_EQ(a[13], 4);
975 CHECK_EQ(a[14], 5);
976 CHECK_EQ(a[15], 6);
977 }
978
979 TEST(RE2, Accessors) {
980 // Check the pattern() accessor
981 {
982 const string kPattern = "http://([^/]+)/.*";
983 const RE2 re(kPattern);
984 CHECK_EQ(kPattern, re.pattern());
985 }
986
987 // Check RE2 error field.
988 {
989 RE2 re("foo");
990 CHECK(re.error().empty()); // Must have no error
991 CHECK(re.ok());
992 CHECK(re.error_code() == RE2::NoError);
993 }
994 }
995
996 TEST(RE2, UTF8) {
997 // Check UTF-8 handling
998 // Three Japanese characters (nihongo)
999 const char utf8_string[] = {
1000 0xe6, 0x97, 0xa5, // 65e5
1001 0xe6, 0x9c, 0xac, // 627c
1002 0xe8, 0xaa, 0x9e, // 8a9e
1003 0
1004 };
1005 const char utf8_pattern[] = {
1006 '.',
1007 0xe6, 0x9c, 0xac, // 627c
1008 '.',
1009 0
1010 };
1011
1012 // Both should match in either mode, bytes or UTF-8
1013 RE2 re_test1(".........", RE2::Latin1);
1014 CHECK(RE2::FullMatch(utf8_string, re_test1));
1015 RE2 re_test2("...");
1016 CHECK(RE2::FullMatch(utf8_string, re_test2));
1017
1018 // Check that '.' matches one byte or UTF-8 character
1019 // according to the mode.
1020 string s;
1021 RE2 re_test3("(.)", RE2::Latin1);
1022 CHECK(RE2::PartialMatch(utf8_string, re_test3, &s));
1023 CHECK_EQ(s, string("\xe6"));
1024 RE2 re_test4("(.)");
1025 CHECK(RE2::PartialMatch(utf8_string, re_test4, &s));
1026 CHECK_EQ(s, string("\xe6\x97\xa5"));
1027
1028 // Check that string matches itself in either mode
1029 RE2 re_test5(utf8_string, RE2::Latin1);
1030 CHECK(RE2::FullMatch(utf8_string, re_test5));
1031 RE2 re_test6(utf8_string);
1032 CHECK(RE2::FullMatch(utf8_string, re_test6));
1033
1034 // Check that pattern matches string only in UTF8 mode
1035 RE2 re_test7(utf8_pattern, RE2::Latin1);
1036 CHECK(!RE2::FullMatch(utf8_string, re_test7));
1037 RE2 re_test8(utf8_pattern);
1038 CHECK(RE2::FullMatch(utf8_string, re_test8));
1039 }
1040
1041 TEST(RE2, UngreedyUTF8) {
1042 // Check that ungreedy, UTF8 regular expressions don't match when they
1043 // oughtn't -- see bug 82246.
1044 {
1045 // This code always worked.
1046 const char* pattern = "\\w+X";
1047 const string target = "a aX";
1048 RE2 match_sentence(pattern, RE2::Latin1);
1049 RE2 match_sentence_re(pattern);
1050
1051 CHECK(!RE2::FullMatch(target, match_sentence));
1052 CHECK(!RE2::FullMatch(target, match_sentence_re));
1053 }
1054 {
1055 const char* pattern = "(?U)\\w+X";
1056 const string target = "a aX";
1057 RE2 match_sentence(pattern, RE2::Latin1);
1058 CHECK_EQ(match_sentence.error(), "");
1059 RE2 match_sentence_re(pattern);
1060
1061 CHECK(!RE2::FullMatch(target, match_sentence));
1062 CHECK(!RE2::FullMatch(target, match_sentence_re));
1063 }
1064 }
1065
1066 TEST(RE2, Rejects) {
1067 { RE2 re("a\\1", RE2::Quiet); CHECK(!re.ok()); }
1068 {
1069 RE2 re("a[x", RE2::Quiet);
1070 CHECK(!re.ok());
1071 }
1072 {
1073 RE2 re("a[z-a]", RE2::Quiet);
1074 CHECK(!re.ok());
1075 }
1076 {
1077 RE2 re("a[[:foobar:]]", RE2::Quiet);
1078 CHECK(!re.ok());
1079 }
1080 {
1081 RE2 re("a(b", RE2::Quiet);
1082 CHECK(!re.ok());
1083 }
1084 {
1085 RE2 re("a\\", RE2::Quiet);
1086 CHECK(!re.ok());
1087 }
1088 }
1089
1090 TEST(RE2, NoCrash) {
1091 // Test that using a bad regexp doesn't crash.
1092 {
1093 RE2 re("a\\", RE2::Quiet);
1094 CHECK(!re.ok());
1095 CHECK(!RE2::PartialMatch("a\\b", re));
1096 }
1097
1098 // Test that using an enormous regexp doesn't crash
1099 {
1100 RE2 re("(((.{100}){100}){100}){100}", RE2::Quiet);
1101 CHECK(!re.ok());
1102 CHECK(!RE2::PartialMatch("aaa", re));
1103 }
1104
1105 // Test that a crazy regexp still compiles and runs.
1106 {
1107 RE2 re(".{512}x", RE2::Quiet);
1108 CHECK(re.ok());
1109 string s;
1110 s.append(515, 'c');
1111 s.append("x");
1112 CHECK(RE2::PartialMatch(s, re));
1113 }
1114 }
1115
1116 TEST(RE2, Recursion) {
1117 // Test that recursion is stopped.
1118 // This test is PCRE-legacy -- there's no recursion in RE2.
1119 int bytes = 15 * 1024; // enough to crash PCRE
1120 TestRecursion(bytes, ".");
1121 TestRecursion(bytes, "a");
1122 TestRecursion(bytes, "a.");
1123 TestRecursion(bytes, "ab.");
1124 TestRecursion(bytes, "abc.");
1125 }
1126
1127 TEST(RE2, BigCountedRepetition) {
1128 // Test that counted repetition works, given tons of memory.
1129 RE2::Options opt;
1130 opt.set_max_mem(256<<20);
1131
1132 RE2 re(".{512}x", opt);
1133 CHECK(re.ok());
1134 string s;
1135 s.append(515, 'c');
1136 s.append("x");
1137 CHECK(RE2::PartialMatch(s, re));
1138 }
1139
1140 TEST(RE2, DeepRecursion) {
1141 // Test for deep stack recursion. This would fail with a
1142 // segmentation violation due to stack overflow before pcre was
1143 // patched.
1144 // Again, a PCRE legacy test. RE2 doesn't recurse.
1145 string comment("x*");
1146 string a(131072, 'a');
1147 comment += a;
1148 comment += "*x";
1149 RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
1150 CHECK(RE2::FullMatch(comment, re));
1151 }
1152
1153 // Suggested by Josh Hyman. Failed when SearchOnePass was
1154 // not implementing case-folding.
1155 TEST(CaseInsensitive, MatchAndConsume) {
1156 string result;
1157 string text = "A fish named *Wanda*";
1158 StringPiece sp(text);
1159
1160 EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result));
1161 EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
1162 }
1163
1164 // RE2 should permit implicit conversions from string, StringPiece, const char*,
1165 // and C string literals.
1166 TEST(RE2, ImplicitConversions) {
1167 string re_string(".");
1168 StringPiece re_stringpiece(".");
1169 const char* re_cstring = ".";
1170 EXPECT_TRUE(RE2::PartialMatch("e", re_string));
1171 EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece));
1172 EXPECT_TRUE(RE2::PartialMatch("e", re_cstring));
1173 EXPECT_TRUE(RE2::PartialMatch("e", "."));
1174 }
1175
1176 // Bugs introduced by 8622304
1177 TEST(RE2, CL8622304) {
1178 // reported by ingow
1179 string dir;
1180 EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])")); // ok
1181 EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir)); // fails
1182
1183 // reported by jacobsa
1184 string key, val;
1185 EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true",
1186 "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
1187 &key,
1188 &val));
1189 EXPECT_EQ(key, "bar");
1190 EXPECT_EQ(val, "1,0x2F,030,4,5");
1191 }
1192
1193
1194 // Check that RE2 returns correct regexp pieces on error.
1195 // In particular, make sure it returns whole runes
1196 // and that it always reports invalid UTF-8.
1197 // Also check that Perl error flag piece is big enough.
1198 static struct ErrorTest {
1199 const char *regexp;
1200 const char *error;
1201 } error_tests[] = {
1202 { "ab\\αcd", "\\α" },
1203 { "ef\\x☺01", "\\x☺0" },
1204 { "gh\\x1☺01", "\\x1☺" },
1205 { "ij\\x1", "\\x1" },
1206 { "kl\\x", "\\x" },
1207 { "uv\\x{0000☺}", "\\x{0000☺" },
1208 { "wx\\p{ABC", "\\p{ABC" },
1209 { "yz(?smiUX:abc)", "(?smiUX" }, // used to return (?s but the error is X
1210 { "aa(?sm☺i", "(?sm☺" },
1211 { "bb[abc", "[abc" },
1212
1213 { "mn\\x1\377", "" }, // no argument string returned for invalid UTF-8
1214 { "op\377qr", "" },
1215 { "st\\x{00000\377", "" },
1216 { "zz\\p{\377}", "" },
1217 { "zz\\x{00\377}", "" },
1218 { "zz(?P<name\377>abc)", "" },
1219 };
1220 TEST(RE2, ErrorArgs) {
1221 for (int i = 0; i < arraysize(error_tests); i++) {
1222 RE2 re(error_tests[i].regexp, RE2::Quiet);
1223 EXPECT_FALSE(re.ok());
1224 EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error();
1225 }
1226 }
1227
1228 // Check that "never match \n" mode never matches \n.
1229 static struct NeverTest {
1230 const char* regexp;
1231 const char* text;
1232 const char* match;
1233 } never_tests[] = {
1234 { "(.*)", "abc\ndef\nghi\n", "abc" },
1235 { "(?s)(abc.*def)", "abc\ndef\n", NULL },
1236 { "(abc(.|\n)*def)", "abc\ndef\n", NULL },
1237 { "(abc[^x]*def)", "abc\ndef\n", NULL },
1238 { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
1239 };
1240 TEST(RE2, NeverNewline) {
1241 RE2::Options opt;
1242 opt.set_never_nl(true);
1243 for (int i = 0; i < arraysize(never_tests); i++) {
1244 const NeverTest& t = never_tests[i];
1245 RE2 re(t.regexp, opt);
1246 if (t.match == NULL) {
1247 EXPECT_FALSE(re.PartialMatch(t.text, re));
1248 } else {
1249 StringPiece m;
1250 EXPECT_TRUE(re.PartialMatch(t.text, re, &m));
1251 EXPECT_EQ(m, t.match);
1252 }
1253 }
1254 }
1255
1256 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0.
1257 // Triggered by a failed DFA search falling back to Bitstate when
1258 // using Match with a NULL submatch set. Bitstate tried to read
1259 // the submatch[0] entry even if nsubmatch was 0.
1260 TEST(RE2, BitstateCaptureBug) {
1261 RE2::Options opt;
1262 opt.set_max_mem(20000);
1263 RE2 re("(_________$)", opt);
1264 StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
1265 EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0));
1266 }
1267
1268 // C++ version of bug 609710.
1269 TEST(RE2, UnicodeClasses) {
1270 const string str = "ABCDEFGHI譚永鋒";
1271 string a, b, c;
1272
1273 EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
1274 EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
1275 EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
1276 EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
1277 EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
1278 EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
1279
1280 EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}"));
1281 EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}"));
1282 EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}"));
1283 EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}"));
1284 EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}"));
1285 EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}"));
1286
1287 EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}"));
1288 EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}"));
1289 EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}"));
1290 EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}"));
1291 EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}"));
1292 EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}"));
1293
1294 EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}"));
1295 EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}"));
1296 EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}"));
1297 EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}"));
1298 EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}"));
1299 EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}"));
1300
1301 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
1302 EXPECT_EQ("A", a);
1303 EXPECT_EQ("B", b);
1304 EXPECT_EQ("C", c);
1305
1306 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
1307 EXPECT_EQ("A", a);
1308 EXPECT_EQ("B", b);
1309 EXPECT_EQ("C", c);
1310
1311 EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));
1312
1313 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
1314 EXPECT_EQ("A", a);
1315 EXPECT_EQ("B", b);
1316 EXPECT_EQ("C", c);
1317
1318 EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));
1319
1320 EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
1321 EXPECT_EQ("譚", a);
1322 EXPECT_EQ("永", b);
1323 EXPECT_EQ("鋒", c);
1324 }
1325
1326 // Bug reported by saito. 2009/02/17
1327 TEST(RE2, NullVsEmptyString) {
1328 RE2 re2(".*");
1329 StringPiece v1("");
1330 EXPECT_TRUE(RE2::FullMatch(v1, re2));
1331
1332 StringPiece v2;
1333 EXPECT_TRUE(RE2::FullMatch(v2, re2));
1334 }
1335
1336 // Issue 1816809
1337 TEST(RE2, Bug1816809) {
1338 RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
1339 StringPiece piece("llx-3;llx4");
1340 string x;
1341 EXPECT_TRUE(RE2::Consume(&piece, re, &x));
1342 }
1343
1344 // Issue 3061120
1345 TEST(RE2, Bug3061120) {
1346 RE2 re("(?i)\\W");
1347 EXPECT_FALSE(RE2::PartialMatch("x", re)); // always worked
1348 EXPECT_FALSE(RE2::PartialMatch("k", re)); // broke because of kelvin
1349 EXPECT_FALSE(RE2::PartialMatch("s", re)); // broke because of latin long s
1350 }
1351
1352 TEST(RE2, CapturingGroupNames) {
1353 // Opening parentheses annotated with group IDs:
1354 // 12 3 45 6 7
1355 RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
1356 EXPECT_TRUE(re.ok());
1357 const map<int, string>& have = re.CapturingGroupNames();
1358 map<int, string> want;
1359 want[3] = "G2";
1360 want[6] = "G2";
1361 want[7] = "G1";
1362 EXPECT_EQ(want, have);
1363 }
1364
1365 TEST(RE2, RegexpToStringLossOfAnchor) {
1366 EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at");
1367 EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at");
1368 EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$");
1369 EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
1370 }
1371
1372 } // namespace re2
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698