OLD | NEW |
1 // -*- coding: utf-8 -*- | 1 // -*- coding: utf-8 -*- |
2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved. | 2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved. |
3 // Use of this source code is governed by a BSD-style | 3 // Use of this source code is governed by a BSD-style |
4 // license that can be found in the LICENSE file. | 4 // license that can be found in the LICENSE file. |
5 | 5 |
6 // TODO: Test extractions for PartialMatch/Consume | 6 // TODO: Test extractions for PartialMatch/Consume |
7 | 7 |
8 #include <errno.h> | 8 #include <sys/types.h> |
9 #ifndef _MSC_VER | 9 #ifndef WIN32 |
10 #include <unistd.h> /* for sysconf */ | |
11 #include <sys/mman.h> | 10 #include <sys/mman.h> |
12 #endif | 11 #endif |
13 #include <sys/stat.h> | 12 #include <sys/stat.h> |
14 #include <sys/types.h> | 13 #include <errno.h> |
15 #include <vector> | 14 #include <vector> |
16 #include "util/test.h" | 15 #include "util/test.h" |
17 #include "re2/re2.h" | 16 #include "re2/re2.h" |
18 #include "re2/regexp.h" | 17 #include "re2/regexp.h" |
19 | 18 |
| 19 #ifdef WIN32 |
| 20 #include <stdio.h> |
| 21 #define snprintf _snprintf |
| 22 #endif |
| 23 |
20 DECLARE_bool(logtostderr); | 24 DECLARE_bool(logtostderr); |
21 | 25 |
22 namespace re2 { | 26 namespace re2 { |
23 | 27 |
24 TEST(RE2, HexTests) { | 28 TEST(RE2, HexTests) { |
25 | 29 |
26 VLOG(1) << "hex tests"; | 30 VLOG(1) << "hex tests"; |
27 | 31 |
28 #define CHECK_HEX(type, value) \ | 32 #define CHECK_HEX(type, value) \ |
29 do { \ | 33 do { \ |
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
169 // Check newline handling | 173 // Check newline handling |
170 { "a.*a", | 174 { "a.*a", |
171 "(\\0)", | 175 "(\\0)", |
172 "aba\naba", | 176 "aba\naba", |
173 "(aba)\naba", | 177 "(aba)\naba", |
174 "(aba)\n(aba)", | 178 "(aba)\n(aba)", |
175 2 }, | 179 2 }, |
176 { "", NULL, NULL, NULL, NULL, 0 } | 180 { "", NULL, NULL, NULL, NULL, 0 } |
177 }; | 181 }; |
178 | 182 |
179 for (const ReplaceTest* t = tests; t->original != NULL; t++) { | 183 for (const ReplaceTest *t = tests; t->original != NULL; ++t) { |
180 VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->re
write); | 184 VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->re
write); |
181 string one(t->original); | 185 string one(t->original); |
182 CHECK(RE2::Replace(&one, t->regexp, t->rewrite)); | 186 CHECK(RE2::Replace(&one, t->regexp, t->rewrite)); |
183 CHECK_EQ(one, t->single); | 187 CHECK_EQ(one, t->single); |
184 string all(t->original); | 188 string all(t->original); |
185 CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count) | 189 CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count) |
186 << "Got: " << all; | 190 << "Got: " << all; |
187 CHECK_EQ(all, t->global); | 191 CHECK_EQ(all, t->global); |
188 } | 192 } |
189 } | 193 } |
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
362 CHECK_EQ(group[3], "9000"); | 366 CHECK_EQ(group[3], "9000"); |
363 | 367 |
364 string all, host; | 368 string all, host; |
365 int port; | 369 int port; |
366 CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port)); | 370 CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port)); |
367 CHECK_EQ(all, "chrisr:9000"); | 371 CHECK_EQ(all, "chrisr:9000"); |
368 CHECK_EQ(host, "chrisr"); | 372 CHECK_EQ(host, "chrisr"); |
369 CHECK_EQ(port, 9000); | 373 CHECK_EQ(port, 9000); |
370 } | 374 } |
371 | 375 |
372 static void TestRecursion(int size, const char* pattern) { | 376 static void TestRecursion(int size, const char *pattern) { |
373 // Fill up a string repeating the pattern given | 377 // Fill up a string repeating the pattern given |
374 string domain; | 378 string domain; |
375 domain.resize(size); | 379 domain.resize(size); |
376 size_t patlen = strlen(pattern); | 380 int patlen = strlen(pattern); |
377 for (int i = 0; i < size; i++) { | 381 for (int i = 0; i < size; ++i) { |
378 domain[i] = pattern[i % patlen]; | 382 domain[i] = pattern[i % patlen]; |
379 } | 383 } |
380 // Just make sure it doesn't crash due to too much recursion. | 384 // Just make sure it doesn't crash due to too much recursion. |
381 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet); | 385 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet); |
382 RE2::FullMatch(domain, re); | 386 RE2::FullMatch(domain, re); |
383 } | 387 } |
384 | 388 |
385 // A meta-quoted string, interpreted as a pattern, should always match | 389 // A meta-quoted string, interpreted as a pattern, should always match |
386 // the original unquoted string. | 390 // the original unquoted string. |
387 static void TestQuoteMeta(string unquoted, | 391 static void TestQuoteMeta(string unquoted, |
388 const RE2::Options& options = RE2::DefaultOptions) { | 392 const RE2::Options& options = RE2::DefaultOptions) { |
389 string quoted = RE2::QuoteMeta(unquoted); | 393 string quoted = RE2::QuoteMeta(unquoted); |
390 RE2 re(quoted, options); | 394 RE2 re(quoted, options); |
391 EXPECT_TRUE(RE2::FullMatch(unquoted, re)) | 395 EXPECT_TRUE_M(RE2::FullMatch(unquoted, re), |
392 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'."; | 396 "Unquoted='" + unquoted + "', quoted='" + quoted + "'."); |
393 } | 397 } |
394 | 398 |
395 // A meta-quoted string, interpreted as a pattern, should always match | 399 // A meta-quoted string, interpreted as a pattern, should always match |
396 // the original unquoted string. | 400 // the original unquoted string. |
397 static void NegativeTestQuoteMeta(string unquoted, string should_not_match, | 401 static void NegativeTestQuoteMeta(string unquoted, string should_not_match, |
398 const RE2::Options& options = RE2::DefaultOpti
ons) { | 402 const RE2::Options& options = RE2::DefaultOpti
ons) { |
399 string quoted = RE2::QuoteMeta(unquoted); | 403 string quoted = RE2::QuoteMeta(unquoted); |
400 RE2 re(quoted, options); | 404 RE2 re(quoted, options); |
401 EXPECT_FALSE(RE2::FullMatch(should_not_match, re)) | 405 EXPECT_FALSE_M(RE2::FullMatch(should_not_match, re), |
402 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'."; | 406 "Unquoted='" + unquoted + "', quoted='" + quoted + "'."); |
403 } | 407 } |
404 | 408 |
405 // Tests that quoted meta characters match their original strings, | 409 // Tests that quoted meta characters match their original strings, |
406 // and that a few things that shouldn't match indeed do not. | 410 // and that a few things that shouldn't match indeed do not. |
407 TEST(QuoteMeta, Simple) { | 411 TEST(QuoteMeta, Simple) { |
408 TestQuoteMeta("foo"); | 412 TestQuoteMeta("foo"); |
409 TestQuoteMeta("foo.bar"); | 413 TestQuoteMeta("foo.bar"); |
410 TestQuoteMeta("foo\\.bar"); | 414 TestQuoteMeta("foo\\.bar"); |
411 TestQuoteMeta("[1-9]"); | 415 TestQuoteMeta("[1-9]"); |
412 TestQuoteMeta("1.5-2.0?"); | 416 TestQuoteMeta("1.5-2.0?"); |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
458 | 462 |
459 // Don't want null-followed-by-'1' to be interpreted as '\01'. | 463 // Don't want null-followed-by-'1' to be interpreted as '\01'. |
460 has_null += '1'; | 464 has_null += '1'; |
461 TestQuoteMeta(has_null); | 465 TestQuoteMeta(has_null); |
462 NegativeTestQuoteMeta(has_null, "\1"); | 466 NegativeTestQuoteMeta(has_null, "\1"); |
463 } | 467 } |
464 | 468 |
465 TEST(ProgramSize, BigProgram) { | 469 TEST(ProgramSize, BigProgram) { |
466 RE2 re_simple("simple regexp"); | 470 RE2 re_simple("simple regexp"); |
467 RE2 re_medium("medium.*regexp"); | 471 RE2 re_medium("medium.*regexp"); |
468 RE2 re_complex("complex.{1,128}regexp"); | 472 RE2 re_complex("hard.{1,128}regexp"); |
469 | 473 |
470 CHECK_GT(re_simple.ProgramSize(), 0); | 474 CHECK_GT(re_simple.ProgramSize(), 0); |
471 CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize()); | 475 CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize()); |
472 CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize()); | 476 CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize()); |
473 } | 477 } |
474 | 478 |
475 TEST(ProgramFanout, BigProgram) { | |
476 RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)"); | |
477 RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)"); | |
478 RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)"); | |
479 RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)"); | |
480 | |
481 map<int, int> histogram; | |
482 | |
483 // 3 is the largest non-empty bucket and has 1 element. | |
484 CHECK_EQ(3, re1.ProgramFanout(&histogram)); | |
485 CHECK_EQ(1, histogram[3]); | |
486 | |
487 // 7 is the largest non-empty bucket and has 10 elements. | |
488 CHECK_EQ(7, re10.ProgramFanout(&histogram)); | |
489 CHECK_EQ(10, histogram[7]); | |
490 | |
491 // 10 is the largest non-empty bucket and has 100 elements. | |
492 CHECK_EQ(10, re100.ProgramFanout(&histogram)); | |
493 CHECK_EQ(100, histogram[10]); | |
494 | |
495 // 13 is the largest non-empty bucket and has 1000 elements. | |
496 CHECK_EQ(13, re1000.ProgramFanout(&histogram)); | |
497 CHECK_EQ(1000, histogram[13]); | |
498 } | |
499 | |
500 // Issue 956519: handling empty character sets was | 479 // Issue 956519: handling empty character sets was |
501 // causing NULL dereference. This tests a few empty character sets. | 480 // causing NULL dereference. This tests a few empty character sets. |
502 // (The way to get an empty character set is to negate a full one.) | 481 // (The way to get an empty character set is to negate a full one.) |
503 TEST(EmptyCharset, Fuzz) { | 482 TEST(EmptyCharset, Fuzz) { |
504 static const char *empties[] = { | 483 static const char *empties[] = { |
505 "[^\\S\\s]", | 484 "[^\\S\\s]", |
506 "[^\\S[:space:]]", | 485 "[^\\S[:space:]]", |
507 "[^\\D\\d]", | 486 "[^\\D\\d]", |
508 "[^\\D[:digit:]]" | 487 "[^\\D[:digit:]]" |
509 }; | 488 }; |
510 for (int i = 0; i < arraysize(empties); i++) | 489 for (int i = 0; i < arraysize(empties); i++) |
511 CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0)); | 490 CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0)); |
512 } | 491 } |
513 | 492 |
514 // Bitstate assumes that kInstFail instructions in | |
515 // alternations or capture groups have been "compiled away". | |
516 TEST(EmptyCharset, BitstateAssumptions) { | |
517 // Captures trigger use of Bitstate. | |
518 static const char *nop_empties[] = { | |
519 "((((()))))" "[^\\S\\s]?", | |
520 "((((()))))" "([^\\S\\s])?", | |
521 "((((()))))" "([^\\S\\s]|[^\\S\\s])?", | |
522 "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)" | |
523 }; | |
524 StringPiece group[6]; | |
525 for (int i = 0; i < arraysize(nop_empties); i++) | |
526 CHECK(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6)); | |
527 } | |
528 | |
529 // Test that named groups work correctly. | 493 // Test that named groups work correctly. |
530 TEST(Capture, NamedGroups) { | 494 TEST(Capture, NamedGroups) { |
531 { | 495 { |
532 RE2 re("(hello world)"); | 496 RE2 re("(hello world)"); |
533 CHECK_EQ(re.NumberOfCapturingGroups(), 1); | 497 CHECK_EQ(re.NumberOfCapturingGroups(), 1); |
534 const map<string, int>& m = re.NamedCapturingGroups(); | 498 const map<string, int>& m = re.NamedCapturingGroups(); |
535 CHECK_EQ(m.size(), 0); | 499 CHECK_EQ(m.size(), 0); |
536 } | 500 } |
537 | 501 |
538 { | 502 { |
539 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))"); | 503 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))"); |
540 CHECK_EQ(re.NumberOfCapturingGroups(), 6); | 504 CHECK_EQ(re.NumberOfCapturingGroups(), 6); |
541 const map<string, int>& m = re.NamedCapturingGroups(); | 505 const map<string, int>& m = re.NamedCapturingGroups(); |
542 CHECK_EQ(m.size(), 4); | 506 CHECK_EQ(m.size(), 4); |
543 CHECK_EQ(m.find("A")->second, 1); | 507 CHECK_EQ(m.find("A")->second, 1); |
544 CHECK_EQ(m.find("B")->second, 2); | 508 CHECK_EQ(m.find("B")->second, 2); |
545 CHECK_EQ(m.find("C")->second, 3); | 509 CHECK_EQ(m.find("C")->second, 3); |
546 CHECK_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous | 510 CHECK_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous |
547 } | 511 } |
548 } | 512 } |
549 | 513 |
550 TEST(RE2, CapturedGroupTest) { | |
551 RE2 re("directions from (?P<S>.*) to (?P<D>.*)"); | |
552 int num_groups = re.NumberOfCapturingGroups(); | |
553 EXPECT_EQ(2, num_groups); | |
554 string args[4]; | |
555 RE2::Arg arg0(&args[0]); | |
556 RE2::Arg arg1(&args[1]); | |
557 RE2::Arg arg2(&args[2]); | |
558 RE2::Arg arg3(&args[3]); | |
559 | |
560 const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3}; | |
561 EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose", | |
562 re, matches, num_groups)); | |
563 const map<string, int>& named_groups = re.NamedCapturingGroups(); | |
564 EXPECT_TRUE(named_groups.find("S") != named_groups.end()); | |
565 EXPECT_TRUE(named_groups.find("D") != named_groups.end()); | |
566 | |
567 // The named group index is 1-based. | |
568 int source_group_index = named_groups.find("S")->second; | |
569 int destination_group_index = named_groups.find("D")->second; | |
570 EXPECT_EQ(1, source_group_index); | |
571 EXPECT_EQ(2, destination_group_index); | |
572 | |
573 // The args is zero-based. | |
574 EXPECT_EQ("mountain view", args[source_group_index - 1]); | |
575 EXPECT_EQ("san jose", args[destination_group_index - 1]); | |
576 } | |
577 | |
578 TEST(RE2, FullMatchWithNoArgs) { | 514 TEST(RE2, FullMatchWithNoArgs) { |
579 CHECK(RE2::FullMatch("h", "h")); | 515 CHECK(RE2::FullMatch("h", "h")); |
580 CHECK(RE2::FullMatch("hello", "hello")); | 516 CHECK(RE2::FullMatch("hello", "hello")); |
581 CHECK(RE2::FullMatch("hello", "h.*o")); | 517 CHECK(RE2::FullMatch("hello", "h.*o")); |
582 CHECK(!RE2::FullMatch("othello", "h.*o")); // Must be anchored at front | 518 CHECK(!RE2::FullMatch("othello", "h.*o")); // Must be anchored at front |
583 CHECK(!RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end | 519 CHECK(!RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end |
584 } | 520 } |
585 | 521 |
586 TEST(RE2, PartialMatch) { | 522 TEST(RE2, PartialMatch) { |
587 CHECK(RE2::PartialMatch("x", "x")); | 523 CHECK(RE2::PartialMatch("x", "x")); |
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
721 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL)); | 657 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL)); |
722 | 658 |
723 // Fail on non-void* NULL arg if the match doesn't parse for the given type. | 659 // Fail on non-void* NULL arg if the match doesn't parse for the given type. |
724 CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL)); | 660 CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL)); |
725 CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL)); | 661 CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL)); |
726 CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL)); | 662 CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL)); |
727 CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL)); | 663 CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL)); |
728 CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL)); | 664 CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL)); |
729 } | 665 } |
730 | 666 |
| 667 #ifndef WIN32 |
731 // Check that numeric parsing code does not read past the end of | 668 // Check that numeric parsing code does not read past the end of |
732 // the number being parsed. | 669 // the number being parsed. |
733 // This implementation requires mmap(2) et al. and thus cannot | |
734 // be used unless they are available. | |
735 TEST(RE2, NULTerminated) { | 670 TEST(RE2, NULTerminated) { |
736 #if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0 | |
737 char *v; | 671 char *v; |
738 int x; | 672 int x; |
739 long pagesize = sysconf(_SC_PAGE_SIZE); | 673 long pagesize = sysconf(_SC_PAGE_SIZE); |
740 | 674 |
741 #ifndef MAP_ANONYMOUS | 675 #ifndef MAP_ANONYMOUS |
742 #define MAP_ANONYMOUS MAP_ANON | 676 #define MAP_ANONYMOUS MAP_ANON |
743 #endif | 677 #endif |
744 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE, | 678 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE, |
745 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)); | 679 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)); |
746 CHECK(v != reinterpret_cast<char*>(-1)); | 680 CHECK(v != reinterpret_cast<char*>(-1)); |
747 LOG(INFO) << "Memory at " << (void*)v; | 681 LOG(INFO) << "Memory at " << (void*)v; |
748 CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno; | 682 CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno; |
749 v[pagesize - 1] = '1'; | 683 v[pagesize - 1] = '1'; |
750 | 684 |
751 x = 0; | 685 x = 0; |
752 CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x)); | 686 CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x)); |
753 CHECK_EQ(x, 1); | 687 CHECK_EQ(x, 1); |
| 688 } |
754 #endif | 689 #endif |
755 } | |
756 | 690 |
757 TEST(RE2, FullMatchTypeTests) { | 691 TEST(RE2, FullMatchTypeTests) { |
758 // Type tests | 692 // Type tests |
759 string zeros(1000, '0'); | 693 string zeros(100, '0'); |
760 { | 694 { |
761 char c; | 695 char c; |
762 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); | 696 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); |
763 CHECK_EQ(c, 'H'); | 697 CHECK_EQ(c, 'H'); |
764 } | 698 } |
765 { | 699 { |
766 unsigned char c; | 700 unsigned char c; |
767 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); | 701 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); |
768 CHECK_EQ(c, static_cast<unsigned char>('H')); | 702 CHECK_EQ(c, static_cast<unsigned char>('H')); |
769 } | 703 } |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
851 snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max); | 785 snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max); |
852 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); | 786 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); |
853 | 787 |
854 assert(buf[strlen(buf)-1] != '9'); | 788 assert(buf[strlen(buf)-1] != '9'); |
855 buf[strlen(buf)-1]++; | 789 buf[strlen(buf)-1]++; |
856 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); | 790 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); |
857 } | 791 } |
858 } | 792 } |
859 | 793 |
860 TEST(RE2, FloatingPointFullMatchTypes) { | 794 TEST(RE2, FloatingPointFullMatchTypes) { |
861 string zeros(1000, '0'); | 795 string zeros(100, '0'); |
862 { | 796 { |
863 float v; | 797 float v; |
864 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100); | 798 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100); |
865 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100); | 799 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100); |
866 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23)); | 800 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23)); |
867 CHECK(RE2::FullMatch(" 100", "(.*)", &v)); CHECK_EQ(v, 100); | |
868 | 801 |
869 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v)); | 802 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v)); |
870 CHECK_EQ(v, float(1e23)); | 803 CHECK_EQ(v, float(1e23)); |
871 | 804 |
872 // 6700000000081920.1 is an edge case. | 805 // 6700000000081920.1 is an edge case. |
873 // 6700000000081920 is exactly halfway between | 806 // 6700000000081920 is exactly halfway between |
874 // two float32s, so the .1 should make it round up. | 807 // two float32s, so the .1 should make it round up. |
875 // However, the .1 is outside the precision possible with | 808 // However, the .1 is outside the precision possible with |
876 // a float64: the nearest float64 is 6700000000081920. | 809 // a float64: the nearest float64 is 6700000000081920. |
877 // So if the code uses strtod and then converts to float32, | 810 // So if the code uses strtod and then converts to float32, |
(...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1057 CHECK(re.error().empty()); // Must have no error | 990 CHECK(re.error().empty()); // Must have no error |
1058 CHECK(re.ok()); | 991 CHECK(re.ok()); |
1059 CHECK(re.error_code() == RE2::NoError); | 992 CHECK(re.error_code() == RE2::NoError); |
1060 } | 993 } |
1061 } | 994 } |
1062 | 995 |
1063 TEST(RE2, UTF8) { | 996 TEST(RE2, UTF8) { |
1064 // Check UTF-8 handling | 997 // Check UTF-8 handling |
1065 // Three Japanese characters (nihongo) | 998 // Three Japanese characters (nihongo) |
1066 const char utf8_string[] = { | 999 const char utf8_string[] = { |
1067 (char)0xe6, (char)0x97, (char)0xa5, // 65e5 | 1000 0xe6, 0x97, 0xa5, // 65e5 |
1068 (char)0xe6, (char)0x9c, (char)0xac, // 627c | 1001 0xe6, 0x9c, 0xac, // 627c |
1069 (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e | 1002 0xe8, 0xaa, 0x9e, // 8a9e |
1070 0 | 1003 0 |
1071 }; | 1004 }; |
1072 const char utf8_pattern[] = { | 1005 const char utf8_pattern[] = { |
1073 '.', | 1006 '.', |
1074 (char)0xe6, (char)0x9c, (char)0xac, // 627c | 1007 0xe6, 0x9c, 0xac, // 627c |
1075 '.', | 1008 '.', |
1076 0 | 1009 0 |
1077 }; | 1010 }; |
1078 | 1011 |
1079 // Both should match in either mode, bytes or UTF-8 | 1012 // Both should match in either mode, bytes or UTF-8 |
1080 RE2 re_test1(".........", RE2::Latin1); | 1013 RE2 re_test1(".........", RE2::Latin1); |
1081 CHECK(RE2::FullMatch(utf8_string, re_test1)); | 1014 CHECK(RE2::FullMatch(utf8_string, re_test1)); |
1082 RE2 re_test2("..."); | 1015 RE2 re_test2("..."); |
1083 CHECK(RE2::FullMatch(utf8_string, re_test2)); | 1016 CHECK(RE2::FullMatch(utf8_string, re_test2)); |
1084 | 1017 |
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1313 if (t.match == NULL) { | 1246 if (t.match == NULL) { |
1314 EXPECT_FALSE(re.PartialMatch(t.text, re)); | 1247 EXPECT_FALSE(re.PartialMatch(t.text, re)); |
1315 } else { | 1248 } else { |
1316 StringPiece m; | 1249 StringPiece m; |
1317 EXPECT_TRUE(re.PartialMatch(t.text, re, &m)); | 1250 EXPECT_TRUE(re.PartialMatch(t.text, re, &m)); |
1318 EXPECT_EQ(m, t.match); | 1251 EXPECT_EQ(m, t.match); |
1319 } | 1252 } |
1320 } | 1253 } |
1321 } | 1254 } |
1322 | 1255 |
1323 // Check that dot_nl option works. | |
1324 TEST(RE2, DotNL) { | |
1325 RE2::Options opt; | |
1326 opt.set_dot_nl(true); | |
1327 EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt))); | |
1328 EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt))); | |
1329 opt.set_never_nl(true); | |
1330 EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt))); | |
1331 } | |
1332 | |
1333 // Check that there are no capturing groups in "never capture" mode. | 1256 // Check that there are no capturing groups in "never capture" mode. |
1334 TEST(RE2, NeverCapture) { | 1257 TEST(RE2, NeverCapture) { |
1335 RE2::Options opt; | 1258 RE2::Options opt; |
1336 opt.set_never_capture(true); | 1259 opt.set_never_capture(true); |
1337 RE2 re("(r)(e)", opt); | 1260 RE2 re("(r)(e)", opt); |
1338 EXPECT_EQ(0, re.NumberOfCapturingGroups()); | 1261 EXPECT_EQ(0, re.NumberOfCapturingGroups()); |
1339 } | 1262 } |
1340 | 1263 |
1341 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0. | 1264 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0. |
1342 // Triggered by a failed DFA search falling back to Bitstate when | 1265 // Triggered by a failed DFA search falling back to Bitstate when |
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1447 EXPECT_EQ(want, have); | 1370 EXPECT_EQ(want, have); |
1448 } | 1371 } |
1449 | 1372 |
1450 TEST(RE2, RegexpToStringLossOfAnchor) { | 1373 TEST(RE2, RegexpToStringLossOfAnchor) { |
1451 EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at"); | 1374 EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at"); |
1452 EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at"); | 1375 EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at"); |
1453 EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$"); | 1376 EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$"); |
1454 EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)"); | 1377 EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)"); |
1455 } | 1378 } |
1456 | 1379 |
1457 // Issue 10131674 | |
1458 TEST(RE2, Bug10131674) { | |
1459 // Some of these escapes describe values that do not fit in a byte. | |
1460 RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1); | |
1461 EXPECT_FALSE(re.ok()); | |
1462 EXPECT_FALSE(RE2::FullMatch("hello world", re)); | |
1463 } | |
1464 | |
1465 TEST(RE2, Bug18391750) { | |
1466 // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanit
izer. | |
1467 const char t[] = { | |
1468 (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08, | |
1469 (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5, | |
1470 (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69, | |
1471 (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31, | |
1472 (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29, | |
1473 (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00, | |
1474 }; | |
1475 RE2::Options opt; | |
1476 opt.set_encoding(RE2::Options::EncodingLatin1); | |
1477 opt.set_longest_match(true); | |
1478 opt.set_dot_nl(true); | |
1479 opt.set_case_sensitive(false); | |
1480 RE2 re(t, opt); | |
1481 CHECK(re.ok()); | |
1482 RE2::PartialMatch(t, re); | |
1483 } | |
1484 | |
1485 TEST(RE2, Bug18458852) { | |
1486 // Bug in parser accepting invalid (too large) rune, | |
1487 // causing compiler to fail in DCHECK in UTF-8 | |
1488 // character class code. | |
1489 const char b[] = { | |
1490 (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28, | |
1491 (char)0x24, (char)0x5b, (char)0x5e, (char)0xf5, (char)0x87, (char)0x87, | |
1492 (char)0x90, (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00, | |
1493 }; | |
1494 RE2 re(b); | |
1495 CHECK(!re.ok()); | |
1496 } | |
1497 | |
1498 TEST(RE2, Bug18523943) { | |
1499 // Bug in bitstate: case kFailInst was merged into the default with LOG(DFATAL
). | |
1500 | |
1501 RE2::Options opt; | |
1502 const char a[] = { | |
1503 (char)0x29, (char)0x29, (char)0x24, (char)0x00, | |
1504 }; | |
1505 const char b[] = { | |
1506 (char)0x28, (char)0x0a, (char)0x2a, (char)0x2a, (char)0x29, (char)0x00, | |
1507 }; | |
1508 opt.set_log_errors(false); | |
1509 opt.set_encoding(RE2::Options::EncodingLatin1); | |
1510 opt.set_posix_syntax(true); | |
1511 opt.set_longest_match(true); | |
1512 opt.set_literal(false); | |
1513 opt.set_never_nl(true); | |
1514 | |
1515 RE2 re((const char*)b, opt); | |
1516 CHECK(re.ok()); | |
1517 string s1; | |
1518 CHECK(!RE2::PartialMatch((const char*)a, re, &s1)); | |
1519 } | |
1520 | |
1521 TEST(RE2, Bug21371806) { | |
1522 // Bug in parser accepting Unicode groups in Latin-1 mode, | |
1523 // causing compiler to fail in DCHECK in prog.cc. | |
1524 | |
1525 RE2::Options opt; | |
1526 opt.set_encoding(RE2::Options::EncodingLatin1); | |
1527 | |
1528 RE2 re("g\\p{Zl}]", opt); | |
1529 CHECK(re.ok()); | |
1530 } | |
1531 | |
1532 } // namespace re2 | 1380 } // namespace re2 |
OLD | NEW |