Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(100)

Side by Side Diff: third_party/re2/re2/testing/re2_test.cc

Issue 1516543002: Update re2 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Remove sparse-array-valgrind.patch Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // -*- coding: utf-8 -*- 1 // -*- coding: utf-8 -*-
2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved. 2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved.
3 // Use of this source code is governed by a BSD-style 3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file. 4 // license that can be found in the LICENSE file.
5 5
6 // TODO: Test extractions for PartialMatch/Consume 6 // TODO: Test extractions for PartialMatch/Consume
7 7
8 #include <sys/types.h> 8 #include <errno.h>
9 #ifndef WIN32 9 #ifndef _MSC_VER
10 #include <unistd.h> /* for sysconf */
10 #include <sys/mman.h> 11 #include <sys/mman.h>
11 #endif 12 #endif
12 #include <sys/stat.h> 13 #include <sys/stat.h>
13 #include <errno.h> 14 #include <sys/types.h>
14 #include <vector> 15 #include <vector>
15 #include "util/test.h" 16 #include "util/test.h"
16 #include "re2/re2.h" 17 #include "re2/re2.h"
17 #include "re2/regexp.h" 18 #include "re2/regexp.h"
18 19
19 #ifdef WIN32
20 #include <stdio.h>
21 #define snprintf _snprintf
22 #endif
23
24 DECLARE_bool(logtostderr); 20 DECLARE_bool(logtostderr);
25 21
26 namespace re2 { 22 namespace re2 {
27 23
28 TEST(RE2, HexTests) { 24 TEST(RE2, HexTests) {
29 25
30 VLOG(1) << "hex tests"; 26 VLOG(1) << "hex tests";
31 27
32 #define CHECK_HEX(type, value) \ 28 #define CHECK_HEX(type, value) \
33 do { \ 29 do { \
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after
173 // Check newline handling 169 // Check newline handling
174 { "a.*a", 170 { "a.*a",
175 "(\\0)", 171 "(\\0)",
176 "aba\naba", 172 "aba\naba",
177 "(aba)\naba", 173 "(aba)\naba",
178 "(aba)\n(aba)", 174 "(aba)\n(aba)",
179 2 }, 175 2 },
180 { "", NULL, NULL, NULL, NULL, 0 } 176 { "", NULL, NULL, NULL, NULL, 0 }
181 }; 177 };
182 178
183 for (const ReplaceTest *t = tests; t->original != NULL; ++t) { 179 for (const ReplaceTest* t = tests; t->original != NULL; t++) {
184 VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->rewrite); 180 VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->rewrite);
185 string one(t->original); 181 string one(t->original);
186 CHECK(RE2::Replace(&one, t->regexp, t->rewrite)); 182 CHECK(RE2::Replace(&one, t->regexp, t->rewrite));
187 CHECK_EQ(one, t->single); 183 CHECK_EQ(one, t->single);
188 string all(t->original); 184 string all(t->original);
189 CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count) 185 CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
190 << "Got: " << all; 186 << "Got: " << all;
191 CHECK_EQ(all, t->global); 187 CHECK_EQ(all, t->global);
192 } 188 }
193 } 189 }
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after
366 CHECK_EQ(group[3], "9000"); 362 CHECK_EQ(group[3], "9000");
367 363
368 string all, host; 364 string all, host;
369 int port; 365 int port;
370 CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port)); 366 CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
371 CHECK_EQ(all, "chrisr:9000"); 367 CHECK_EQ(all, "chrisr:9000");
372 CHECK_EQ(host, "chrisr"); 368 CHECK_EQ(host, "chrisr");
373 CHECK_EQ(port, 9000); 369 CHECK_EQ(port, 9000);
374 } 370 }
375 371
376 static void TestRecursion(int size, const char *pattern) { 372 static void TestRecursion(int size, const char* pattern) {
377 // Fill up a string repeating the pattern given 373 // Fill up a string repeating the pattern given
378 string domain; 374 string domain;
379 domain.resize(size); 375 domain.resize(size);
380 int patlen = strlen(pattern); 376 size_t patlen = strlen(pattern);
381 for (int i = 0; i < size; ++i) { 377 for (int i = 0; i < size; i++) {
382 domain[i] = pattern[i % patlen]; 378 domain[i] = pattern[i % patlen];
383 } 379 }
384 // Just make sure it doesn't crash due to too much recursion. 380 // Just make sure it doesn't crash due to too much recursion.
385 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet); 381 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
386 RE2::FullMatch(domain, re); 382 RE2::FullMatch(domain, re);
387 } 383 }
388 384
389 // A meta-quoted string, interpreted as a pattern, should always match 385 // A meta-quoted string, interpreted as a pattern, should always match
390 // the original unquoted string. 386 // the original unquoted string.
391 static void TestQuoteMeta(string unquoted, 387 static void TestQuoteMeta(string unquoted,
392 const RE2::Options& options = RE2::DefaultOptions) { 388 const RE2::Options& options = RE2::DefaultOptions) {
393 string quoted = RE2::QuoteMeta(unquoted); 389 string quoted = RE2::QuoteMeta(unquoted);
394 RE2 re(quoted, options); 390 RE2 re(quoted, options);
395 EXPECT_TRUE_M(RE2::FullMatch(unquoted, re), 391 EXPECT_TRUE(RE2::FullMatch(unquoted, re))
396 "Unquoted='" + unquoted + "', quoted='" + quoted + "'."); 392 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
397 } 393 }
398 394
399 // A meta-quoted string, interpreted as a pattern, should always match 395 // A meta-quoted string, interpreted as a pattern, should always match
400 // the original unquoted string. 396 // the original unquoted string.
401 static void NegativeTestQuoteMeta(string unquoted, string should_not_match, 397 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
402 const RE2::Options& options = RE2::DefaultOptions) { 398 const RE2::Options& options = RE2::DefaultOptions) {
403 string quoted = RE2::QuoteMeta(unquoted); 399 string quoted = RE2::QuoteMeta(unquoted);
404 RE2 re(quoted, options); 400 RE2 re(quoted, options);
405 EXPECT_FALSE_M(RE2::FullMatch(should_not_match, re), 401 EXPECT_FALSE(RE2::FullMatch(should_not_match, re))
406 "Unquoted='" + unquoted + "', quoted='" + quoted + "'."); 402 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
407 } 403 }
408 404
409 // Tests that quoted meta characters match their original strings, 405 // Tests that quoted meta characters match their original strings,
410 // and that a few things that shouldn't match indeed do not. 406 // and that a few things that shouldn't match indeed do not.
411 TEST(QuoteMeta, Simple) { 407 TEST(QuoteMeta, Simple) {
412 TestQuoteMeta("foo"); 408 TestQuoteMeta("foo");
413 TestQuoteMeta("foo.bar"); 409 TestQuoteMeta("foo.bar");
414 TestQuoteMeta("foo\\.bar"); 410 TestQuoteMeta("foo\\.bar");
415 TestQuoteMeta("[1-9]"); 411 TestQuoteMeta("[1-9]");
416 TestQuoteMeta("1.5-2.0?"); 412 TestQuoteMeta("1.5-2.0?");
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
462 458
463 // Don't want null-followed-by-'1' to be interpreted as '\01'. 459 // Don't want null-followed-by-'1' to be interpreted as '\01'.
464 has_null += '1'; 460 has_null += '1';
465 TestQuoteMeta(has_null); 461 TestQuoteMeta(has_null);
466 NegativeTestQuoteMeta(has_null, "\1"); 462 NegativeTestQuoteMeta(has_null, "\1");
467 } 463 }
468 464
469 TEST(ProgramSize, BigProgram) { 465 TEST(ProgramSize, BigProgram) {
470 RE2 re_simple("simple regexp"); 466 RE2 re_simple("simple regexp");
471 RE2 re_medium("medium.*regexp"); 467 RE2 re_medium("medium.*regexp");
472 RE2 re_complex("hard.{1,128}regexp"); 468 RE2 re_complex("complex.{1,128}regexp");
473 469
474 CHECK_GT(re_simple.ProgramSize(), 0); 470 CHECK_GT(re_simple.ProgramSize(), 0);
475 CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize()); 471 CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
476 CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize()); 472 CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
477 } 473 }
478 474
475 TEST(ProgramFanout, BigProgram) {
476 RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)");
477 RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)");
478 RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
479 RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");
480
481 map<int, int> histogram;
482
483 // 3 is the largest non-empty bucket and has 1 element.
484 CHECK_EQ(3, re1.ProgramFanout(&histogram));
485 CHECK_EQ(1, histogram[3]);
486
487 // 7 is the largest non-empty bucket and has 10 elements.
488 CHECK_EQ(7, re10.ProgramFanout(&histogram));
489 CHECK_EQ(10, histogram[7]);
490
491 // 10 is the largest non-empty bucket and has 100 elements.
492 CHECK_EQ(10, re100.ProgramFanout(&histogram));
493 CHECK_EQ(100, histogram[10]);
494
495 // 13 is the largest non-empty bucket and has 1000 elements.
496 CHECK_EQ(13, re1000.ProgramFanout(&histogram));
497 CHECK_EQ(1000, histogram[13]);
498 }
499
479 // Issue 956519: handling empty character sets was 500 // Issue 956519: handling empty character sets was
480 // causing NULL dereference. This tests a few empty character sets. 501 // causing NULL dereference. This tests a few empty character sets.
481 // (The way to get an empty character set is to negate a full one.) 502 // (The way to get an empty character set is to negate a full one.)
482 TEST(EmptyCharset, Fuzz) { 503 TEST(EmptyCharset, Fuzz) {
483 static const char *empties[] = { 504 static const char *empties[] = {
484 "[^\\S\\s]", 505 "[^\\S\\s]",
485 "[^\\S[:space:]]", 506 "[^\\S[:space:]]",
486 "[^\\D\\d]", 507 "[^\\D\\d]",
487 "[^\\D[:digit:]]" 508 "[^\\D[:digit:]]"
488 }; 509 };
489 for (int i = 0; i < arraysize(empties); i++) 510 for (int i = 0; i < arraysize(empties); i++)
490 CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0)); 511 CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
491 } 512 }
492 513
514 // Bitstate assumes that kInstFail instructions in
515 // alternations or capture groups have been "compiled away".
516 TEST(EmptyCharset, BitstateAssumptions) {
517 // Captures trigger use of Bitstate.
518 static const char *nop_empties[] = {
519 "((((()))))" "[^\\S\\s]?",
520 "((((()))))" "([^\\S\\s])?",
521 "((((()))))" "([^\\S\\s]|[^\\S\\s])?",
522 "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)"
523 };
524 StringPiece group[6];
525 for (int i = 0; i < arraysize(nop_empties); i++)
526 CHECK(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6));
527 }
528
493 // Test that named groups work correctly. 529 // Test that named groups work correctly.
494 TEST(Capture, NamedGroups) { 530 TEST(Capture, NamedGroups) {
495 { 531 {
496 RE2 re("(hello world)"); 532 RE2 re("(hello world)");
497 CHECK_EQ(re.NumberOfCapturingGroups(), 1); 533 CHECK_EQ(re.NumberOfCapturingGroups(), 1);
498 const map<string, int>& m = re.NamedCapturingGroups(); 534 const map<string, int>& m = re.NamedCapturingGroups();
499 CHECK_EQ(m.size(), 0); 535 CHECK_EQ(m.size(), 0);
500 } 536 }
501 537
502 { 538 {
503 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))"); 539 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
504 CHECK_EQ(re.NumberOfCapturingGroups(), 6); 540 CHECK_EQ(re.NumberOfCapturingGroups(), 6);
505 const map<string, int>& m = re.NamedCapturingGroups(); 541 const map<string, int>& m = re.NamedCapturingGroups();
506 CHECK_EQ(m.size(), 4); 542 CHECK_EQ(m.size(), 4);
507 CHECK_EQ(m.find("A")->second, 1); 543 CHECK_EQ(m.find("A")->second, 1);
508 CHECK_EQ(m.find("B")->second, 2); 544 CHECK_EQ(m.find("B")->second, 2);
509 CHECK_EQ(m.find("C")->second, 3); 545 CHECK_EQ(m.find("C")->second, 3);
510 CHECK_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous 546 CHECK_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous
511 } 547 }
512 } 548 }
513 549
550 TEST(RE2, CapturedGroupTest) {
551 RE2 re("directions from (?P<S>.*) to (?P<D>.*)");
552 int num_groups = re.NumberOfCapturingGroups();
553 EXPECT_EQ(2, num_groups);
554 string args[4];
555 RE2::Arg arg0(&args[0]);
556 RE2::Arg arg1(&args[1]);
557 RE2::Arg arg2(&args[2]);
558 RE2::Arg arg3(&args[3]);
559
560 const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3};
561 EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose",
562 re, matches, num_groups));
563 const map<string, int>& named_groups = re.NamedCapturingGroups();
564 EXPECT_TRUE(named_groups.find("S") != named_groups.end());
565 EXPECT_TRUE(named_groups.find("D") != named_groups.end());
566
567 // The named group index is 1-based.
568 int source_group_index = named_groups.find("S")->second;
569 int destination_group_index = named_groups.find("D")->second;
570 EXPECT_EQ(1, source_group_index);
571 EXPECT_EQ(2, destination_group_index);
572
573 // The args is zero-based.
574 EXPECT_EQ("mountain view", args[source_group_index - 1]);
575 EXPECT_EQ("san jose", args[destination_group_index - 1]);
576 }
577
514 TEST(RE2, FullMatchWithNoArgs) { 578 TEST(RE2, FullMatchWithNoArgs) {
515 CHECK(RE2::FullMatch("h", "h")); 579 CHECK(RE2::FullMatch("h", "h"));
516 CHECK(RE2::FullMatch("hello", "hello")); 580 CHECK(RE2::FullMatch("hello", "hello"));
517 CHECK(RE2::FullMatch("hello", "h.*o")); 581 CHECK(RE2::FullMatch("hello", "h.*o"));
518 CHECK(!RE2::FullMatch("othello", "h.*o")); // Must be anchored at front 582 CHECK(!RE2::FullMatch("othello", "h.*o")); // Must be anchored at front
519 CHECK(!RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end 583 CHECK(!RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end
520 } 584 }
521 585
522 TEST(RE2, PartialMatch) { 586 TEST(RE2, PartialMatch) {
523 CHECK(RE2::PartialMatch("x", "x")); 587 CHECK(RE2::PartialMatch("x", "x"));
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after
657 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL)); 721 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL));
658 722
659 // Fail on non-void* NULL arg if the match doesn't parse for the given type. 723 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
660 CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL)); 724 CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL));
661 CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL)); 725 CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL));
662 CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL)); 726 CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL));
663 CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL)); 727 CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL));
664 CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL)); 728 CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL));
665 } 729 }
666 730
667 #ifndef WIN32
668 // Check that numeric parsing code does not read past the end of 731 // Check that numeric parsing code does not read past the end of
669 // the number being parsed. 732 // the number being parsed.
733 // This implementation requires mmap(2) et al. and thus cannot
734 // be used unless they are available.
670 TEST(RE2, NULTerminated) { 735 TEST(RE2, NULTerminated) {
736 #if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0
671 char *v; 737 char *v;
672 int x; 738 int x;
673 long pagesize = sysconf(_SC_PAGE_SIZE); 739 long pagesize = sysconf(_SC_PAGE_SIZE);
674 740
675 #ifndef MAP_ANONYMOUS 741 #ifndef MAP_ANONYMOUS
676 #define MAP_ANONYMOUS MAP_ANON 742 #define MAP_ANONYMOUS MAP_ANON
677 #endif 743 #endif
678 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE, 744 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
679 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)); 745 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
680 CHECK(v != reinterpret_cast<char*>(-1)); 746 CHECK(v != reinterpret_cast<char*>(-1));
681 LOG(INFO) << "Memory at " << (void*)v; 747 LOG(INFO) << "Memory at " << (void*)v;
682 CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno; 748 CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
683 v[pagesize - 1] = '1'; 749 v[pagesize - 1] = '1';
684 750
685 x = 0; 751 x = 0;
686 CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x)); 752 CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
687 CHECK_EQ(x, 1); 753 CHECK_EQ(x, 1);
754 #endif
688 } 755 }
689 #endif
690 756
691 TEST(RE2, FullMatchTypeTests) { 757 TEST(RE2, FullMatchTypeTests) {
692 // Type tests 758 // Type tests
693 string zeros(100, '0'); 759 string zeros(1000, '0');
694 { 760 {
695 char c; 761 char c;
696 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); 762 CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
697 CHECK_EQ(c, 'H'); 763 CHECK_EQ(c, 'H');
698 } 764 }
699 { 765 {
700 unsigned char c; 766 unsigned char c;
701 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); 767 CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
702 CHECK_EQ(c, static_cast<unsigned char>('H')); 768 CHECK_EQ(c, static_cast<unsigned char>('H'));
703 } 769 }
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
785 snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max); 851 snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max);
786 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); 852 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max);
787 853
788 assert(buf[strlen(buf)-1] != '9'); 854 assert(buf[strlen(buf)-1] != '9');
789 buf[strlen(buf)-1]++; 855 buf[strlen(buf)-1]++;
790 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); 856 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v));
791 } 857 }
792 } 858 }
793 859
794 TEST(RE2, FloatingPointFullMatchTypes) { 860 TEST(RE2, FloatingPointFullMatchTypes) {
795 string zeros(100, '0'); 861 string zeros(1000, '0');
796 { 862 {
797 float v; 863 float v;
798 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100); 864 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100);
799 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100); 865 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100);
800 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23)); 866 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23));
867 CHECK(RE2::FullMatch(" 100", "(.*)", &v)); CHECK_EQ(v, 100);
801 868
802 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v)); 869 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
803 CHECK_EQ(v, float(1e23)); 870 CHECK_EQ(v, float(1e23));
804 871
805 // 6700000000081920.1 is an edge case. 872 // 6700000000081920.1 is an edge case.
806 // 6700000000081920 is exactly halfway between 873 // 6700000000081920 is exactly halfway between
807 // two float32s, so the .1 should make it round up. 874 // two float32s, so the .1 should make it round up.
808 // However, the .1 is outside the precision possible with 875 // However, the .1 is outside the precision possible with
809 // a float64: the nearest float64 is 6700000000081920. 876 // a float64: the nearest float64 is 6700000000081920.
810 // So if the code uses strtod and then converts to float32, 877 // So if the code uses strtod and then converts to float32,
(...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after
990 CHECK(re.error().empty()); // Must have no error 1057 CHECK(re.error().empty()); // Must have no error
991 CHECK(re.ok()); 1058 CHECK(re.ok());
992 CHECK(re.error_code() == RE2::NoError); 1059 CHECK(re.error_code() == RE2::NoError);
993 } 1060 }
994 } 1061 }
995 1062
996 TEST(RE2, UTF8) { 1063 TEST(RE2, UTF8) {
997 // Check UTF-8 handling 1064 // Check UTF-8 handling
998 // Three Japanese characters (nihongo) 1065 // Three Japanese characters (nihongo)
999 const char utf8_string[] = { 1066 const char utf8_string[] = {
1000 0xe6, 0x97, 0xa5, // 65e5 1067 (char)0xe6, (char)0x97, (char)0xa5, // 65e5
1001 0xe6, 0x9c, 0xac, // 627c 1068 (char)0xe6, (char)0x9c, (char)0xac, // 627c
1002 0xe8, 0xaa, 0x9e, // 8a9e 1069 (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e
1003 0 1070 0
1004 }; 1071 };
1005 const char utf8_pattern[] = { 1072 const char utf8_pattern[] = {
1006 '.', 1073 '.',
1007 0xe6, 0x9c, 0xac, // 627c 1074 (char)0xe6, (char)0x9c, (char)0xac, // 627c
1008 '.', 1075 '.',
1009 0 1076 0
1010 }; 1077 };
1011 1078
1012 // Both should match in either mode, bytes or UTF-8 1079 // Both should match in either mode, bytes or UTF-8
1013 RE2 re_test1(".........", RE2::Latin1); 1080 RE2 re_test1(".........", RE2::Latin1);
1014 CHECK(RE2::FullMatch(utf8_string, re_test1)); 1081 CHECK(RE2::FullMatch(utf8_string, re_test1));
1015 RE2 re_test2("..."); 1082 RE2 re_test2("...");
1016 CHECK(RE2::FullMatch(utf8_string, re_test2)); 1083 CHECK(RE2::FullMatch(utf8_string, re_test2));
1017 1084
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
1246 if (t.match == NULL) { 1313 if (t.match == NULL) {
1247 EXPECT_FALSE(re.PartialMatch(t.text, re)); 1314 EXPECT_FALSE(re.PartialMatch(t.text, re));
1248 } else { 1315 } else {
1249 StringPiece m; 1316 StringPiece m;
1250 EXPECT_TRUE(re.PartialMatch(t.text, re, &m)); 1317 EXPECT_TRUE(re.PartialMatch(t.text, re, &m));
1251 EXPECT_EQ(m, t.match); 1318 EXPECT_EQ(m, t.match);
1252 } 1319 }
1253 } 1320 }
1254 } 1321 }
1255 1322
1323 // Check that dot_nl option works.
1324 TEST(RE2, DotNL) {
1325 RE2::Options opt;
1326 opt.set_dot_nl(true);
1327 EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt)));
1328 EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt)));
1329 opt.set_never_nl(true);
1330 EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt)));
1331 }
1332
1256 // Check that there are no capturing groups in "never capture" mode. 1333 // Check that there are no capturing groups in "never capture" mode.
1257 TEST(RE2, NeverCapture) { 1334 TEST(RE2, NeverCapture) {
1258 RE2::Options opt; 1335 RE2::Options opt;
1259 opt.set_never_capture(true); 1336 opt.set_never_capture(true);
1260 RE2 re("(r)(e)", opt); 1337 RE2 re("(r)(e)", opt);
1261 EXPECT_EQ(0, re.NumberOfCapturingGroups()); 1338 EXPECT_EQ(0, re.NumberOfCapturingGroups());
1262 } 1339 }
1263 1340
1264 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0. 1341 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0.
1265 // Triggered by a failed DFA search falling back to Bitstate when 1342 // Triggered by a failed DFA search falling back to Bitstate when
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
1370 EXPECT_EQ(want, have); 1447 EXPECT_EQ(want, have);
1371 } 1448 }
1372 1449
1373 TEST(RE2, RegexpToStringLossOfAnchor) { 1450 TEST(RE2, RegexpToStringLossOfAnchor) {
1374 EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at"); 1451 EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at");
1375 EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at"); 1452 EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at");
1376 EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$"); 1453 EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$");
1377 EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)"); 1454 EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
1378 } 1455 }
1379 1456
1457 // Issue 10131674
1458 TEST(RE2, Bug10131674) {
1459 // Some of these escapes describe values that do not fit in a byte.
1460 RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1);
1461 EXPECT_FALSE(re.ok());
1462 EXPECT_FALSE(RE2::FullMatch("hello world", re));
1463 }
1464
1465 TEST(RE2, Bug18391750) {
1466 // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanitizer.
1467 const char t[] = {
1468 (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08,
1469 (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5,
1470 (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69,
1471 (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31,
1472 (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29,
1473 (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00,
1474 };
1475 RE2::Options opt;
1476 opt.set_encoding(RE2::Options::EncodingLatin1);
1477 opt.set_longest_match(true);
1478 opt.set_dot_nl(true);
1479 opt.set_case_sensitive(false);
1480 RE2 re(t, opt);
1481 CHECK(re.ok());
1482 RE2::PartialMatch(t, re);
1483 }
1484
1485 TEST(RE2, Bug18458852) {
1486 // Bug in parser accepting invalid (too large) rune,
1487 // causing compiler to fail in DCHECK in UTF-8
1488 // character class code.
1489 const char b[] = {
1490 (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28,
1491 (char)0x24, (char)0x5b, (char)0x5e, (char)0xf5, (char)0x87, (char)0x87,
1492 (char)0x90, (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00,
1493 };
1494 RE2 re(b);
1495 CHECK(!re.ok());
1496 }
1497
1498 TEST(RE2, Bug18523943) {
1499 // Bug in bitstate: case kFailInst was merged into the default with LOG(DFATAL).
1500
1501 RE2::Options opt;
1502 const char a[] = {
1503 (char)0x29, (char)0x29, (char)0x24, (char)0x00,
1504 };
1505 const char b[] = {
1506 (char)0x28, (char)0x0a, (char)0x2a, (char)0x2a, (char)0x29, (char)0x00,
1507 };
1508 opt.set_log_errors(false);
1509 opt.set_encoding(RE2::Options::EncodingLatin1);
1510 opt.set_posix_syntax(true);
1511 opt.set_longest_match(true);
1512 opt.set_literal(false);
1513 opt.set_never_nl(true);
1514
1515 RE2 re((const char*)b, opt);
1516 CHECK(re.ok());
1517 string s1;
1518 CHECK(!RE2::PartialMatch((const char*)a, re, &s1));
1519 }
1520
1521 TEST(RE2, Bug21371806) {
1522 // Bug in parser accepting Unicode groups in Latin-1 mode,
1523 // causing compiler to fail in DCHECK in prog.cc.
1524
1525 RE2::Options opt;
1526 opt.set_encoding(RE2::Options::EncodingLatin1);
1527
1528 RE2 re("g\\p{Zl}]", opt);
1529 CHECK(re.ok());
1530 }
1531
1380 } // namespace re2 1532 } // namespace re2
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698