OLD | NEW |
1 // -*- coding: utf-8 -*- | 1 // -*- coding: utf-8 -*- |
2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved. | 2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved. |
3 // Use of this source code is governed by a BSD-style | 3 // Use of this source code is governed by a BSD-style |
4 // license that can be found in the LICENSE file. | 4 // license that can be found in the LICENSE file. |
5 | 5 |
6 // TODO: Test extractions for PartialMatch/Consume | 6 // TODO: Test extractions for PartialMatch/Consume |
7 | 7 |
8 #include <sys/types.h> | 8 #include <errno.h> |
9 #ifndef WIN32 | 9 #ifndef _MSC_VER |
| 10 #include <unistd.h> /* for sysconf */ |
10 #include <sys/mman.h> | 11 #include <sys/mman.h> |
11 #endif | 12 #endif |
12 #include <sys/stat.h> | 13 #include <sys/stat.h> |
13 #include <errno.h> | 14 #include <sys/types.h> |
14 #include <vector> | 15 #include <vector> |
15 #include "util/test.h" | 16 #include "util/test.h" |
16 #include "re2/re2.h" | 17 #include "re2/re2.h" |
17 #include "re2/regexp.h" | 18 #include "re2/regexp.h" |
18 | 19 |
19 #ifdef WIN32 | |
20 #include <stdio.h> | |
21 #define snprintf _snprintf | |
22 #endif | |
23 | |
24 DECLARE_bool(logtostderr); | 20 DECLARE_bool(logtostderr); |
25 | 21 |
26 namespace re2 { | 22 namespace re2 { |
27 | 23 |
28 TEST(RE2, HexTests) { | 24 TEST(RE2, HexTests) { |
29 | 25 |
30 VLOG(1) << "hex tests"; | 26 VLOG(1) << "hex tests"; |
31 | 27 |
32 #define CHECK_HEX(type, value) \ | 28 #define CHECK_HEX(type, value) \ |
33 do { \ | 29 do { \ |
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
173 // Check newline handling | 169 // Check newline handling |
174 { "a.*a", | 170 { "a.*a", |
175 "(\\0)", | 171 "(\\0)", |
176 "aba\naba", | 172 "aba\naba", |
177 "(aba)\naba", | 173 "(aba)\naba", |
178 "(aba)\n(aba)", | 174 "(aba)\n(aba)", |
179 2 }, | 175 2 }, |
180 { "", NULL, NULL, NULL, NULL, 0 } | 176 { "", NULL, NULL, NULL, NULL, 0 } |
181 }; | 177 }; |
182 | 178 |
183 for (const ReplaceTest *t = tests; t->original != NULL; ++t) { | 179 for (const ReplaceTest* t = tests; t->original != NULL; t++) { |
184 VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->rewrite); | 180 VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->rewrite); |
185 string one(t->original); | 181 string one(t->original); |
186 CHECK(RE2::Replace(&one, t->regexp, t->rewrite)); | 182 CHECK(RE2::Replace(&one, t->regexp, t->rewrite)); |
187 CHECK_EQ(one, t->single); | 183 CHECK_EQ(one, t->single); |
188 string all(t->original); | 184 string all(t->original); |
189 CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count) | 185 CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count) |
190 << "Got: " << all; | 186 << "Got: " << all; |
191 CHECK_EQ(all, t->global); | 187 CHECK_EQ(all, t->global); |
192 } | 188 } |
193 } | 189 } |
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
366 CHECK_EQ(group[3], "9000"); | 362 CHECK_EQ(group[3], "9000"); |
367 | 363 |
368 string all, host; | 364 string all, host; |
369 int port; | 365 int port; |
370 CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port)); | 366 CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port)); |
371 CHECK_EQ(all, "chrisr:9000"); | 367 CHECK_EQ(all, "chrisr:9000"); |
372 CHECK_EQ(host, "chrisr"); | 368 CHECK_EQ(host, "chrisr"); |
373 CHECK_EQ(port, 9000); | 369 CHECK_EQ(port, 9000); |
374 } | 370 } |
375 | 371 |
376 static void TestRecursion(int size, const char *pattern) { | 372 static void TestRecursion(int size, const char* pattern) { |
377 // Fill up a string repeating the pattern given | 373 // Fill up a string repeating the pattern given |
378 string domain; | 374 string domain; |
379 domain.resize(size); | 375 domain.resize(size); |
380 int patlen = strlen(pattern); | 376 size_t patlen = strlen(pattern); |
381 for (int i = 0; i < size; ++i) { | 377 for (int i = 0; i < size; i++) { |
382 domain[i] = pattern[i % patlen]; | 378 domain[i] = pattern[i % patlen]; |
383 } | 379 } |
384 // Just make sure it doesn't crash due to too much recursion. | 380 // Just make sure it doesn't crash due to too much recursion. |
385 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet); | 381 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet); |
386 RE2::FullMatch(domain, re); | 382 RE2::FullMatch(domain, re); |
387 } | 383 } |
388 | 384 |
389 // A meta-quoted string, interpreted as a pattern, should always match | 385 // A meta-quoted string, interpreted as a pattern, should always match |
390 // the original unquoted string. | 386 // the original unquoted string. |
391 static void TestQuoteMeta(string unquoted, | 387 static void TestQuoteMeta(string unquoted, |
392 const RE2::Options& options = RE2::DefaultOptions) { | 388 const RE2::Options& options = RE2::DefaultOptions) { |
393 string quoted = RE2::QuoteMeta(unquoted); | 389 string quoted = RE2::QuoteMeta(unquoted); |
394 RE2 re(quoted, options); | 390 RE2 re(quoted, options); |
395 EXPECT_TRUE_M(RE2::FullMatch(unquoted, re), | 391 EXPECT_TRUE(RE2::FullMatch(unquoted, re)) |
396 "Unquoted='" + unquoted + "', quoted='" + quoted + "'."); | 392 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'."; |
397 } | 393 } |
398 | 394 |
399 // A meta-quoted string, interpreted as a pattern, should always match | 395 // A meta-quoted string, interpreted as a pattern, should always match |
400 // the original unquoted string. | 396 // the original unquoted string. |
401 static void NegativeTestQuoteMeta(string unquoted, string should_not_match, | 397 static void NegativeTestQuoteMeta(string unquoted, string should_not_match, |
402 const RE2::Options& options = RE2::DefaultOptions) { | 398 const RE2::Options& options = RE2::DefaultOptions) { |
403 string quoted = RE2::QuoteMeta(unquoted); | 399 string quoted = RE2::QuoteMeta(unquoted); |
404 RE2 re(quoted, options); | 400 RE2 re(quoted, options); |
405 EXPECT_FALSE_M(RE2::FullMatch(should_not_match, re), | 401 EXPECT_FALSE(RE2::FullMatch(should_not_match, re)) |
406 "Unquoted='" + unquoted + "', quoted='" + quoted + "'."); | 402 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'."; |
407 } | 403 } |
408 | 404 |
409 // Tests that quoted meta characters match their original strings, | 405 // Tests that quoted meta characters match their original strings, |
410 // and that a few things that shouldn't match indeed do not. | 406 // and that a few things that shouldn't match indeed do not. |
411 TEST(QuoteMeta, Simple) { | 407 TEST(QuoteMeta, Simple) { |
412 TestQuoteMeta("foo"); | 408 TestQuoteMeta("foo"); |
413 TestQuoteMeta("foo.bar"); | 409 TestQuoteMeta("foo.bar"); |
414 TestQuoteMeta("foo\\.bar"); | 410 TestQuoteMeta("foo\\.bar"); |
415 TestQuoteMeta("[1-9]"); | 411 TestQuoteMeta("[1-9]"); |
416 TestQuoteMeta("1.5-2.0?"); | 412 TestQuoteMeta("1.5-2.0?"); |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
462 | 458 |
463 // Don't want null-followed-by-'1' to be interpreted as '\01'. | 459 // Don't want null-followed-by-'1' to be interpreted as '\01'. |
464 has_null += '1'; | 460 has_null += '1'; |
465 TestQuoteMeta(has_null); | 461 TestQuoteMeta(has_null); |
466 NegativeTestQuoteMeta(has_null, "\1"); | 462 NegativeTestQuoteMeta(has_null, "\1"); |
467 } | 463 } |
468 | 464 |
469 TEST(ProgramSize, BigProgram) { | 465 TEST(ProgramSize, BigProgram) { |
470 RE2 re_simple("simple regexp"); | 466 RE2 re_simple("simple regexp"); |
471 RE2 re_medium("medium.*regexp"); | 467 RE2 re_medium("medium.*regexp"); |
472 RE2 re_complex("hard.{1,128}regexp"); | 468 RE2 re_complex("complex.{1,128}regexp"); |
473 | 469 |
474 CHECK_GT(re_simple.ProgramSize(), 0); | 470 CHECK_GT(re_simple.ProgramSize(), 0); |
475 CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize()); | 471 CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize()); |
476 CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize()); | 472 CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize()); |
477 } | 473 } |
478 | 474 |
| 475 TEST(ProgramFanout, BigProgram) { |
| 476 RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)"); |
| 477 RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)"); |
| 478 RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)"); |
| 479 RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)"); |
| 480 |
| 481 map<int, int> histogram; |
| 482 |
| 483 // 3 is the largest non-empty bucket and has 1 element. |
| 484 CHECK_EQ(3, re1.ProgramFanout(&histogram)); |
| 485 CHECK_EQ(1, histogram[3]); |
| 486 |
| 487 // 7 is the largest non-empty bucket and has 10 elements. |
| 488 CHECK_EQ(7, re10.ProgramFanout(&histogram)); |
| 489 CHECK_EQ(10, histogram[7]); |
| 490 |
| 491 // 10 is the largest non-empty bucket and has 100 elements. |
| 492 CHECK_EQ(10, re100.ProgramFanout(&histogram)); |
| 493 CHECK_EQ(100, histogram[10]); |
| 494 |
| 495 // 13 is the largest non-empty bucket and has 1000 elements. |
| 496 CHECK_EQ(13, re1000.ProgramFanout(&histogram)); |
| 497 CHECK_EQ(1000, histogram[13]); |
| 498 } |
| 499 |
479 // Issue 956519: handling empty character sets was | 500 // Issue 956519: handling empty character sets was |
480 // causing NULL dereference. This tests a few empty character sets. | 501 // causing NULL dereference. This tests a few empty character sets. |
481 // (The way to get an empty character set is to negate a full one.) | 502 // (The way to get an empty character set is to negate a full one.) |
482 TEST(EmptyCharset, Fuzz) { | 503 TEST(EmptyCharset, Fuzz) { |
483 static const char *empties[] = { | 504 static const char *empties[] = { |
484 "[^\\S\\s]", | 505 "[^\\S\\s]", |
485 "[^\\S[:space:]]", | 506 "[^\\S[:space:]]", |
486 "[^\\D\\d]", | 507 "[^\\D\\d]", |
487 "[^\\D[:digit:]]" | 508 "[^\\D[:digit:]]" |
488 }; | 509 }; |
489 for (int i = 0; i < arraysize(empties); i++) | 510 for (int i = 0; i < arraysize(empties); i++) |
490 CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0)); | 511 CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0)); |
491 } | 512 } |
492 | 513 |
| 514 // Bitstate assumes that kInstFail instructions in |
| 515 // alternations or capture groups have been "compiled away". |
| 516 TEST(EmptyCharset, BitstateAssumptions) { |
| 517 // Captures trigger use of Bitstate. |
| 518 static const char *nop_empties[] = { |
| 519 "((((()))))" "[^\\S\\s]?", |
| 520 "((((()))))" "([^\\S\\s])?", |
| 521 "((((()))))" "([^\\S\\s]|[^\\S\\s])?", |
| 522 "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)" |
| 523 }; |
| 524 StringPiece group[6]; |
| 525 for (int i = 0; i < arraysize(nop_empties); i++) |
| 526 CHECK(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6)); |
| 527 } |
| 528 |
493 // Test that named groups work correctly. | 529 // Test that named groups work correctly. |
494 TEST(Capture, NamedGroups) { | 530 TEST(Capture, NamedGroups) { |
495 { | 531 { |
496 RE2 re("(hello world)"); | 532 RE2 re("(hello world)"); |
497 CHECK_EQ(re.NumberOfCapturingGroups(), 1); | 533 CHECK_EQ(re.NumberOfCapturingGroups(), 1); |
498 const map<string, int>& m = re.NamedCapturingGroups(); | 534 const map<string, int>& m = re.NamedCapturingGroups(); |
499 CHECK_EQ(m.size(), 0); | 535 CHECK_EQ(m.size(), 0); |
500 } | 536 } |
501 | 537 |
502 { | 538 { |
503 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))"); | 539 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))"); |
504 CHECK_EQ(re.NumberOfCapturingGroups(), 6); | 540 CHECK_EQ(re.NumberOfCapturingGroups(), 6); |
505 const map<string, int>& m = re.NamedCapturingGroups(); | 541 const map<string, int>& m = re.NamedCapturingGroups(); |
506 CHECK_EQ(m.size(), 4); | 542 CHECK_EQ(m.size(), 4); |
507 CHECK_EQ(m.find("A")->second, 1); | 543 CHECK_EQ(m.find("A")->second, 1); |
508 CHECK_EQ(m.find("B")->second, 2); | 544 CHECK_EQ(m.find("B")->second, 2); |
509 CHECK_EQ(m.find("C")->second, 3); | 545 CHECK_EQ(m.find("C")->second, 3); |
510 CHECK_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous | 546 CHECK_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous |
511 } | 547 } |
512 } | 548 } |
513 | 549 |
| 550 TEST(RE2, CapturedGroupTest) { |
| 551 RE2 re("directions from (?P<S>.*) to (?P<D>.*)"); |
| 552 int num_groups = re.NumberOfCapturingGroups(); |
| 553 EXPECT_EQ(2, num_groups); |
| 554 string args[4]; |
| 555 RE2::Arg arg0(&args[0]); |
| 556 RE2::Arg arg1(&args[1]); |
| 557 RE2::Arg arg2(&args[2]); |
| 558 RE2::Arg arg3(&args[3]); |
| 559 |
| 560 const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3}; |
| 561 EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose", |
| 562 re, matches, num_groups)); |
| 563 const map<string, int>& named_groups = re.NamedCapturingGroups(); |
| 564 EXPECT_TRUE(named_groups.find("S") != named_groups.end()); |
| 565 EXPECT_TRUE(named_groups.find("D") != named_groups.end()); |
| 566 |
| 567 // The named group index is 1-based. |
| 568 int source_group_index = named_groups.find("S")->second; |
| 569 int destination_group_index = named_groups.find("D")->second; |
| 570 EXPECT_EQ(1, source_group_index); |
| 571 EXPECT_EQ(2, destination_group_index); |
| 572 |
| 573 // The args is zero-based. |
| 574 EXPECT_EQ("mountain view", args[source_group_index - 1]); |
| 575 EXPECT_EQ("san jose", args[destination_group_index - 1]); |
| 576 } |
| 577 |
514 TEST(RE2, FullMatchWithNoArgs) { | 578 TEST(RE2, FullMatchWithNoArgs) { |
515 CHECK(RE2::FullMatch("h", "h")); | 579 CHECK(RE2::FullMatch("h", "h")); |
516 CHECK(RE2::FullMatch("hello", "hello")); | 580 CHECK(RE2::FullMatch("hello", "hello")); |
517 CHECK(RE2::FullMatch("hello", "h.*o")); | 581 CHECK(RE2::FullMatch("hello", "h.*o")); |
518 CHECK(!RE2::FullMatch("othello", "h.*o")); // Must be anchored at front | 582 CHECK(!RE2::FullMatch("othello", "h.*o")); // Must be anchored at front |
519 CHECK(!RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end | 583 CHECK(!RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end |
520 } | 584 } |
521 | 585 |
522 TEST(RE2, PartialMatch) { | 586 TEST(RE2, PartialMatch) { |
523 CHECK(RE2::PartialMatch("x", "x")); | 587 CHECK(RE2::PartialMatch("x", "x")); |
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
657 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL)); | 721 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL)); |
658 | 722 |
659 // Fail on non-void* NULL arg if the match doesn't parse for the given type. | 723 // Fail on non-void* NULL arg if the match doesn't parse for the given type. |
660 CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL)); | 724 CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL)); |
661 CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL)); | 725 CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL)); |
662 CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL)); | 726 CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL)); |
663 CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL)); | 727 CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL)); |
664 CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL)); | 728 CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL)); |
665 } | 729 } |
666 | 730 |
667 #ifndef WIN32 | |
668 // Check that numeric parsing code does not read past the end of | 731 // Check that numeric parsing code does not read past the end of |
669 // the number being parsed. | 732 // the number being parsed. |
| 733 // This implementation requires mmap(2) et al. and thus cannot |
| 734 // be used unless they are available. |
670 TEST(RE2, NULTerminated) { | 735 TEST(RE2, NULTerminated) { |
| 736 #if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0 |
671 char *v; | 737 char *v; |
672 int x; | 738 int x; |
673 long pagesize = sysconf(_SC_PAGE_SIZE); | 739 long pagesize = sysconf(_SC_PAGE_SIZE); |
674 | 740 |
675 #ifndef MAP_ANONYMOUS | 741 #ifndef MAP_ANONYMOUS |
676 #define MAP_ANONYMOUS MAP_ANON | 742 #define MAP_ANONYMOUS MAP_ANON |
677 #endif | 743 #endif |
678 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE, | 744 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE, |
679 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)); | 745 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)); |
680 CHECK(v != reinterpret_cast<char*>(-1)); | 746 CHECK(v != reinterpret_cast<char*>(-1)); |
681 LOG(INFO) << "Memory at " << (void*)v; | 747 LOG(INFO) << "Memory at " << (void*)v; |
682 CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno; | 748 CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno; |
683 v[pagesize - 1] = '1'; | 749 v[pagesize - 1] = '1'; |
684 | 750 |
685 x = 0; | 751 x = 0; |
686 CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x)); | 752 CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x)); |
687 CHECK_EQ(x, 1); | 753 CHECK_EQ(x, 1); |
| 754 #endif |
688 } | 755 } |
689 #endif | |
690 | 756 |
691 TEST(RE2, FullMatchTypeTests) { | 757 TEST(RE2, FullMatchTypeTests) { |
692 // Type tests | 758 // Type tests |
693 string zeros(100, '0'); | 759 string zeros(1000, '0'); |
694 { | 760 { |
695 char c; | 761 char c; |
696 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); | 762 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); |
697 CHECK_EQ(c, 'H'); | 763 CHECK_EQ(c, 'H'); |
698 } | 764 } |
699 { | 765 { |
700 unsigned char c; | 766 unsigned char c; |
701 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); | 767 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); |
702 CHECK_EQ(c, static_cast<unsigned char>('H')); | 768 CHECK_EQ(c, static_cast<unsigned char>('H')); |
703 } | 769 } |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
785 snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max); | 851 snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max); |
786 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); | 852 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); |
787 | 853 |
788 assert(buf[strlen(buf)-1] != '9'); | 854 assert(buf[strlen(buf)-1] != '9'); |
789 buf[strlen(buf)-1]++; | 855 buf[strlen(buf)-1]++; |
790 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); | 856 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); |
791 } | 857 } |
792 } | 858 } |
793 | 859 |
794 TEST(RE2, FloatingPointFullMatchTypes) { | 860 TEST(RE2, FloatingPointFullMatchTypes) { |
795 string zeros(100, '0'); | 861 string zeros(1000, '0'); |
796 { | 862 { |
797 float v; | 863 float v; |
798 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100); | 864 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100); |
799 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100); | 865 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100); |
800 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23)); | 866 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23)); |
| 867 CHECK(RE2::FullMatch(" 100", "(.*)", &v)); CHECK_EQ(v, 100); |
801 | 868 |
802 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v)); | 869 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v)); |
803 CHECK_EQ(v, float(1e23)); | 870 CHECK_EQ(v, float(1e23)); |
804 | 871 |
805 // 6700000000081920.1 is an edge case. | 872 // 6700000000081920.1 is an edge case. |
806 // 6700000000081920 is exactly halfway between | 873 // 6700000000081920 is exactly halfway between |
807 // two float32s, so the .1 should make it round up. | 874 // two float32s, so the .1 should make it round up. |
808 // However, the .1 is outside the precision possible with | 875 // However, the .1 is outside the precision possible with |
809 // a float64: the nearest float64 is 6700000000081920. | 876 // a float64: the nearest float64 is 6700000000081920. |
810 // So if the code uses strtod and then converts to float32, | 877 // So if the code uses strtod and then converts to float32, |
(...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
990 CHECK(re.error().empty()); // Must have no error | 1057 CHECK(re.error().empty()); // Must have no error |
991 CHECK(re.ok()); | 1058 CHECK(re.ok()); |
992 CHECK(re.error_code() == RE2::NoError); | 1059 CHECK(re.error_code() == RE2::NoError); |
993 } | 1060 } |
994 } | 1061 } |
995 | 1062 |
996 TEST(RE2, UTF8) { | 1063 TEST(RE2, UTF8) { |
997 // Check UTF-8 handling | 1064 // Check UTF-8 handling |
998 // Three Japanese characters (nihongo) | 1065 // Three Japanese characters (nihongo) |
999 const char utf8_string[] = { | 1066 const char utf8_string[] = { |
1000 0xe6, 0x97, 0xa5, // 65e5 | 1067 (char)0xe6, (char)0x97, (char)0xa5, // 65e5 |
1001 0xe6, 0x9c, 0xac, // 627c | 1068 (char)0xe6, (char)0x9c, (char)0xac, // 627c |
1002 0xe8, 0xaa, 0x9e, // 8a9e | 1069 (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e |
1003 0 | 1070 0 |
1004 }; | 1071 }; |
1005 const char utf8_pattern[] = { | 1072 const char utf8_pattern[] = { |
1006 '.', | 1073 '.', |
1007 0xe6, 0x9c, 0xac, // 627c | 1074 (char)0xe6, (char)0x9c, (char)0xac, // 627c |
1008 '.', | 1075 '.', |
1009 0 | 1076 0 |
1010 }; | 1077 }; |
1011 | 1078 |
1012 // Both should match in either mode, bytes or UTF-8 | 1079 // Both should match in either mode, bytes or UTF-8 |
1013 RE2 re_test1(".........", RE2::Latin1); | 1080 RE2 re_test1(".........", RE2::Latin1); |
1014 CHECK(RE2::FullMatch(utf8_string, re_test1)); | 1081 CHECK(RE2::FullMatch(utf8_string, re_test1)); |
1015 RE2 re_test2("..."); | 1082 RE2 re_test2("..."); |
1016 CHECK(RE2::FullMatch(utf8_string, re_test2)); | 1083 CHECK(RE2::FullMatch(utf8_string, re_test2)); |
1017 | 1084 |
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1246 if (t.match == NULL) { | 1313 if (t.match == NULL) { |
1247 EXPECT_FALSE(re.PartialMatch(t.text, re)); | 1314 EXPECT_FALSE(re.PartialMatch(t.text, re)); |
1248 } else { | 1315 } else { |
1249 StringPiece m; | 1316 StringPiece m; |
1250 EXPECT_TRUE(re.PartialMatch(t.text, re, &m)); | 1317 EXPECT_TRUE(re.PartialMatch(t.text, re, &m)); |
1251 EXPECT_EQ(m, t.match); | 1318 EXPECT_EQ(m, t.match); |
1252 } | 1319 } |
1253 } | 1320 } |
1254 } | 1321 } |
1255 | 1322 |
| 1323 // Check that dot_nl option works. |
| 1324 TEST(RE2, DotNL) { |
| 1325 RE2::Options opt; |
| 1326 opt.set_dot_nl(true); |
| 1327 EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt))); |
| 1328 EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt))); |
| 1329 opt.set_never_nl(true); |
| 1330 EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt))); |
| 1331 } |
| 1332 |
1256 // Check that there are no capturing groups in "never capture" mode. | 1333 // Check that there are no capturing groups in "never capture" mode. |
1257 TEST(RE2, NeverCapture) { | 1334 TEST(RE2, NeverCapture) { |
1258 RE2::Options opt; | 1335 RE2::Options opt; |
1259 opt.set_never_capture(true); | 1336 opt.set_never_capture(true); |
1260 RE2 re("(r)(e)", opt); | 1337 RE2 re("(r)(e)", opt); |
1261 EXPECT_EQ(0, re.NumberOfCapturingGroups()); | 1338 EXPECT_EQ(0, re.NumberOfCapturingGroups()); |
1262 } | 1339 } |
1263 | 1340 |
1264 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0. | 1341 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0. |
1265 // Triggered by a failed DFA search falling back to Bitstate when | 1342 // Triggered by a failed DFA search falling back to Bitstate when |
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1370 EXPECT_EQ(want, have); | 1447 EXPECT_EQ(want, have); |
1371 } | 1448 } |
1372 | 1449 |
1373 TEST(RE2, RegexpToStringLossOfAnchor) { | 1450 TEST(RE2, RegexpToStringLossOfAnchor) { |
1374 EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at"); | 1451 EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at"); |
1375 EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at"); | 1452 EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at"); |
1376 EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$"); | 1453 EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$"); |
1377 EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)"); | 1454 EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)"); |
1378 } | 1455 } |
1379 | 1456 |
| 1457 // Issue 10131674 |
| 1458 TEST(RE2, Bug10131674) { |
| 1459 // Some of these escapes describe values that do not fit in a byte. |
| 1460 RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1); |
| 1461 EXPECT_FALSE(re.ok()); |
| 1462 EXPECT_FALSE(RE2::FullMatch("hello world", re)); |
| 1463 } |
| 1464 |
| 1465 TEST(RE2, Bug18391750) { |
| 1466 // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanitizer. |
| 1467 const char t[] = { |
| 1468 (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08, |
| 1469 (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5, |
| 1470 (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69, |
| 1471 (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31, |
| 1472 (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29, |
| 1473 (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00, |
| 1474 }; |
| 1475 RE2::Options opt; |
| 1476 opt.set_encoding(RE2::Options::EncodingLatin1); |
| 1477 opt.set_longest_match(true); |
| 1478 opt.set_dot_nl(true); |
| 1479 opt.set_case_sensitive(false); |
| 1480 RE2 re(t, opt); |
| 1481 CHECK(re.ok()); |
| 1482 RE2::PartialMatch(t, re); |
| 1483 } |
| 1484 |
| 1485 TEST(RE2, Bug18458852) { |
| 1486 // Bug in parser accepting invalid (too large) rune, |
| 1487 // causing compiler to fail in DCHECK in UTF-8 |
| 1488 // character class code. |
| 1489 const char b[] = { |
| 1490 (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28, |
| 1491 (char)0x24, (char)0x5b, (char)0x5e, (char)0xf5, (char)0x87, (char)0x87, |
| 1492 (char)0x90, (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00, |
| 1493 }; |
| 1494 RE2 re(b); |
| 1495 CHECK(!re.ok()); |
| 1496 } |
| 1497 |
| 1498 TEST(RE2, Bug18523943) { |
| 1499 // Bug in bitstate: case kFailInst was merged into the default with LOG(DFATAL). |
| 1500 |
| 1501 RE2::Options opt; |
| 1502 const char a[] = { |
| 1503 (char)0x29, (char)0x29, (char)0x24, (char)0x00, |
| 1504 }; |
| 1505 const char b[] = { |
| 1506 (char)0x28, (char)0x0a, (char)0x2a, (char)0x2a, (char)0x29, (char)0x00, |
| 1507 }; |
| 1508 opt.set_log_errors(false); |
| 1509 opt.set_encoding(RE2::Options::EncodingLatin1); |
| 1510 opt.set_posix_syntax(true); |
| 1511 opt.set_longest_match(true); |
| 1512 opt.set_literal(false); |
| 1513 opt.set_never_nl(true); |
| 1514 |
| 1515 RE2 re((const char*)b, opt); |
| 1516 CHECK(re.ok()); |
| 1517 string s1; |
| 1518 CHECK(!RE2::PartialMatch((const char*)a, re, &s1)); |
| 1519 } |
| 1520 |
| 1521 TEST(RE2, Bug21371806) { |
| 1522 // Bug in parser accepting Unicode groups in Latin-1 mode, |
| 1523 // causing compiler to fail in DCHECK in prog.cc. |
| 1524 |
| 1525 RE2::Options opt; |
| 1526 opt.set_encoding(RE2::Options::EncodingLatin1); |
| 1527 |
| 1528 RE2 re("g\\p{Zl}]", opt); |
| 1529 CHECK(re.ok()); |
| 1530 } |
| 1531 |
1380 } // namespace re2 | 1532 } // namespace re2 |
OLD | NEW |