| OLD | NEW |
| 1 // -*- coding: utf-8 -*- | 1 // -*- coding: utf-8 -*- |
| 2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved. | 2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved. |
| 3 // Use of this source code is governed by a BSD-style | 3 // Use of this source code is governed by a BSD-style |
| 4 // license that can be found in the LICENSE file. | 4 // license that can be found in the LICENSE file. |
| 5 | 5 |
| 6 // TODO: Test extractions for PartialMatch/Consume | 6 // TODO: Test extractions for PartialMatch/Consume |
| 7 | 7 |
| 8 #include <errno.h> | 8 #include <sys/types.h> |
| 9 #ifndef _MSC_VER | 9 #ifndef WIN32 |
| 10 #include <unistd.h> /* for sysconf */ | |
| 11 #include <sys/mman.h> | 10 #include <sys/mman.h> |
| 12 #endif | 11 #endif |
| 13 #include <sys/stat.h> | 12 #include <sys/stat.h> |
| 14 #include <sys/types.h> | 13 #include <errno.h> |
| 15 #include <vector> | 14 #include <vector> |
| 16 #include "util/test.h" | 15 #include "util/test.h" |
| 17 #include "re2/re2.h" | 16 #include "re2/re2.h" |
| 18 #include "re2/regexp.h" | 17 #include "re2/regexp.h" |
| 19 | 18 |
| 19 #ifdef WIN32 |
| 20 #include <stdio.h> |
| 21 #define snprintf _snprintf |
| 22 #endif |
| 23 |
| 20 DECLARE_bool(logtostderr); | 24 DECLARE_bool(logtostderr); |
| 21 | 25 |
| 22 namespace re2 { | 26 namespace re2 { |
| 23 | 27 |
| 24 TEST(RE2, HexTests) { | 28 TEST(RE2, HexTests) { |
| 25 | 29 |
| 26 VLOG(1) << "hex tests"; | 30 VLOG(1) << "hex tests"; |
| 27 | 31 |
| 28 #define CHECK_HEX(type, value) \ | 32 #define CHECK_HEX(type, value) \ |
| 29 do { \ | 33 do { \ |
| (...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 169 // Check newline handling | 173 // Check newline handling |
| 170 { "a.*a", | 174 { "a.*a", |
| 171 "(\\0)", | 175 "(\\0)", |
| 172 "aba\naba", | 176 "aba\naba", |
| 173 "(aba)\naba", | 177 "(aba)\naba", |
| 174 "(aba)\n(aba)", | 178 "(aba)\n(aba)", |
| 175 2 }, | 179 2 }, |
| 176 { "", NULL, NULL, NULL, NULL, 0 } | 180 { "", NULL, NULL, NULL, NULL, 0 } |
| 177 }; | 181 }; |
| 178 | 182 |
| 179 for (const ReplaceTest* t = tests; t->original != NULL; t++) { | 183 for (const ReplaceTest *t = tests; t->original != NULL; ++t) { |
| 180 VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->re
write); | 184 VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->re
write); |
| 181 string one(t->original); | 185 string one(t->original); |
| 182 CHECK(RE2::Replace(&one, t->regexp, t->rewrite)); | 186 CHECK(RE2::Replace(&one, t->regexp, t->rewrite)); |
| 183 CHECK_EQ(one, t->single); | 187 CHECK_EQ(one, t->single); |
| 184 string all(t->original); | 188 string all(t->original); |
| 185 CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count) | 189 CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count) |
| 186 << "Got: " << all; | 190 << "Got: " << all; |
| 187 CHECK_EQ(all, t->global); | 191 CHECK_EQ(all, t->global); |
| 188 } | 192 } |
| 189 } | 193 } |
| (...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 362 CHECK_EQ(group[3], "9000"); | 366 CHECK_EQ(group[3], "9000"); |
| 363 | 367 |
| 364 string all, host; | 368 string all, host; |
| 365 int port; | 369 int port; |
| 366 CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port)); | 370 CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port)); |
| 367 CHECK_EQ(all, "chrisr:9000"); | 371 CHECK_EQ(all, "chrisr:9000"); |
| 368 CHECK_EQ(host, "chrisr"); | 372 CHECK_EQ(host, "chrisr"); |
| 369 CHECK_EQ(port, 9000); | 373 CHECK_EQ(port, 9000); |
| 370 } | 374 } |
| 371 | 375 |
| 372 static void TestRecursion(int size, const char* pattern) { | 376 static void TestRecursion(int size, const char *pattern) { |
| 373 // Fill up a string repeating the pattern given | 377 // Fill up a string repeating the pattern given |
| 374 string domain; | 378 string domain; |
| 375 domain.resize(size); | 379 domain.resize(size); |
| 376 size_t patlen = strlen(pattern); | 380 int patlen = strlen(pattern); |
| 377 for (int i = 0; i < size; i++) { | 381 for (int i = 0; i < size; ++i) { |
| 378 domain[i] = pattern[i % patlen]; | 382 domain[i] = pattern[i % patlen]; |
| 379 } | 383 } |
| 380 // Just make sure it doesn't crash due to too much recursion. | 384 // Just make sure it doesn't crash due to too much recursion. |
| 381 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet); | 385 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet); |
| 382 RE2::FullMatch(domain, re); | 386 RE2::FullMatch(domain, re); |
| 383 } | 387 } |
| 384 | 388 |
| 385 // A meta-quoted string, interpreted as a pattern, should always match | 389 // A meta-quoted string, interpreted as a pattern, should always match |
| 386 // the original unquoted string. | 390 // the original unquoted string. |
| 387 static void TestQuoteMeta(string unquoted, | 391 static void TestQuoteMeta(string unquoted, |
| 388 const RE2::Options& options = RE2::DefaultOptions) { | 392 const RE2::Options& options = RE2::DefaultOptions) { |
| 389 string quoted = RE2::QuoteMeta(unquoted); | 393 string quoted = RE2::QuoteMeta(unquoted); |
| 390 RE2 re(quoted, options); | 394 RE2 re(quoted, options); |
| 391 EXPECT_TRUE(RE2::FullMatch(unquoted, re)) | 395 EXPECT_TRUE_M(RE2::FullMatch(unquoted, re), |
| 392 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'."; | 396 "Unquoted='" + unquoted + "', quoted='" + quoted + "'."); |
| 393 } | 397 } |
| 394 | 398 |
| 395 // A meta-quoted string, interpreted as a pattern, should always match | 399 // A meta-quoted string, interpreted as a pattern, should always match |
| 396 // the original unquoted string. | 400 // the original unquoted string. |
| 397 static void NegativeTestQuoteMeta(string unquoted, string should_not_match, | 401 static void NegativeTestQuoteMeta(string unquoted, string should_not_match, |
| 398 const RE2::Options& options = RE2::DefaultOpti
ons) { | 402 const RE2::Options& options = RE2::DefaultOpti
ons) { |
| 399 string quoted = RE2::QuoteMeta(unquoted); | 403 string quoted = RE2::QuoteMeta(unquoted); |
| 400 RE2 re(quoted, options); | 404 RE2 re(quoted, options); |
| 401 EXPECT_FALSE(RE2::FullMatch(should_not_match, re)) | 405 EXPECT_FALSE_M(RE2::FullMatch(should_not_match, re), |
| 402 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'."; | 406 "Unquoted='" + unquoted + "', quoted='" + quoted + "'."); |
| 403 } | 407 } |
| 404 | 408 |
| 405 // Tests that quoted meta characters match their original strings, | 409 // Tests that quoted meta characters match their original strings, |
| 406 // and that a few things that shouldn't match indeed do not. | 410 // and that a few things that shouldn't match indeed do not. |
| 407 TEST(QuoteMeta, Simple) { | 411 TEST(QuoteMeta, Simple) { |
| 408 TestQuoteMeta("foo"); | 412 TestQuoteMeta("foo"); |
| 409 TestQuoteMeta("foo.bar"); | 413 TestQuoteMeta("foo.bar"); |
| 410 TestQuoteMeta("foo\\.bar"); | 414 TestQuoteMeta("foo\\.bar"); |
| 411 TestQuoteMeta("[1-9]"); | 415 TestQuoteMeta("[1-9]"); |
| 412 TestQuoteMeta("1.5-2.0?"); | 416 TestQuoteMeta("1.5-2.0?"); |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 458 | 462 |
| 459 // Don't want null-followed-by-'1' to be interpreted as '\01'. | 463 // Don't want null-followed-by-'1' to be interpreted as '\01'. |
| 460 has_null += '1'; | 464 has_null += '1'; |
| 461 TestQuoteMeta(has_null); | 465 TestQuoteMeta(has_null); |
| 462 NegativeTestQuoteMeta(has_null, "\1"); | 466 NegativeTestQuoteMeta(has_null, "\1"); |
| 463 } | 467 } |
| 464 | 468 |
| 465 TEST(ProgramSize, BigProgram) { | 469 TEST(ProgramSize, BigProgram) { |
| 466 RE2 re_simple("simple regexp"); | 470 RE2 re_simple("simple regexp"); |
| 467 RE2 re_medium("medium.*regexp"); | 471 RE2 re_medium("medium.*regexp"); |
| 468 RE2 re_complex("complex.{1,128}regexp"); | 472 RE2 re_complex("hard.{1,128}regexp"); |
| 469 | 473 |
| 470 CHECK_GT(re_simple.ProgramSize(), 0); | 474 CHECK_GT(re_simple.ProgramSize(), 0); |
| 471 CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize()); | 475 CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize()); |
| 472 CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize()); | 476 CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize()); |
| 473 } | 477 } |
| 474 | 478 |
| 475 TEST(ProgramFanout, BigProgram) { | |
| 476 RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)"); | |
| 477 RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)"); | |
| 478 RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)"); | |
| 479 RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)"); | |
| 480 | |
| 481 map<int, int> histogram; | |
| 482 | |
| 483 // 3 is the largest non-empty bucket and has 1 element. | |
| 484 CHECK_EQ(3, re1.ProgramFanout(&histogram)); | |
| 485 CHECK_EQ(1, histogram[3]); | |
| 486 | |
| 487 // 7 is the largest non-empty bucket and has 10 elements. | |
| 488 CHECK_EQ(7, re10.ProgramFanout(&histogram)); | |
| 489 CHECK_EQ(10, histogram[7]); | |
| 490 | |
| 491 // 10 is the largest non-empty bucket and has 100 elements. | |
| 492 CHECK_EQ(10, re100.ProgramFanout(&histogram)); | |
| 493 CHECK_EQ(100, histogram[10]); | |
| 494 | |
| 495 // 13 is the largest non-empty bucket and has 1000 elements. | |
| 496 CHECK_EQ(13, re1000.ProgramFanout(&histogram)); | |
| 497 CHECK_EQ(1000, histogram[13]); | |
| 498 } | |
| 499 | |
| 500 // Issue 956519: handling empty character sets was | 479 // Issue 956519: handling empty character sets was |
| 501 // causing NULL dereference. This tests a few empty character sets. | 480 // causing NULL dereference. This tests a few empty character sets. |
| 502 // (The way to get an empty character set is to negate a full one.) | 481 // (The way to get an empty character set is to negate a full one.) |
| 503 TEST(EmptyCharset, Fuzz) { | 482 TEST(EmptyCharset, Fuzz) { |
| 504 static const char *empties[] = { | 483 static const char *empties[] = { |
| 505 "[^\\S\\s]", | 484 "[^\\S\\s]", |
| 506 "[^\\S[:space:]]", | 485 "[^\\S[:space:]]", |
| 507 "[^\\D\\d]", | 486 "[^\\D\\d]", |
| 508 "[^\\D[:digit:]]" | 487 "[^\\D[:digit:]]" |
| 509 }; | 488 }; |
| 510 for (int i = 0; i < arraysize(empties); i++) | 489 for (int i = 0; i < arraysize(empties); i++) |
| 511 CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0)); | 490 CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0)); |
| 512 } | 491 } |
| 513 | 492 |
| 514 // Bitstate assumes that kInstFail instructions in | |
| 515 // alternations or capture groups have been "compiled away". | |
| 516 TEST(EmptyCharset, BitstateAssumptions) { | |
| 517 // Captures trigger use of Bitstate. | |
| 518 static const char *nop_empties[] = { | |
| 519 "((((()))))" "[^\\S\\s]?", | |
| 520 "((((()))))" "([^\\S\\s])?", | |
| 521 "((((()))))" "([^\\S\\s]|[^\\S\\s])?", | |
| 522 "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)" | |
| 523 }; | |
| 524 StringPiece group[6]; | |
| 525 for (int i = 0; i < arraysize(nop_empties); i++) | |
| 526 CHECK(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6)); | |
| 527 } | |
| 528 | |
| 529 // Test that named groups work correctly. | 493 // Test that named groups work correctly. |
| 530 TEST(Capture, NamedGroups) { | 494 TEST(Capture, NamedGroups) { |
| 531 { | 495 { |
| 532 RE2 re("(hello world)"); | 496 RE2 re("(hello world)"); |
| 533 CHECK_EQ(re.NumberOfCapturingGroups(), 1); | 497 CHECK_EQ(re.NumberOfCapturingGroups(), 1); |
| 534 const map<string, int>& m = re.NamedCapturingGroups(); | 498 const map<string, int>& m = re.NamedCapturingGroups(); |
| 535 CHECK_EQ(m.size(), 0); | 499 CHECK_EQ(m.size(), 0); |
| 536 } | 500 } |
| 537 | 501 |
| 538 { | 502 { |
| 539 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))"); | 503 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))"); |
| 540 CHECK_EQ(re.NumberOfCapturingGroups(), 6); | 504 CHECK_EQ(re.NumberOfCapturingGroups(), 6); |
| 541 const map<string, int>& m = re.NamedCapturingGroups(); | 505 const map<string, int>& m = re.NamedCapturingGroups(); |
| 542 CHECK_EQ(m.size(), 4); | 506 CHECK_EQ(m.size(), 4); |
| 543 CHECK_EQ(m.find("A")->second, 1); | 507 CHECK_EQ(m.find("A")->second, 1); |
| 544 CHECK_EQ(m.find("B")->second, 2); | 508 CHECK_EQ(m.find("B")->second, 2); |
| 545 CHECK_EQ(m.find("C")->second, 3); | 509 CHECK_EQ(m.find("C")->second, 3); |
| 546 CHECK_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous | 510 CHECK_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous |
| 547 } | 511 } |
| 548 } | 512 } |
| 549 | 513 |
| 550 TEST(RE2, CapturedGroupTest) { | |
| 551 RE2 re("directions from (?P<S>.*) to (?P<D>.*)"); | |
| 552 int num_groups = re.NumberOfCapturingGroups(); | |
| 553 EXPECT_EQ(2, num_groups); | |
| 554 string args[4]; | |
| 555 RE2::Arg arg0(&args[0]); | |
| 556 RE2::Arg arg1(&args[1]); | |
| 557 RE2::Arg arg2(&args[2]); | |
| 558 RE2::Arg arg3(&args[3]); | |
| 559 | |
| 560 const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3}; | |
| 561 EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose", | |
| 562 re, matches, num_groups)); | |
| 563 const map<string, int>& named_groups = re.NamedCapturingGroups(); | |
| 564 EXPECT_TRUE(named_groups.find("S") != named_groups.end()); | |
| 565 EXPECT_TRUE(named_groups.find("D") != named_groups.end()); | |
| 566 | |
| 567 // The named group index is 1-based. | |
| 568 int source_group_index = named_groups.find("S")->second; | |
| 569 int destination_group_index = named_groups.find("D")->second; | |
| 570 EXPECT_EQ(1, source_group_index); | |
| 571 EXPECT_EQ(2, destination_group_index); | |
| 572 | |
| 573 // The args is zero-based. | |
| 574 EXPECT_EQ("mountain view", args[source_group_index - 1]); | |
| 575 EXPECT_EQ("san jose", args[destination_group_index - 1]); | |
| 576 } | |
| 577 | |
| 578 TEST(RE2, FullMatchWithNoArgs) { | 514 TEST(RE2, FullMatchWithNoArgs) { |
| 579 CHECK(RE2::FullMatch("h", "h")); | 515 CHECK(RE2::FullMatch("h", "h")); |
| 580 CHECK(RE2::FullMatch("hello", "hello")); | 516 CHECK(RE2::FullMatch("hello", "hello")); |
| 581 CHECK(RE2::FullMatch("hello", "h.*o")); | 517 CHECK(RE2::FullMatch("hello", "h.*o")); |
| 582 CHECK(!RE2::FullMatch("othello", "h.*o")); // Must be anchored at front | 518 CHECK(!RE2::FullMatch("othello", "h.*o")); // Must be anchored at front |
| 583 CHECK(!RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end | 519 CHECK(!RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end |
| 584 } | 520 } |
| 585 | 521 |
| 586 TEST(RE2, PartialMatch) { | 522 TEST(RE2, PartialMatch) { |
| 587 CHECK(RE2::PartialMatch("x", "x")); | 523 CHECK(RE2::PartialMatch("x", "x")); |
| (...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 721 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL)); | 657 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL)); |
| 722 | 658 |
| 723 // Fail on non-void* NULL arg if the match doesn't parse for the given type. | 659 // Fail on non-void* NULL arg if the match doesn't parse for the given type. |
| 724 CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL)); | 660 CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL)); |
| 725 CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL)); | 661 CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL)); |
| 726 CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL)); | 662 CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL)); |
| 727 CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL)); | 663 CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL)); |
| 728 CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL)); | 664 CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL)); |
| 729 } | 665 } |
| 730 | 666 |
| 667 #ifndef WIN32 |
| 731 // Check that numeric parsing code does not read past the end of | 668 // Check that numeric parsing code does not read past the end of |
| 732 // the number being parsed. | 669 // the number being parsed. |
| 733 // This implementation requires mmap(2) et al. and thus cannot | |
| 734 // be used unless they are available. | |
| 735 TEST(RE2, NULTerminated) { | 670 TEST(RE2, NULTerminated) { |
| 736 #if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0 | |
| 737 char *v; | 671 char *v; |
| 738 int x; | 672 int x; |
| 739 long pagesize = sysconf(_SC_PAGE_SIZE); | 673 long pagesize = sysconf(_SC_PAGE_SIZE); |
| 740 | 674 |
| 741 #ifndef MAP_ANONYMOUS | 675 #ifndef MAP_ANONYMOUS |
| 742 #define MAP_ANONYMOUS MAP_ANON | 676 #define MAP_ANONYMOUS MAP_ANON |
| 743 #endif | 677 #endif |
| 744 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE, | 678 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE, |
| 745 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)); | 679 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)); |
| 746 CHECK(v != reinterpret_cast<char*>(-1)); | 680 CHECK(v != reinterpret_cast<char*>(-1)); |
| 747 LOG(INFO) << "Memory at " << (void*)v; | 681 LOG(INFO) << "Memory at " << (void*)v; |
| 748 CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno; | 682 CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno; |
| 749 v[pagesize - 1] = '1'; | 683 v[pagesize - 1] = '1'; |
| 750 | 684 |
| 751 x = 0; | 685 x = 0; |
| 752 CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x)); | 686 CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x)); |
| 753 CHECK_EQ(x, 1); | 687 CHECK_EQ(x, 1); |
| 688 } |
| 754 #endif | 689 #endif |
| 755 } | |
| 756 | 690 |
| 757 TEST(RE2, FullMatchTypeTests) { | 691 TEST(RE2, FullMatchTypeTests) { |
| 758 // Type tests | 692 // Type tests |
| 759 string zeros(1000, '0'); | 693 string zeros(100, '0'); |
| 760 { | 694 { |
| 761 char c; | 695 char c; |
| 762 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); | 696 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); |
| 763 CHECK_EQ(c, 'H'); | 697 CHECK_EQ(c, 'H'); |
| 764 } | 698 } |
| 765 { | 699 { |
| 766 unsigned char c; | 700 unsigned char c; |
| 767 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); | 701 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); |
| 768 CHECK_EQ(c, static_cast<unsigned char>('H')); | 702 CHECK_EQ(c, static_cast<unsigned char>('H')); |
| 769 } | 703 } |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 851 snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max); | 785 snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max); |
| 852 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); | 786 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); |
| 853 | 787 |
| 854 assert(buf[strlen(buf)-1] != '9'); | 788 assert(buf[strlen(buf)-1] != '9'); |
| 855 buf[strlen(buf)-1]++; | 789 buf[strlen(buf)-1]++; |
| 856 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); | 790 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); |
| 857 } | 791 } |
| 858 } | 792 } |
| 859 | 793 |
| 860 TEST(RE2, FloatingPointFullMatchTypes) { | 794 TEST(RE2, FloatingPointFullMatchTypes) { |
| 861 string zeros(1000, '0'); | 795 string zeros(100, '0'); |
| 862 { | 796 { |
| 863 float v; | 797 float v; |
| 864 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100); | 798 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100); |
| 865 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100); | 799 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100); |
| 866 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23)); | 800 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23)); |
| 867 CHECK(RE2::FullMatch(" 100", "(.*)", &v)); CHECK_EQ(v, 100); | |
| 868 | 801 |
| 869 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v)); | 802 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v)); |
| 870 CHECK_EQ(v, float(1e23)); | 803 CHECK_EQ(v, float(1e23)); |
| 871 | 804 |
| 872 // 6700000000081920.1 is an edge case. | 805 // 6700000000081920.1 is an edge case. |
| 873 // 6700000000081920 is exactly halfway between | 806 // 6700000000081920 is exactly halfway between |
| 874 // two float32s, so the .1 should make it round up. | 807 // two float32s, so the .1 should make it round up. |
| 875 // However, the .1 is outside the precision possible with | 808 // However, the .1 is outside the precision possible with |
| 876 // a float64: the nearest float64 is 6700000000081920. | 809 // a float64: the nearest float64 is 6700000000081920. |
| 877 // So if the code uses strtod and then converts to float32, | 810 // So if the code uses strtod and then converts to float32, |
| (...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1057 CHECK(re.error().empty()); // Must have no error | 990 CHECK(re.error().empty()); // Must have no error |
| 1058 CHECK(re.ok()); | 991 CHECK(re.ok()); |
| 1059 CHECK(re.error_code() == RE2::NoError); | 992 CHECK(re.error_code() == RE2::NoError); |
| 1060 } | 993 } |
| 1061 } | 994 } |
| 1062 | 995 |
| 1063 TEST(RE2, UTF8) { | 996 TEST(RE2, UTF8) { |
| 1064 // Check UTF-8 handling | 997 // Check UTF-8 handling |
| 1065 // Three Japanese characters (nihongo) | 998 // Three Japanese characters (nihongo) |
| 1066 const char utf8_string[] = { | 999 const char utf8_string[] = { |
| 1067 (char)0xe6, (char)0x97, (char)0xa5, // 65e5 | 1000 0xe6, 0x97, 0xa5, // 65e5 |
| 1068 (char)0xe6, (char)0x9c, (char)0xac, // 627c | 1001 0xe6, 0x9c, 0xac, // 627c |
| 1069 (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e | 1002 0xe8, 0xaa, 0x9e, // 8a9e |
| 1070 0 | 1003 0 |
| 1071 }; | 1004 }; |
| 1072 const char utf8_pattern[] = { | 1005 const char utf8_pattern[] = { |
| 1073 '.', | 1006 '.', |
| 1074 (char)0xe6, (char)0x9c, (char)0xac, // 627c | 1007 0xe6, 0x9c, 0xac, // 627c |
| 1075 '.', | 1008 '.', |
| 1076 0 | 1009 0 |
| 1077 }; | 1010 }; |
| 1078 | 1011 |
| 1079 // Both should match in either mode, bytes or UTF-8 | 1012 // Both should match in either mode, bytes or UTF-8 |
| 1080 RE2 re_test1(".........", RE2::Latin1); | 1013 RE2 re_test1(".........", RE2::Latin1); |
| 1081 CHECK(RE2::FullMatch(utf8_string, re_test1)); | 1014 CHECK(RE2::FullMatch(utf8_string, re_test1)); |
| 1082 RE2 re_test2("..."); | 1015 RE2 re_test2("..."); |
| 1083 CHECK(RE2::FullMatch(utf8_string, re_test2)); | 1016 CHECK(RE2::FullMatch(utf8_string, re_test2)); |
| 1084 | 1017 |
| (...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1313 if (t.match == NULL) { | 1246 if (t.match == NULL) { |
| 1314 EXPECT_FALSE(re.PartialMatch(t.text, re)); | 1247 EXPECT_FALSE(re.PartialMatch(t.text, re)); |
| 1315 } else { | 1248 } else { |
| 1316 StringPiece m; | 1249 StringPiece m; |
| 1317 EXPECT_TRUE(re.PartialMatch(t.text, re, &m)); | 1250 EXPECT_TRUE(re.PartialMatch(t.text, re, &m)); |
| 1318 EXPECT_EQ(m, t.match); | 1251 EXPECT_EQ(m, t.match); |
| 1319 } | 1252 } |
| 1320 } | 1253 } |
| 1321 } | 1254 } |
| 1322 | 1255 |
| 1323 // Check that dot_nl option works. | |
| 1324 TEST(RE2, DotNL) { | |
| 1325 RE2::Options opt; | |
| 1326 opt.set_dot_nl(true); | |
| 1327 EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt))); | |
| 1328 EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt))); | |
| 1329 opt.set_never_nl(true); | |
| 1330 EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt))); | |
| 1331 } | |
| 1332 | |
| 1333 // Check that there are no capturing groups in "never capture" mode. | 1256 // Check that there are no capturing groups in "never capture" mode. |
| 1334 TEST(RE2, NeverCapture) { | 1257 TEST(RE2, NeverCapture) { |
| 1335 RE2::Options opt; | 1258 RE2::Options opt; |
| 1336 opt.set_never_capture(true); | 1259 opt.set_never_capture(true); |
| 1337 RE2 re("(r)(e)", opt); | 1260 RE2 re("(r)(e)", opt); |
| 1338 EXPECT_EQ(0, re.NumberOfCapturingGroups()); | 1261 EXPECT_EQ(0, re.NumberOfCapturingGroups()); |
| 1339 } | 1262 } |
| 1340 | 1263 |
| 1341 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0. | 1264 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0. |
| 1342 // Triggered by a failed DFA search falling back to Bitstate when | 1265 // Triggered by a failed DFA search falling back to Bitstate when |
| (...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1447 EXPECT_EQ(want, have); | 1370 EXPECT_EQ(want, have); |
| 1448 } | 1371 } |
| 1449 | 1372 |
| 1450 TEST(RE2, RegexpToStringLossOfAnchor) { | 1373 TEST(RE2, RegexpToStringLossOfAnchor) { |
| 1451 EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at"); | 1374 EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at"); |
| 1452 EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at"); | 1375 EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at"); |
| 1453 EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$"); | 1376 EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$"); |
| 1454 EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)"); | 1377 EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)"); |
| 1455 } | 1378 } |
| 1456 | 1379 |
| 1457 // Issue 10131674 | |
| 1458 TEST(RE2, Bug10131674) { | |
| 1459 // Some of these escapes describe values that do not fit in a byte. | |
| 1460 RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1); | |
| 1461 EXPECT_FALSE(re.ok()); | |
| 1462 EXPECT_FALSE(RE2::FullMatch("hello world", re)); | |
| 1463 } | |
| 1464 | |
| 1465 TEST(RE2, Bug18391750) { | |
| 1466 // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanit
izer. | |
| 1467 const char t[] = { | |
| 1468 (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08, | |
| 1469 (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5, | |
| 1470 (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69, | |
| 1471 (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31, | |
| 1472 (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29, | |
| 1473 (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00, | |
| 1474 }; | |
| 1475 RE2::Options opt; | |
| 1476 opt.set_encoding(RE2::Options::EncodingLatin1); | |
| 1477 opt.set_longest_match(true); | |
| 1478 opt.set_dot_nl(true); | |
| 1479 opt.set_case_sensitive(false); | |
| 1480 RE2 re(t, opt); | |
| 1481 CHECK(re.ok()); | |
| 1482 RE2::PartialMatch(t, re); | |
| 1483 } | |
| 1484 | |
| 1485 TEST(RE2, Bug18458852) { | |
| 1486 // Bug in parser accepting invalid (too large) rune, | |
| 1487 // causing compiler to fail in DCHECK in UTF-8 | |
| 1488 // character class code. | |
| 1489 const char b[] = { | |
| 1490 (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28, | |
| 1491 (char)0x24, (char)0x5b, (char)0x5e, (char)0xf5, (char)0x87, (char)0x87, | |
| 1492 (char)0x90, (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00, | |
| 1493 }; | |
| 1494 RE2 re(b); | |
| 1495 CHECK(!re.ok()); | |
| 1496 } | |
| 1497 | |
| 1498 TEST(RE2, Bug18523943) { | |
| 1499 // Bug in bitstate: case kFailInst was merged into the default with LOG(DFATAL
). | |
| 1500 | |
| 1501 RE2::Options opt; | |
| 1502 const char a[] = { | |
| 1503 (char)0x29, (char)0x29, (char)0x24, (char)0x00, | |
| 1504 }; | |
| 1505 const char b[] = { | |
| 1506 (char)0x28, (char)0x0a, (char)0x2a, (char)0x2a, (char)0x29, (char)0x00, | |
| 1507 }; | |
| 1508 opt.set_log_errors(false); | |
| 1509 opt.set_encoding(RE2::Options::EncodingLatin1); | |
| 1510 opt.set_posix_syntax(true); | |
| 1511 opt.set_longest_match(true); | |
| 1512 opt.set_literal(false); | |
| 1513 opt.set_never_nl(true); | |
| 1514 | |
| 1515 RE2 re((const char*)b, opt); | |
| 1516 CHECK(re.ok()); | |
| 1517 string s1; | |
| 1518 CHECK(!RE2::PartialMatch((const char*)a, re, &s1)); | |
| 1519 } | |
| 1520 | |
| 1521 TEST(RE2, Bug21371806) { | |
| 1522 // Bug in parser accepting Unicode groups in Latin-1 mode, | |
| 1523 // causing compiler to fail in DCHECK in prog.cc. | |
| 1524 | |
| 1525 RE2::Options opt; | |
| 1526 opt.set_encoding(RE2::Options::EncodingLatin1); | |
| 1527 | |
| 1528 RE2 re("g\\p{Zl}]", opt); | |
| 1529 CHECK(re.ok()); | |
| 1530 } | |
| 1531 | |
| 1532 } // namespace re2 | 1380 } // namespace re2 |
| OLD | NEW |