| Index: third_party/re2/re2/testing/re2_test.cc
|
| diff --git a/third_party/re2/re2/testing/re2_test.cc b/third_party/re2/re2/testing/re2_test.cc
|
| index 911e8689ed5e28c78fe6cb228721570dee037ba0..a1d9c572f4dd4dffa87d506021bda022f9cdffa2 100644
|
| --- a/third_party/re2/re2/testing/re2_test.cc
|
| +++ b/third_party/re2/re2/testing/re2_test.cc
|
| @@ -5,22 +5,18 @@
|
|
|
| // TODO: Test extractions for PartialMatch/Consume
|
|
|
| -#include <sys/types.h>
|
| -#ifndef WIN32
|
| +#include <errno.h>
|
| +#ifndef _MSC_VER
|
| +#include <unistd.h> /* for sysconf */
|
| #include <sys/mman.h>
|
| #endif
|
| #include <sys/stat.h>
|
| -#include <errno.h>
|
| +#include <sys/types.h>
|
| #include <vector>
|
| #include "util/test.h"
|
| #include "re2/re2.h"
|
| #include "re2/regexp.h"
|
|
|
| -#ifdef WIN32
|
| -#include <stdio.h>
|
| -#define snprintf _snprintf
|
| -#endif
|
| -
|
| DECLARE_bool(logtostderr);
|
|
|
| namespace re2 {
|
| @@ -180,7 +176,7 @@ TEST(RE2, Replace) {
|
| { "", NULL, NULL, NULL, NULL, 0 }
|
| };
|
|
|
| - for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
|
| + for (const ReplaceTest* t = tests; t->original != NULL; t++) {
|
| VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->rewrite);
|
| string one(t->original);
|
| CHECK(RE2::Replace(&one, t->regexp, t->rewrite));
|
| @@ -373,12 +369,12 @@ TEST(RE2, Match) {
|
| CHECK_EQ(port, 9000);
|
| }
|
|
|
| -static void TestRecursion(int size, const char *pattern) {
|
| +static void TestRecursion(int size, const char* pattern) {
|
| // Fill up a string repeating the pattern given
|
| string domain;
|
| domain.resize(size);
|
| - int patlen = strlen(pattern);
|
| - for (int i = 0; i < size; ++i) {
|
| + size_t patlen = strlen(pattern);
|
| + for (int i = 0; i < size; i++) {
|
| domain[i] = pattern[i % patlen];
|
| }
|
| // Just make sure it doesn't crash due to too much recursion.
|
| @@ -392,8 +388,8 @@ static void TestQuoteMeta(string unquoted,
|
| const RE2::Options& options = RE2::DefaultOptions) {
|
| string quoted = RE2::QuoteMeta(unquoted);
|
| RE2 re(quoted, options);
|
| - EXPECT_TRUE_M(RE2::FullMatch(unquoted, re),
|
| - "Unquoted='" + unquoted + "', quoted='" + quoted + "'.");
|
| + EXPECT_TRUE(RE2::FullMatch(unquoted, re))
|
| + << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
|
| }
|
|
|
| // A meta-quoted string, interpreted as a pattern, should always match
|
| @@ -402,8 +398,8 @@ static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
|
| const RE2::Options& options = RE2::DefaultOptions) {
|
| string quoted = RE2::QuoteMeta(unquoted);
|
| RE2 re(quoted, options);
|
| - EXPECT_FALSE_M(RE2::FullMatch(should_not_match, re),
|
| - "Unquoted='" + unquoted + "', quoted='" + quoted + "'.");
|
| + EXPECT_FALSE(RE2::FullMatch(should_not_match, re))
|
| + << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
|
| }
|
|
|
| // Tests that quoted meta characters match their original strings,
|
| @@ -469,13 +465,38 @@ TEST(QuoteMeta, HasNull) {
|
| TEST(ProgramSize, BigProgram) {
|
| RE2 re_simple("simple regexp");
|
| RE2 re_medium("medium.*regexp");
|
| - RE2 re_complex("hard.{1,128}regexp");
|
| + RE2 re_complex("complex.{1,128}regexp");  // {1,128} repetition: must yield the largest program (checked below)
|
|
|
| CHECK_GT(re_simple.ProgramSize(), 0);
|
| CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
|
| CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
|
| }
|
|
|
| +TEST(ProgramFanout, BigProgram) {  // Checks ProgramFanout()'s return value and histogram for four patterns.
|
| + RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)");  // the patterns differ only in the {N} repeat count
|
| + RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)");
|
| + RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
|
| + RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");
|
| +
|
| + map<int, int> histogram;  // passed to every ProgramFanout() call and inspected afterwards
|
| +
|
| + // re1: ProgramFanout() returns 3, the largest non-empty bucket, which holds 1 element.
|
| + CHECK_EQ(3, re1.ProgramFanout(&histogram));
|
| + CHECK_EQ(1, histogram[3]);
|
| +
|
| + // re10: ProgramFanout() returns 7, the largest non-empty bucket, which holds 10 elements.
|
| + CHECK_EQ(7, re10.ProgramFanout(&histogram));
|
| + CHECK_EQ(10, histogram[7]);
|
| +
|
| + // re100: ProgramFanout() returns 10, the largest non-empty bucket, which holds 100 elements.
|
| + CHECK_EQ(10, re100.ProgramFanout(&histogram));
|
| + CHECK_EQ(100, histogram[10]);
|
| +
|
| + // re1000: ProgramFanout() returns 13, the largest non-empty bucket, which holds 1000 elements.
|
| + CHECK_EQ(13, re1000.ProgramFanout(&histogram));
|
| + CHECK_EQ(1000, histogram[13]);
|
| +}
|
| +
|
| // Issue 956519: handling empty character sets was
|
| // causing NULL dereference. This tests a few empty character sets.
|
| // (The way to get an empty character set is to negate a full one.)
|
| @@ -490,6 +511,21 @@ TEST(EmptyCharset, Fuzz) {
|
| CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
|
| }
|
|
|
| +// Bitstate assumes that kInstFail instructions in
|
| +// alternations or capture groups have been "compiled away".
|
| +TEST(EmptyCharset, BitstateAssumptions) {
|
| + // Captures trigger use of Bitstate.
|
| + static const char *nop_empties[] = {  // five nested capture groups, then a skippable [^\S\s] charset
|
| + "((((()))))" "[^\\S\\s]?",
|
| + "((((()))))" "([^\\S\\s])?",
|
| + "((((()))))" "([^\\S\\s]|[^\\S\\s])?",
|
| + "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)"
|
| + };
|
| + StringPiece group[6];  // submatch storage; all 6 slots are requested in the Match() call below
|
| + for (int i = 0; i < arraysize(nop_empties); i++)  // NOTE(review): int vs. arraysize() may be a signed/unsigned compare -- confirm
|
| + CHECK(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6));  // every pattern must match the empty string
|
| +}
|
| +
|
| // Test that named groups work correctly.
|
| TEST(Capture, NamedGroups) {
|
| {
|
| @@ -511,6 +547,34 @@ TEST(Capture, NamedGroups) {
|
| }
|
| }
|
|
|
| +TEST(RE2, CapturedGroupTest) {  // Maps named-group indices onto the args filled by FullMatchN.
|
| + RE2 re("directions from (?P<S>.*) to (?P<D>.*)");
|
| + int num_groups = re.NumberOfCapturingGroups();
|
| + EXPECT_EQ(2, num_groups);
|
| + string args[4];  // four slots are declared; only num_groups of them are handed to FullMatchN
|
| + RE2::Arg arg0(&args[0]);
|
| + RE2::Arg arg1(&args[1]);
|
| + RE2::Arg arg2(&args[2]);
|
| + RE2::Arg arg3(&args[3]);
|
| +
|
| + const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3};
|
| + EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose",
|
| + re, matches, num_groups));
|
| + const map<string, int>& named_groups = re.NamedCapturingGroups();
|
| + EXPECT_TRUE(named_groups.find("S") != named_groups.end());
|
| + EXPECT_TRUE(named_groups.find("D") != named_groups.end());
|
| +
|
| + // The named group index is 1-based.
|
| + int source_group_index = named_groups.find("S")->second;  // NOTE(review): unchecked deref; relies on the find() checks above
|
| + int destination_group_index = named_groups.find("D")->second;
|
| + EXPECT_EQ(1, source_group_index);
|
| + EXPECT_EQ(2, destination_group_index);
|
| +
|
| + // The args array is zero-based, so subtract 1 from the group index.
|
| + EXPECT_EQ("mountain view", args[source_group_index - 1]);
|
| + EXPECT_EQ("san jose", args[destination_group_index - 1]);
|
| +}
|
| +
|
| TEST(RE2, FullMatchWithNoArgs) {
|
| CHECK(RE2::FullMatch("h", "h"));
|
| CHECK(RE2::FullMatch("hello", "hello"));
|
| @@ -664,10 +728,12 @@ TEST(RE2, FullMatchTypedNullArg) {
|
| CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL));
|
| }
|
|
|
| -#ifndef WIN32
|
| // Check that numeric parsing code does not read past the end of
|
| // the number being parsed.
|
| +// This implementation requires mmap(2) et al. and thus cannot
|
| +// be used unless they are available.
|
| TEST(RE2, NULTerminated) {
|
| +#if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0
|
| char *v;
|
| int x;
|
| long pagesize = sysconf(_SC_PAGE_SIZE);
|
| @@ -685,12 +751,12 @@ TEST(RE2, NULTerminated) {
|
| x = 0;
|
| CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
|
| CHECK_EQ(x, 1);
|
| -}
|
| #endif
|
| +}
|
|
|
| TEST(RE2, FullMatchTypeTests) {
|
| // Type tests
|
| - string zeros(100, '0');
|
| + string zeros(1000, '0');
|
| {
|
| char c;
|
| CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
|
| @@ -792,12 +858,13 @@ TEST(RE2, FullMatchTypeTests) {
|
| }
|
|
|
| TEST(RE2, FloatingPointFullMatchTypes) {
|
| - string zeros(100, '0');
|
| + string zeros(1000, '0');
|
| {
|
| float v;
|
| CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100);
|
| CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100);
|
| CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23));
|
| + CHECK(RE2::FullMatch(" 100", "(.*)", &v)); CHECK_EQ(v, 100);
|
|
|
| CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
|
| CHECK_EQ(v, float(1e23));
|
| @@ -997,14 +1064,14 @@ TEST(RE2, UTF8) {
|
| // Check UTF-8 handling
|
| // Three Japanese characters (nihongo)
|
| const char utf8_string[] = {
|
| - 0xe6, 0x97, 0xa5, // 65e5
|
| - 0xe6, 0x9c, 0xac, // 627c
|
| - 0xe8, 0xaa, 0x9e, // 8a9e
|
| + (char)0xe6, (char)0x97, (char)0xa5, // 65e5
|
| + (char)0xe6, (char)0x9c, (char)0xac, // 672c
|
| + (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e
|
| 0
|
| };
|
| const char utf8_pattern[] = {
|
| '.',
|
| - 0xe6, 0x9c, 0xac, // 627c
|
| + (char)0xe6, (char)0x9c, (char)0xac, // 672c
|
| '.',
|
| 0
|
| };
|
| @@ -1253,6 +1320,16 @@ TEST(RE2, NeverNewline) {
|
| }
|
| }
|
|
|
| +// Check that the dot_nl option lets '.' match a newline, and which settings take that away again.
|
| +TEST(RE2, DotNL) {
|
| + RE2::Options opt;
|
| + opt.set_dot_nl(true);
|
| + EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt)));  // dot_nl set: '.' must match the newline
|
| + EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt)));  // with an inline (?-s) prefix it must not
|
| + opt.set_never_nl(true);
|
| + EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt)));  // with never_nl also set it must not
|
| +}
|
| +
|
| // Check that there are no capturing groups in "never capture" mode.
|
| TEST(RE2, NeverCapture) {
|
| RE2::Options opt;
|
| @@ -1377,4 +1454,79 @@ TEST(RE2, RegexpToStringLossOfAnchor) {
|
| EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
|
| }
|
|
|
| +// Issue 10131674
|
| +TEST(RE2, Bug10131674) {
|
| + // Some of these escapes (\440 and \656, read as octal) describe values that do not fit in a byte.
|
| + RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1);
|
| + EXPECT_FALSE(re.ok());  // the pattern must be rejected
|
| + EXPECT_FALSE(RE2::FullMatch("hello world", re));  // matching with the rejected pattern must report no match
|
| +}
|
| +
|
| +TEST(RE2, Bug18391750) {
|
| + // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanitizer.
|
| + const char t[] = {  // NUL-terminated bytes, used below as both the pattern and the text
|
| + (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08,
|
| + (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5,
|
| + (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69,
|
| + (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31,
|
| + (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29,
|
| + (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00,
|
| + };
|
| + RE2::Options opt;
|
| + opt.set_encoding(RE2::Options::EncodingLatin1);
|
| + opt.set_longest_match(true);
|
| + opt.set_dot_nl(true);
|
| + opt.set_case_sensitive(false);
|
| + RE2 re(t, opt);
|
| + CHECK(re.ok());
|
| + RE2::PartialMatch(t, re);  // return value is not checked; the call itself is what is being exercised
|
| +}
|
| +
|
| +TEST(RE2, Bug18458852) {
|
| + // Bug in parser accepting invalid (too large) rune,
|
| + // causing compiler to fail in DCHECK in UTF-8
|
| + // character class code.
|
| + const char b[] = {
|
| + (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28,
|
| + (char)0x24, (char)0x5b, (char)0x5e, (char)0xf5, (char)0x87, (char)0x87,  // f5 87 87 + next 90: 0x1471D0 if read as 4-byte UTF-8
|
| + (char)0x90, (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00,
|
| + };
|
| + RE2 re(b);
|
| + CHECK(!re.ok());  // the pattern must be rejected
|
| +}
|
| +
|
| +TEST(RE2, Bug18523943) {
|
| + // Bug in bitstate: case kFailInst was merged into the default with LOG(DFATAL).
|
| +
|
| + RE2::Options opt;
|
| + const char a[] = {  // text that is searched below
|
| + (char)0x29, (char)0x29, (char)0x24, (char)0x00,  // ASCII: ) ) $ NUL
|
| + };
|
| + const char b[] = {  // pattern that is compiled below
|
| + (char)0x28, (char)0x0a, (char)0x2a, (char)0x2a, (char)0x29, (char)0x00,  // ASCII: ( LF * * ) NUL
|
| + };
|
| + opt.set_log_errors(false);
|
| + opt.set_encoding(RE2::Options::EncodingLatin1);
|
| + opt.set_posix_syntax(true);
|
| + opt.set_longest_match(true);
|
| + opt.set_literal(false);
|
| + opt.set_never_nl(true);
|
| +
|
| + RE2 re((const char*)b, opt);
|
| + CHECK(re.ok());
|
| + string s1;
|
| + CHECK(!RE2::PartialMatch((const char*)a, re, &s1));  // the pattern compiles but must not match the text
|
| +}
|
| +
|
| +TEST(RE2, Bug21371806) {
|
| + // Bug in parser accepting Unicode groups in Latin-1 mode,
|
| + // causing compiler to fail in DCHECK in prog.cc.
|
| +
|
| + RE2::Options opt;
|
| + opt.set_encoding(RE2::Options::EncodingLatin1);
|
| +
|
| + RE2 re("g\\p{Zl}]", opt);  // \p{Zl} is the Unicode group this test feeds to the parser
|
| + CHECK(re.ok());  // construction is expected to succeed under the Latin-1 encoding
|
| +}
|
| +
|
| } // namespace re2
|
|
|