Index: third_party/re2/re2/testing/re2_test.cc |
diff --git a/third_party/re2/re2/testing/re2_test.cc b/third_party/re2/re2/testing/re2_test.cc |
index 911e8689ed5e28c78fe6cb228721570dee037ba0..a1d9c572f4dd4dffa87d506021bda022f9cdffa2 100644 |
--- a/third_party/re2/re2/testing/re2_test.cc |
+++ b/third_party/re2/re2/testing/re2_test.cc |
@@ -5,22 +5,18 @@ |
// TODO: Test extractions for PartialMatch/Consume |
-#include <sys/types.h> |
-#ifndef WIN32 |
+#include <errno.h> |
+#ifndef _MSC_VER |
+#include <unistd.h> /* for sysconf */ |
#include <sys/mman.h> |
#endif |
#include <sys/stat.h> |
-#include <errno.h> |
+#include <sys/types.h> |
#include <vector> |
#include "util/test.h" |
#include "re2/re2.h" |
#include "re2/regexp.h" |
-#ifdef WIN32 |
-#include <stdio.h> |
-#define snprintf _snprintf |
-#endif |
- |
DECLARE_bool(logtostderr); |
namespace re2 { |
@@ -180,7 +176,7 @@ TEST(RE2, Replace) { |
{ "", NULL, NULL, NULL, NULL, 0 } |
}; |
- for (const ReplaceTest *t = tests; t->original != NULL; ++t) { |
+ for (const ReplaceTest* t = tests; t->original != NULL; t++) { |
VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->rewrite); |
string one(t->original); |
CHECK(RE2::Replace(&one, t->regexp, t->rewrite)); |
@@ -373,12 +369,12 @@ TEST(RE2, Match) { |
CHECK_EQ(port, 9000); |
} |
-static void TestRecursion(int size, const char *pattern) { |
+static void TestRecursion(int size, const char* pattern) { |
// Fill up a string repeating the pattern given |
string domain; |
domain.resize(size); |
- int patlen = strlen(pattern); |
- for (int i = 0; i < size; ++i) { |
+ size_t patlen = strlen(pattern); |
+ for (int i = 0; i < size; i++) { |
domain[i] = pattern[i % patlen]; |
} |
// Just make sure it doesn't crash due to too much recursion. |
@@ -392,8 +388,8 @@ static void TestQuoteMeta(string unquoted, |
const RE2::Options& options = RE2::DefaultOptions) { |
string quoted = RE2::QuoteMeta(unquoted); |
RE2 re(quoted, options); |
- EXPECT_TRUE_M(RE2::FullMatch(unquoted, re), |
- "Unquoted='" + unquoted + "', quoted='" + quoted + "'."); |
+ EXPECT_TRUE(RE2::FullMatch(unquoted, re)) |
+ << "Unquoted='" << unquoted << "', quoted='" << quoted << "'."; |
} |
// A meta-quoted string, interpreted as a pattern, should always match |
@@ -402,8 +398,8 @@ static void NegativeTestQuoteMeta(string unquoted, string should_not_match, |
const RE2::Options& options = RE2::DefaultOptions) { |
string quoted = RE2::QuoteMeta(unquoted); |
RE2 re(quoted, options); |
- EXPECT_FALSE_M(RE2::FullMatch(should_not_match, re), |
- "Unquoted='" + unquoted + "', quoted='" + quoted + "'."); |
+ EXPECT_FALSE(RE2::FullMatch(should_not_match, re)) |
+ << "Unquoted='" << unquoted << "', quoted='" << quoted << "'."; |
} |
// Tests that quoted meta characters match their original strings, |
@@ -469,13 +465,38 @@ TEST(QuoteMeta, HasNull) { |
TEST(ProgramSize, BigProgram) { |
RE2 re_simple("simple regexp"); |
RE2 re_medium("medium.*regexp"); |
- RE2 re_complex("hard.{1,128}regexp"); |
+ RE2 re_complex("complex.{1,128}regexp"); |
CHECK_GT(re_simple.ProgramSize(), 0); |
CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize()); |
CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize()); |
} |
+TEST(ProgramFanout, BigProgram) { |
+ RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)"); |
+ RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)"); |
+ RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)"); |
+ RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)"); |
+ |
+ map<int, int> histogram; |
+ |
+ // 3 is the largest non-empty bucket and has 1 element. |
+ CHECK_EQ(3, re1.ProgramFanout(&histogram)); |
+ CHECK_EQ(1, histogram[3]); |
+ |
+ // 7 is the largest non-empty bucket and has 10 elements. |
+ CHECK_EQ(7, re10.ProgramFanout(&histogram)); |
+ CHECK_EQ(10, histogram[7]); |
+ |
+ // 10 is the largest non-empty bucket and has 100 elements. |
+ CHECK_EQ(10, re100.ProgramFanout(&histogram)); |
+ CHECK_EQ(100, histogram[10]); |
+ |
+ // 13 is the largest non-empty bucket and has 1000 elements. |
+ CHECK_EQ(13, re1000.ProgramFanout(&histogram)); |
+ CHECK_EQ(1000, histogram[13]); |
+} |
+ |
// Issue 956519: handling empty character sets was |
// causing NULL dereference. This tests a few empty character sets. |
// (The way to get an empty character set is to negate a full one.) |
@@ -490,6 +511,21 @@ TEST(EmptyCharset, Fuzz) { |
CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0)); |
} |
+// Bitstate assumes that kInstFail instructions in |
+// alternations or capture groups have been "compiled away". |
+TEST(EmptyCharset, BitstateAssumptions) { |
+ // Captures trigger use of Bitstate. |
+ static const char *nop_empties[] = { |
+ "((((()))))" "[^\\S\\s]?", |
+ "((((()))))" "([^\\S\\s])?", |
+ "((((()))))" "([^\\S\\s]|[^\\S\\s])?", |
+ "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)" |
+ }; |
+ StringPiece group[6]; |
+ for (int i = 0; i < arraysize(nop_empties); i++) |
+ CHECK(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6)); |
+} |
+ |
// Test that named groups work correctly. |
TEST(Capture, NamedGroups) { |
{ |
@@ -511,6 +547,34 @@ TEST(Capture, NamedGroups) { |
} |
} |
+TEST(RE2, CapturedGroupTest) { |
+ RE2 re("directions from (?P<S>.*) to (?P<D>.*)"); |
+ int num_groups = re.NumberOfCapturingGroups(); |
+ EXPECT_EQ(2, num_groups); |
+ string args[4]; |
+ RE2::Arg arg0(&args[0]); |
+ RE2::Arg arg1(&args[1]); |
+ RE2::Arg arg2(&args[2]); |
+ RE2::Arg arg3(&args[3]); |
+ |
+ const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3}; |
+ EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose", |
+ re, matches, num_groups)); |
+ const map<string, int>& named_groups = re.NamedCapturingGroups(); |
+ EXPECT_TRUE(named_groups.find("S") != named_groups.end()); |
+ EXPECT_TRUE(named_groups.find("D") != named_groups.end()); |
+ |
+ // The named group index is 1-based. |
+ int source_group_index = named_groups.find("S")->second; |
+ int destination_group_index = named_groups.find("D")->second; |
+ EXPECT_EQ(1, source_group_index); |
+ EXPECT_EQ(2, destination_group_index); |
+ |
+ // The args array is zero-based. |
+ EXPECT_EQ("mountain view", args[source_group_index - 1]); |
+ EXPECT_EQ("san jose", args[destination_group_index - 1]); |
+} |
+ |
TEST(RE2, FullMatchWithNoArgs) { |
CHECK(RE2::FullMatch("h", "h")); |
CHECK(RE2::FullMatch("hello", "hello")); |
@@ -664,10 +728,12 @@ TEST(RE2, FullMatchTypedNullArg) { |
CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL)); |
} |
-#ifndef WIN32 |
// Check that numeric parsing code does not read past the end of |
// the number being parsed. |
+// This implementation requires mmap(2) et al. and thus cannot |
+// be used unless they are available. |
TEST(RE2, NULTerminated) { |
+#if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0 |
char *v; |
int x; |
long pagesize = sysconf(_SC_PAGE_SIZE); |
@@ -685,12 +751,12 @@ TEST(RE2, NULTerminated) { |
x = 0; |
CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x)); |
CHECK_EQ(x, 1); |
-} |
#endif |
+} |
TEST(RE2, FullMatchTypeTests) { |
// Type tests |
- string zeros(100, '0'); |
+ string zeros(1000, '0'); |
{ |
char c; |
CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); |
@@ -792,12 +858,13 @@ TEST(RE2, FullMatchTypeTests) { |
} |
TEST(RE2, FloatingPointFullMatchTypes) { |
- string zeros(100, '0'); |
+ string zeros(1000, '0'); |
{ |
float v; |
CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100); |
CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100); |
CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23)); |
+ CHECK(RE2::FullMatch(" 100", "(.*)", &v)); CHECK_EQ(v, 100); |
CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v)); |
CHECK_EQ(v, float(1e23)); |
@@ -997,14 +1064,14 @@ TEST(RE2, UTF8) { |
// Check UTF-8 handling |
// Three Japanese characters (nihongo) |
const char utf8_string[] = { |
- 0xe6, 0x97, 0xa5, // 65e5 |
- 0xe6, 0x9c, 0xac, // 627c |
- 0xe8, 0xaa, 0x9e, // 8a9e |
+ (char)0xe6, (char)0x97, (char)0xa5, // 65e5 |
+ (char)0xe6, (char)0x9c, (char)0xac, // 672c |
+ (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e |
0 |
}; |
const char utf8_pattern[] = { |
'.', |
- 0xe6, 0x9c, 0xac, // 627c |
+ (char)0xe6, (char)0x9c, (char)0xac, // 672c |
'.', |
0 |
}; |
@@ -1253,6 +1320,16 @@ TEST(RE2, NeverNewline) { |
} |
} |
+// Check that dot_nl option works. |
+TEST(RE2, DotNL) { |
+ RE2::Options opt; |
+ opt.set_dot_nl(true); |
+ EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt))); |
+ EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt))); |
+ opt.set_never_nl(true); |
+ EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt))); |
+} |
+ |
// Check that there are no capturing groups in "never capture" mode. |
TEST(RE2, NeverCapture) { |
RE2::Options opt; |
@@ -1377,4 +1454,79 @@ TEST(RE2, RegexpToStringLossOfAnchor) { |
EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)"); |
} |
+// Issue 10131674 |
+TEST(RE2, Bug10131674) { |
+ // Some of these escapes describe values that do not fit in a byte. |
+ RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1); |
+ EXPECT_FALSE(re.ok()); |
+ EXPECT_FALSE(RE2::FullMatch("hello world", re)); |
+} |
+ |
+TEST(RE2, Bug18391750) { |
+ // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanitizer. |
+ const char t[] = { |
+ (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08, |
+ (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5, |
+ (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69, |
+ (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31, |
+ (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29, |
+ (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00, |
+ }; |
+ RE2::Options opt; |
+ opt.set_encoding(RE2::Options::EncodingLatin1); |
+ opt.set_longest_match(true); |
+ opt.set_dot_nl(true); |
+ opt.set_case_sensitive(false); |
+ RE2 re(t, opt); |
+ CHECK(re.ok()); |
+ RE2::PartialMatch(t, re); |
+} |
+ |
+TEST(RE2, Bug18458852) { |
+ // Bug in parser accepting invalid (too large) rune, |
+ // causing compiler to fail in DCHECK in UTF-8 |
+ // character class code. |
+ const char b[] = { |
+ (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28, |
+ (char)0x24, (char)0x5b, (char)0x5e, (char)0xf5, (char)0x87, (char)0x87, |
+ (char)0x90, (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00, |
+ }; |
+ RE2 re(b); |
+ CHECK(!re.ok()); |
+} |
+ |
+TEST(RE2, Bug18523943) { |
+ // Bug in bitstate: case kInstFail was merged into the default with LOG(DFATAL). |
+ |
+ RE2::Options opt; |
+ const char a[] = { |
+ (char)0x29, (char)0x29, (char)0x24, (char)0x00, |
+ }; |
+ const char b[] = { |
+ (char)0x28, (char)0x0a, (char)0x2a, (char)0x2a, (char)0x29, (char)0x00, |
+ }; |
+ opt.set_log_errors(false); |
+ opt.set_encoding(RE2::Options::EncodingLatin1); |
+ opt.set_posix_syntax(true); |
+ opt.set_longest_match(true); |
+ opt.set_literal(false); |
+ opt.set_never_nl(true); |
+ |
+ RE2 re((const char*)b, opt); |
+ CHECK(re.ok()); |
+ string s1; |
+ CHECK(!RE2::PartialMatch((const char*)a, re, &s1)); |
+} |
+ |
+TEST(RE2, Bug21371806) { |
+ // Bug in parser accepting Unicode groups in Latin-1 mode, |
+ // causing compiler to fail in DCHECK in prog.cc. |
+ |
+ RE2::Options opt; |
+ opt.set_encoding(RE2::Options::EncodingLatin1); |
+ |
+ RE2 re("g\\p{Zl}]", opt); |
+ CHECK(re.ok()); |
+} |
+ |
} // namespace re2 |