OLD | NEW |
(Empty) | |
| 1 // -*- coding: utf-8 -*- |
| 2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved. |
| 3 // Use of this source code is governed by a BSD-style |
| 4 // license that can be found in the LICENSE file. |
| 5 |
| 6 // TODO: Test extractions for PartialMatch/Consume |
| 7 |
| 8 #include <sys/types.h> |
| 9 #ifndef WIN32 |
| 10 #include <sys/mman.h> |
| 11 #endif |
| 12 #include <sys/stat.h> |
| 13 #include <errno.h> |
| 14 #include <vector> |
| 15 #include "util/test.h" |
| 16 #include "re2/re2.h" |
| 17 #include "re2/regexp.h" |
| 18 |
| 19 #ifdef WIN32 |
| 20 #include <stdio.h> |
| 21 #define snprintf _snprintf |
| 22 #endif |
| 23 |
| 24 DECLARE_bool(logtostderr); |
| 25 |
| 26 namespace re2 { |
| 27 |
| 28 TEST(RE2, HexTests) { |
| 29 |
| 30 VLOG(1) << "hex tests"; |
| 31 |
| 32 #define CHECK_HEX(type, value) \ |
| 33 do { \ |
| 34 type v; \ |
| 35 CHECK(RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \ |
| 36 CHECK_EQ(v, 0x ## value); \ |
| 37 CHECK(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v)
)); \ |
| 38 CHECK_EQ(v, 0x ## value); \ |
| 39 } while(0) |
| 40 |
| 41 CHECK_HEX(short, 2bad); |
| 42 CHECK_HEX(unsigned short, 2badU); |
| 43 CHECK_HEX(int, dead); |
| 44 CHECK_HEX(unsigned int, deadU); |
| 45 CHECK_HEX(long, 7eadbeefL); |
| 46 CHECK_HEX(unsigned long, deadbeefUL); |
| 47 CHECK_HEX(long long, 12345678deadbeefLL); |
| 48 CHECK_HEX(unsigned long long, cafebabedeadbeefULL); |
| 49 |
| 50 #undef CHECK_HEX |
| 51 } |
| 52 |
| 53 TEST(RE2, OctalTests) { |
| 54 VLOG(1) << "octal tests"; |
| 55 |
| 56 #define CHECK_OCTAL(type, value) \ |
| 57 do { \ |
| 58 type v; \ |
| 59 CHECK(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \ |
| 60 CHECK_EQ(v, 0 ## value); \ |
| 61 CHECK(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))
); \ |
| 62 CHECK_EQ(v, 0 ## value); \ |
| 63 } while(0) |
| 64 |
| 65 CHECK_OCTAL(short, 77777); |
| 66 CHECK_OCTAL(unsigned short, 177777U); |
| 67 CHECK_OCTAL(int, 17777777777); |
| 68 CHECK_OCTAL(unsigned int, 37777777777U); |
| 69 CHECK_OCTAL(long, 17777777777L); |
| 70 CHECK_OCTAL(unsigned long, 37777777777UL); |
| 71 CHECK_OCTAL(long long, 777777777777777777777LL); |
| 72 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL); |
| 73 |
| 74 #undef CHECK_OCTAL |
| 75 } |
| 76 |
| 77 TEST(RE2, DecimalTests) { |
| 78 VLOG(1) << "decimal tests"; |
| 79 |
| 80 #define CHECK_DECIMAL(type, value) \ |
| 81 do { \ |
| 82 type v; \ |
| 83 CHECK(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \ |
| 84 CHECK_EQ(v, value); \ |
| 85 CHECK(RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v)));
\ |
| 86 CHECK_EQ(v, value); \ |
| 87 } while(0) |
| 88 |
| 89 CHECK_DECIMAL(short, -1); |
| 90 CHECK_DECIMAL(unsigned short, 9999); |
| 91 CHECK_DECIMAL(int, -1000); |
| 92 CHECK_DECIMAL(unsigned int, 12345U); |
| 93 CHECK_DECIMAL(long, -10000000L); |
| 94 CHECK_DECIMAL(unsigned long, 3083324652U); |
| 95 CHECK_DECIMAL(long long, -100000000000000LL); |
| 96 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL); |
| 97 |
| 98 #undef CHECK_DECIMAL |
| 99 } |
| 100 |
| 101 TEST(RE2, Replace) { |
| 102 VLOG(1) << "TestReplace"; |
| 103 |
| 104 struct ReplaceTest { |
| 105 const char *regexp; |
| 106 const char *rewrite; |
| 107 const char *original; |
| 108 const char *single; |
| 109 const char *global; |
| 110 int greplace_count; |
| 111 }; |
| 112 static const ReplaceTest tests[] = { |
| 113 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", |
| 114 "\\2\\1ay", |
| 115 "the quick brown fox jumps over the lazy dogs.", |
| 116 "ethay quick brown fox jumps over the lazy dogs.", |
| 117 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.", |
| 118 9 }, |
| 119 { "\\w+", |
| 120 "\\0-NOSPAM", |
| 121 "abcd.efghi@google.com", |
| 122 "abcd-NOSPAM.efghi@google.com", |
| 123 "abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM", |
| 124 4 }, |
| 125 { "^", |
| 126 "(START)", |
| 127 "foo", |
| 128 "(START)foo", |
| 129 "(START)foo", |
| 130 1 }, |
| 131 { "^", |
| 132 "(START)", |
| 133 "", |
| 134 "(START)", |
| 135 "(START)", |
| 136 1 }, |
| 137 { "$", |
| 138 "(END)", |
| 139 "", |
| 140 "(END)", |
| 141 "(END)", |
| 142 1 }, |
| 143 { "b", |
| 144 "bb", |
| 145 "ababababab", |
| 146 "abbabababab", |
| 147 "abbabbabbabbabb", |
| 148 5 }, |
| 149 { "b", |
| 150 "bb", |
| 151 "bbbbbb", |
| 152 "bbbbbbb", |
| 153 "bbbbbbbbbbbb", |
| 154 6 }, |
| 155 { "b+", |
| 156 "bb", |
| 157 "bbbbbb", |
| 158 "bb", |
| 159 "bb", |
| 160 1 }, |
| 161 { "b*", |
| 162 "bb", |
| 163 "bbbbbb", |
| 164 "bb", |
| 165 "bb", |
| 166 1 }, |
| 167 { "b*", |
| 168 "bb", |
| 169 "aaaaa", |
| 170 "bbaaaaa", |
| 171 "bbabbabbabbabbabb", |
| 172 6 }, |
| 173 // Check newline handling |
| 174 { "a.*a", |
| 175 "(\\0)", |
| 176 "aba\naba", |
| 177 "(aba)\naba", |
| 178 "(aba)\n(aba)", |
| 179 2 }, |
| 180 { "", NULL, NULL, NULL, NULL, 0 } |
| 181 }; |
| 182 |
| 183 for (const ReplaceTest *t = tests; t->original != NULL; ++t) { |
| 184 VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->re
write); |
| 185 string one(t->original); |
| 186 CHECK(RE2::Replace(&one, t->regexp, t->rewrite)); |
| 187 CHECK_EQ(one, t->single); |
| 188 string all(t->original); |
| 189 CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count) |
| 190 << "Got: " << all; |
| 191 CHECK_EQ(all, t->global); |
| 192 } |
| 193 } |
| 194 |
| 195 static void TestCheckRewriteString(const char* regexp, const char* rewrite, |
| 196 bool expect_ok) { |
| 197 string error; |
| 198 RE2 exp(regexp); |
| 199 bool actual_ok = exp.CheckRewriteString(rewrite, &error); |
| 200 EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error; |
| 201 } |
| 202 |
| 203 TEST(CheckRewriteString, all) { |
| 204 TestCheckRewriteString("abc", "foo", true); |
| 205 TestCheckRewriteString("abc", "foo\\", false); |
| 206 TestCheckRewriteString("abc", "foo\\0bar", true); |
| 207 |
| 208 TestCheckRewriteString("a(b)c", "foo", true); |
| 209 TestCheckRewriteString("a(b)c", "foo\\0bar", true); |
| 210 TestCheckRewriteString("a(b)c", "foo\\1bar", true); |
| 211 TestCheckRewriteString("a(b)c", "foo\\2bar", false); |
| 212 TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true); |
| 213 |
| 214 TestCheckRewriteString("a(b)(c)", "foo\\12", true); |
| 215 TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true); |
| 216 TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false); |
| 217 } |
| 218 |
| 219 TEST(RE2, Extract) { |
| 220 VLOG(1) << "TestExtract"; |
| 221 |
| 222 string s; |
| 223 |
| 224 CHECK(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s)); |
| 225 CHECK_EQ(s, "kremvax!boris"); |
| 226 |
| 227 CHECK(RE2::Extract("foo", ".*", "'\\0'", &s)); |
| 228 CHECK_EQ(s, "'foo'"); |
| 229 // check that false match doesn't overwrite |
| 230 CHECK(!RE2::Extract("baz", "bar", "'\\0'", &s)); |
| 231 CHECK_EQ(s, "'foo'"); |
| 232 } |
| 233 |
| 234 TEST(RE2, Consume) { |
| 235 VLOG(1) << "TestConsume"; |
| 236 |
| 237 RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace |
| 238 string word; |
| 239 |
| 240 string s(" aaa b!@#$@#$cccc"); |
| 241 StringPiece input(s); |
| 242 |
| 243 CHECK(RE2::Consume(&input, r, &word)); |
| 244 CHECK_EQ(word, "aaa") << " input: " << input; |
| 245 CHECK(RE2::Consume(&input, r, &word)); |
| 246 CHECK_EQ(word, "b") << " input: " << input; |
| 247 CHECK(! RE2::Consume(&input, r, &word)) << " input: " << input; |
| 248 } |
| 249 |
| 250 TEST(RE2, ConsumeN) { |
| 251 const string s(" one two three 4"); |
| 252 StringPiece input(s); |
| 253 |
| 254 RE2::Arg argv[2]; |
| 255 const RE2::Arg* const args[2] = { &argv[0], &argv[1] }; |
| 256 |
| 257 // 0 arg |
| 258 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0)); // Skips "one". |
| 259 |
| 260 // 1 arg |
| 261 string word; |
| 262 argv[0] = &word; |
| 263 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1)); |
| 264 EXPECT_EQ("two", word); |
| 265 |
| 266 // Multi-args |
| 267 int n; |
| 268 argv[1] = &n; |
| 269 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2)); |
| 270 EXPECT_EQ("three", word); |
| 271 EXPECT_EQ(4, n); |
| 272 } |
| 273 |
| 274 TEST(RE2, FindAndConsume) { |
| 275 VLOG(1) << "TestFindAndConsume"; |
| 276 |
| 277 RE2 r("(\\w+)"); // matches a word |
| 278 string word; |
| 279 |
| 280 string s(" aaa b!@#$@#$cccc"); |
| 281 StringPiece input(s); |
| 282 |
| 283 CHECK(RE2::FindAndConsume(&input, r, &word)); |
| 284 CHECK_EQ(word, "aaa"); |
| 285 CHECK(RE2::FindAndConsume(&input, r, &word)); |
| 286 CHECK_EQ(word, "b"); |
| 287 CHECK(RE2::FindAndConsume(&input, r, &word)); |
| 288 CHECK_EQ(word, "cccc"); |
| 289 CHECK(! RE2::FindAndConsume(&input, r, &word)); |
| 290 |
| 291 // Check that FindAndConsume works without any submatches. |
| 292 // Earlier version used uninitialized data for |
| 293 // length to consume. |
| 294 input = "aaa"; |
| 295 CHECK(RE2::FindAndConsume(&input, "aaa")); |
| 296 CHECK_EQ(input, ""); |
| 297 } |
| 298 |
| 299 TEST(RE2, FindAndConsumeN) { |
| 300 const string s(" one two three 4"); |
| 301 StringPiece input(s); |
| 302 |
| 303 RE2::Arg argv[2]; |
| 304 const RE2::Arg* const args[2] = { &argv[0], &argv[1] }; |
| 305 |
| 306 // 0 arg |
| 307 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0)); // Skips "one". |
| 308 |
| 309 // 1 arg |
| 310 string word; |
| 311 argv[0] = &word; |
| 312 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1)); |
| 313 EXPECT_EQ("two", word); |
| 314 |
| 315 // Multi-args |
| 316 int n; |
| 317 argv[1] = &n; |
| 318 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2)); |
| 319 EXPECT_EQ("three", word); |
| 320 EXPECT_EQ(4, n); |
| 321 } |
| 322 |
| 323 TEST(RE2, MatchNumberPeculiarity) { |
| 324 VLOG(1) << "TestMatchNumberPeculiarity"; |
| 325 |
| 326 RE2 r("(foo)|(bar)|(baz)"); |
| 327 string word1; |
| 328 string word2; |
| 329 string word3; |
| 330 |
| 331 CHECK(RE2::PartialMatch("foo", r, &word1, &word2, &word3)); |
| 332 CHECK_EQ(word1, "foo"); |
| 333 CHECK_EQ(word2, ""); |
| 334 CHECK_EQ(word3, ""); |
| 335 CHECK(RE2::PartialMatch("bar", r, &word1, &word2, &word3)); |
| 336 CHECK_EQ(word1, ""); |
| 337 CHECK_EQ(word2, "bar"); |
| 338 CHECK_EQ(word3, ""); |
| 339 CHECK(RE2::PartialMatch("baz", r, &word1, &word2, &word3)); |
| 340 CHECK_EQ(word1, ""); |
| 341 CHECK_EQ(word2, ""); |
| 342 CHECK_EQ(word3, "baz"); |
| 343 CHECK(!RE2::PartialMatch("f", r, &word1, &word2, &word3)); |
| 344 |
| 345 string a; |
| 346 CHECK(RE2::FullMatch("hello", "(foo)|hello", &a)); |
| 347 CHECK_EQ(a, ""); |
| 348 } |
| 349 |
| 350 TEST(RE2, Match) { |
| 351 RE2 re("((\\w+):([0-9]+))"); // extracts host and port |
| 352 StringPiece group[4]; |
| 353 |
| 354 // No match. |
| 355 StringPiece s = "zyzzyva"; |
| 356 CHECK(!re.Match(s, 0, s.size(), RE2::UNANCHORED, |
| 357 group, arraysize(group))); |
| 358 |
| 359 // Matches and extracts. |
| 360 s = "a chrisr:9000 here"; |
| 361 CHECK(re.Match(s, 0, s.size(), RE2::UNANCHORED, |
| 362 group, arraysize(group))); |
| 363 CHECK_EQ(group[0], "chrisr:9000"); |
| 364 CHECK_EQ(group[1], "chrisr:9000"); |
| 365 CHECK_EQ(group[2], "chrisr"); |
| 366 CHECK_EQ(group[3], "9000"); |
| 367 |
| 368 string all, host; |
| 369 int port; |
| 370 CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port)); |
| 371 CHECK_EQ(all, "chrisr:9000"); |
| 372 CHECK_EQ(host, "chrisr"); |
| 373 CHECK_EQ(port, 9000); |
| 374 } |
| 375 |
| 376 static void TestRecursion(int size, const char *pattern) { |
| 377 // Fill up a string repeating the pattern given |
| 378 string domain; |
| 379 domain.resize(size); |
| 380 int patlen = strlen(pattern); |
| 381 for (int i = 0; i < size; ++i) { |
| 382 domain[i] = pattern[i % patlen]; |
| 383 } |
| 384 // Just make sure it doesn't crash due to too much recursion. |
| 385 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet); |
| 386 RE2::FullMatch(domain, re); |
| 387 } |
| 388 |
| 389 // A meta-quoted string, interpreted as a pattern, should always match |
| 390 // the original unquoted string. |
| 391 static void TestQuoteMeta(string unquoted, |
| 392 const RE2::Options& options = RE2::DefaultOptions) { |
| 393 string quoted = RE2::QuoteMeta(unquoted); |
| 394 RE2 re(quoted, options); |
| 395 EXPECT_TRUE_M(RE2::FullMatch(unquoted, re), |
| 396 "Unquoted='" + unquoted + "', quoted='" + quoted + "'."); |
| 397 } |
| 398 |
| 399 // A meta-quoted string, interpreted as a pattern, should always match |
| 400 // the original unquoted string. |
| 401 static void NegativeTestQuoteMeta(string unquoted, string should_not_match, |
| 402 const RE2::Options& options = RE2::DefaultOpti
ons) { |
| 403 string quoted = RE2::QuoteMeta(unquoted); |
| 404 RE2 re(quoted, options); |
| 405 EXPECT_FALSE_M(RE2::FullMatch(should_not_match, re), |
| 406 "Unquoted='" + unquoted + "', quoted='" + quoted + "'."); |
| 407 } |
| 408 |
| 409 // Tests that quoted meta characters match their original strings, |
| 410 // and that a few things that shouldn't match indeed do not. |
| 411 TEST(QuoteMeta, Simple) { |
| 412 TestQuoteMeta("foo"); |
| 413 TestQuoteMeta("foo.bar"); |
| 414 TestQuoteMeta("foo\\.bar"); |
| 415 TestQuoteMeta("[1-9]"); |
| 416 TestQuoteMeta("1.5-2.0?"); |
| 417 TestQuoteMeta("\\d"); |
| 418 TestQuoteMeta("Who doesn't like ice cream?"); |
| 419 TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); |
| 420 TestQuoteMeta("((?!)xxx).*yyy"); |
| 421 TestQuoteMeta("(["); |
| 422 } |
| 423 TEST(QuoteMeta, SimpleNegative) { |
| 424 NegativeTestQuoteMeta("foo", "bar"); |
| 425 NegativeTestQuoteMeta("...", "bar"); |
| 426 NegativeTestQuoteMeta("\\.", "."); |
| 427 NegativeTestQuoteMeta("\\.", ".."); |
| 428 NegativeTestQuoteMeta("(a)", "a"); |
| 429 NegativeTestQuoteMeta("(a|b)", "a"); |
| 430 NegativeTestQuoteMeta("(a|b)", "(a)"); |
| 431 NegativeTestQuoteMeta("(a|b)", "a|b"); |
| 432 NegativeTestQuoteMeta("[0-9]", "0"); |
| 433 NegativeTestQuoteMeta("[0-9]", "0-9"); |
| 434 NegativeTestQuoteMeta("[0-9]", "[9]"); |
| 435 NegativeTestQuoteMeta("((?!)xxx)", "xxx"); |
| 436 } |
| 437 |
| 438 TEST(QuoteMeta, Latin1) { |
| 439 TestQuoteMeta("3\xb2 = 9", RE2::Latin1); |
| 440 } |
| 441 |
| 442 TEST(QuoteMeta, UTF8) { |
| 443 TestQuoteMeta("Plácido Domingo"); |
| 444 TestQuoteMeta("xyz"); // No fancy utf8. |
| 445 TestQuoteMeta("\xc2\xb0"); // 2-byte utf8 -- a degree symbol. |
| 446 TestQuoteMeta("27\xc2\xb0 degrees"); // As a middle character. |
| 447 TestQuoteMeta("\xe2\x80\xb3"); // 3-byte utf8 -- a double prime. |
| 448 TestQuoteMeta("\xf0\x9d\x85\x9f"); // 4-byte utf8 -- a music note. |
| 449 TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, this should |
| 450 // still work. |
| 451 NegativeTestQuoteMeta("27\xc2\xb0", |
| 452 "27\\\xc2\\\xb0"); // 2-byte utf8 -- a degree symbol. |
| 453 } |
| 454 |
| 455 TEST(QuoteMeta, HasNull) { |
| 456 string has_null; |
| 457 |
| 458 // string with one null character |
| 459 has_null += '\0'; |
| 460 TestQuoteMeta(has_null); |
| 461 NegativeTestQuoteMeta(has_null, ""); |
| 462 |
| 463 // Don't want null-followed-by-'1' to be interpreted as '\01'. |
| 464 has_null += '1'; |
| 465 TestQuoteMeta(has_null); |
| 466 NegativeTestQuoteMeta(has_null, "\1"); |
| 467 } |
| 468 |
| 469 TEST(ProgramSize, BigProgram) { |
| 470 RE2 re_simple("simple regexp"); |
| 471 RE2 re_medium("medium.*regexp"); |
| 472 RE2 re_complex("hard.{1,128}regexp"); |
| 473 |
| 474 CHECK_GT(re_simple.ProgramSize(), 0); |
| 475 CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize()); |
| 476 CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize()); |
| 477 } |
| 478 |
| 479 // Issue 956519: handling empty character sets was |
| 480 // causing NULL dereference. This tests a few empty character sets. |
| 481 // (The way to get an empty character set is to negate a full one.) |
| 482 TEST(EmptyCharset, Fuzz) { |
| 483 static const char *empties[] = { |
| 484 "[^\\S\\s]", |
| 485 "[^\\S[:space:]]", |
| 486 "[^\\D\\d]", |
| 487 "[^\\D[:digit:]]" |
| 488 }; |
| 489 for (int i = 0; i < arraysize(empties); i++) |
| 490 CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0)); |
| 491 } |
| 492 |
| 493 // Test that named groups work correctly. |
| 494 TEST(Capture, NamedGroups) { |
| 495 { |
| 496 RE2 re("(hello world)"); |
| 497 CHECK_EQ(re.NumberOfCapturingGroups(), 1); |
| 498 const map<string, int>& m = re.NamedCapturingGroups(); |
| 499 CHECK_EQ(m.size(), 0); |
| 500 } |
| 501 |
| 502 { |
| 503 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))"); |
| 504 CHECK_EQ(re.NumberOfCapturingGroups(), 6); |
| 505 const map<string, int>& m = re.NamedCapturingGroups(); |
| 506 CHECK_EQ(m.size(), 4); |
| 507 CHECK_EQ(m.find("A")->second, 1); |
| 508 CHECK_EQ(m.find("B")->second, 2); |
| 509 CHECK_EQ(m.find("C")->second, 3); |
| 510 CHECK_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous |
| 511 } |
| 512 } |
| 513 |
| 514 TEST(RE2, FullMatchWithNoArgs) { |
| 515 CHECK(RE2::FullMatch("h", "h")); |
| 516 CHECK(RE2::FullMatch("hello", "hello")); |
| 517 CHECK(RE2::FullMatch("hello", "h.*o")); |
| 518 CHECK(!RE2::FullMatch("othello", "h.*o")); // Must be anchored at front |
| 519 CHECK(!RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end |
| 520 } |
| 521 |
| 522 TEST(RE2, PartialMatch) { |
| 523 CHECK(RE2::PartialMatch("x", "x")); |
| 524 CHECK(RE2::PartialMatch("hello", "h.*o")); |
| 525 CHECK(RE2::PartialMatch("othello", "h.*o")); |
| 526 CHECK(RE2::PartialMatch("hello!", "h.*o")); |
| 527 CHECK(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))")); |
| 528 } |
| 529 |
| 530 TEST(RE2, PartialMatchN) { |
| 531 RE2::Arg argv[2]; |
| 532 const RE2::Arg* const args[2] = { &argv[0], &argv[1] }; |
| 533 |
| 534 // 0 arg |
| 535 EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0)); |
| 536 EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0)); |
| 537 |
| 538 // 1 arg |
| 539 int i; |
| 540 argv[0] = &i; |
| 541 EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1)); |
| 542 EXPECT_EQ(1001, i); |
| 543 EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1)); |
| 544 |
| 545 // Multi-arg |
| 546 string s; |
| 547 argv[1] = &s; |
| 548 EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2)); |
| 549 EXPECT_EQ(42, i); |
| 550 EXPECT_EQ("life", s); |
| 551 EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2)); |
| 552 } |
| 553 |
| 554 TEST(RE2, FullMatchZeroArg) { |
| 555 // Zero-arg |
| 556 CHECK(RE2::FullMatch("1001", "\\d+")); |
| 557 } |
| 558 |
| 559 TEST(RE2, FullMatchOneArg) { |
| 560 int i; |
| 561 |
| 562 // Single-arg |
| 563 CHECK(RE2::FullMatch("1001", "(\\d+)", &i)); |
| 564 CHECK_EQ(i, 1001); |
| 565 CHECK(RE2::FullMatch("-123", "(-?\\d+)", &i)); |
| 566 CHECK_EQ(i, -123); |
| 567 CHECK(!RE2::FullMatch("10", "()\\d+", &i)); |
| 568 CHECK(!RE2::FullMatch("1234567890123456789012345678901234567890", |
| 569 "(\\d+)", &i)); |
| 570 } |
| 571 |
| 572 TEST(RE2, FullMatchIntegerArg) { |
| 573 int i; |
| 574 |
| 575 // Digits surrounding integer-arg |
| 576 CHECK(RE2::FullMatch("1234", "1(\\d*)4", &i)); |
| 577 CHECK_EQ(i, 23); |
| 578 CHECK(RE2::FullMatch("1234", "(\\d)\\d+", &i)); |
| 579 CHECK_EQ(i, 1); |
| 580 CHECK(RE2::FullMatch("-1234", "(-\\d)\\d+", &i)); |
| 581 CHECK_EQ(i, -1); |
| 582 CHECK(RE2::PartialMatch("1234", "(\\d)", &i)); |
| 583 CHECK_EQ(i, 1); |
| 584 CHECK(RE2::PartialMatch("-1234", "(-\\d)", &i)); |
| 585 CHECK_EQ(i, -1); |
| 586 } |
| 587 |
| 588 TEST(RE2, FullMatchStringArg) { |
| 589 string s; |
| 590 // String-arg |
| 591 CHECK(RE2::FullMatch("hello", "h(.*)o", &s)); |
| 592 CHECK_EQ(s, string("ell")); |
| 593 } |
| 594 |
| 595 TEST(RE2, FullMatchStringPieceArg) { |
| 596 int i; |
| 597 // StringPiece-arg |
| 598 StringPiece sp; |
| 599 CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i)); |
| 600 CHECK_EQ(sp.size(), 4); |
| 601 CHECK(memcmp(sp.data(), "ruby", 4) == 0); |
| 602 CHECK_EQ(i, 1234); |
| 603 } |
| 604 |
| 605 TEST(RE2, FullMatchMultiArg) { |
| 606 int i; |
| 607 string s; |
| 608 // Multi-arg |
| 609 CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); |
| 610 CHECK_EQ(s, string("ruby")); |
| 611 CHECK_EQ(i, 1234); |
| 612 } |
| 613 |
| 614 TEST(RE2, FullMatchN) { |
| 615 RE2::Arg argv[2]; |
| 616 const RE2::Arg* const args[2] = { &argv[0], &argv[1] }; |
| 617 |
| 618 // 0 arg |
| 619 EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0)); |
| 620 EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0)); |
| 621 |
| 622 // 1 arg |
| 623 int i; |
| 624 argv[0] = &i; |
| 625 EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1)); |
| 626 EXPECT_EQ(1001, i); |
| 627 EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1)); |
| 628 |
| 629 // Multi-arg |
| 630 string s; |
| 631 argv[1] = &s; |
| 632 EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2)); |
| 633 EXPECT_EQ(42, i); |
| 634 EXPECT_EQ("life", s); |
| 635 EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2)); |
| 636 } |
| 637 |
| 638 TEST(RE2, FullMatchIgnoredArg) { |
| 639 int i; |
| 640 string s; |
| 641 // Ignored arg |
| 642 CHECK(RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i)); |
| 643 CHECK_EQ(s, string("ruby")); |
| 644 CHECK_EQ(i, 1234); |
| 645 } |
| 646 |
| 647 TEST(RE2, FullMatchTypedNullArg) { |
| 648 string s; |
| 649 |
| 650 // Ignore non-void* NULL arg |
| 651 CHECK(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL)); |
| 652 CHECK(RE2::FullMatch("hello", "h(.*)o", (string*)NULL)); |
| 653 CHECK(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL)); |
| 654 CHECK(RE2::FullMatch("1234", "(.*)", (int*)NULL)); |
| 655 CHECK(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL)); |
| 656 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL)); |
| 657 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL)); |
| 658 |
| 659 // Fail on non-void* NULL arg if the match doesn't parse for the given type. |
| 660 CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL)); |
| 661 CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL)); |
| 662 CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL)); |
| 663 CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL)); |
| 664 CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL)); |
| 665 } |
| 666 |
| 667 #ifndef WIN32 |
| 668 // Check that numeric parsing code does not read past the end of |
| 669 // the number being parsed. |
| 670 TEST(RE2, NULTerminated) { |
| 671 char *v; |
| 672 int x; |
| 673 long pagesize = sysconf(_SC_PAGE_SIZE); |
| 674 |
| 675 #ifndef MAP_ANONYMOUS |
| 676 #define MAP_ANONYMOUS MAP_ANON |
| 677 #endif |
| 678 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE, |
| 679 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)); |
| 680 CHECK(v != reinterpret_cast<char*>(-1)); |
| 681 LOG(INFO) << "Memory at " << (void*)v; |
| 682 CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno; |
| 683 v[pagesize - 1] = '1'; |
| 684 |
| 685 x = 0; |
| 686 CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x)); |
| 687 CHECK_EQ(x, 1); |
| 688 } |
| 689 #endif |
| 690 |
| 691 TEST(RE2, FullMatchTypeTests) { |
| 692 // Type tests |
| 693 string zeros(100, '0'); |
| 694 { |
| 695 char c; |
| 696 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); |
| 697 CHECK_EQ(c, 'H'); |
| 698 } |
| 699 { |
| 700 unsigned char c; |
| 701 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); |
| 702 CHECK_EQ(c, static_cast<unsigned char>('H')); |
| 703 } |
| 704 { |
| 705 int16 v; |
| 706 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); |
| 707 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100); |
| 708 CHECK(RE2::FullMatch("32767", "(-?\\d+)", &v)); CHECK_EQ(v, 32767); |
| 709 CHECK(RE2::FullMatch("-32768", "(-?\\d+)", &v)); CHECK_EQ(v, -32768); |
| 710 CHECK(!RE2::FullMatch("-32769", "(-?\\d+)", &v)); |
| 711 CHECK(!RE2::FullMatch("32768", "(-?\\d+)", &v)); |
| 712 } |
| 713 { |
| 714 uint16 v; |
| 715 CHECK(RE2::FullMatch("100", "(\\d+)", &v)); CHECK_EQ(v, 100); |
| 716 CHECK(RE2::FullMatch("32767", "(\\d+)", &v)); CHECK_EQ(v, 32767); |
| 717 CHECK(RE2::FullMatch("65535", "(\\d+)", &v)); CHECK_EQ(v, 65535); |
| 718 CHECK(!RE2::FullMatch("65536", "(\\d+)", &v)); |
| 719 } |
| 720 { |
| 721 int32 v; |
| 722 static const int32 max = 0x7fffffff; |
| 723 static const int32 min = -max - 1; |
| 724 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); |
| 725 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100); |
| 726 CHECK(RE2::FullMatch("2147483647", "(-?\\d+)", &v)); CHECK_EQ(v, max); |
| 727 CHECK(RE2::FullMatch("-2147483648", "(-?\\d+)", &v)); CHECK_EQ(v, min); |
| 728 CHECK(!RE2::FullMatch("-2147483649", "(-?\\d+)", &v)); |
| 729 CHECK(!RE2::FullMatch("2147483648", "(-?\\d+)", &v)); |
| 730 |
| 731 CHECK(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v)); |
| 732 CHECK_EQ(v, max); |
| 733 CHECK(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v)); |
| 734 CHECK_EQ(v, min); |
| 735 |
| 736 CHECK(!RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v)); |
| 737 CHECK(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v))); |
| 738 CHECK_EQ(v, max); |
| 739 CHECK(!RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v))); |
| 740 } |
| 741 { |
| 742 uint32 v; |
| 743 static const uint32 max = 0xfffffffful; |
| 744 CHECK(RE2::FullMatch("100", "(\\d+)", &v)); CHECK_EQ(v, 100); |
| 745 CHECK(RE2::FullMatch("4294967295", "(\\d+)", &v)); CHECK_EQ(v, max); |
| 746 CHECK(!RE2::FullMatch("4294967296", "(\\d+)", &v)); |
| 747 CHECK(!RE2::FullMatch("-1", "(\\d+)", &v)); |
| 748 |
| 749 CHECK(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); CHECK_EQ(v, max); |
| 750 } |
| 751 { |
| 752 int64 v; |
| 753 static const int64 max = 0x7fffffffffffffffull; |
| 754 static const int64 min = -max - 1; |
| 755 char buf[32]; |
| 756 |
| 757 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); |
| 758 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100); |
| 759 |
| 760 snprintf(buf, sizeof(buf), "%lld", max); |
| 761 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); |
| 762 |
| 763 snprintf(buf, sizeof(buf), "%lld", min); |
| 764 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, min); |
| 765 |
| 766 snprintf(buf, sizeof(buf), "%lld", max); |
| 767 assert(buf[strlen(buf)-1] != '9'); |
| 768 buf[strlen(buf)-1]++; |
| 769 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); |
| 770 |
| 771 snprintf(buf, sizeof(buf), "%lld", min); |
| 772 assert(buf[strlen(buf)-1] != '9'); |
| 773 buf[strlen(buf)-1]++; |
| 774 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); |
| 775 } |
| 776 { |
| 777 uint64 v; |
| 778 int64 v2; |
| 779 static const uint64 max = 0xffffffffffffffffull; |
| 780 char buf[32]; |
| 781 |
| 782 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); |
| 783 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v2)); CHECK_EQ(v2, -100); |
| 784 |
| 785 snprintf(buf, sizeof(buf), "%llu", max); |
| 786 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); |
| 787 |
| 788 assert(buf[strlen(buf)-1] != '9'); |
| 789 buf[strlen(buf)-1]++; |
| 790 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); |
| 791 } |
| 792 } |
| 793 |
| 794 TEST(RE2, FloatingPointFullMatchTypes) { |
| 795 string zeros(100, '0'); |
| 796 { |
| 797 float v; |
| 798 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100); |
| 799 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100); |
| 800 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23)); |
| 801 |
| 802 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v)); |
| 803 CHECK_EQ(v, float(1e23)); |
| 804 |
| 805 // 6700000000081920.1 is an edge case. |
| 806 // 6700000000081920 is exactly halfway between |
| 807 // two float32s, so the .1 should make it round up. |
| 808 // However, the .1 is outside the precision possible with |
| 809 // a float64: the nearest float64 is 6700000000081920. |
| 810 // So if the code uses strtod and then converts to float32, |
| 811 // round-to-even will make it round down instead of up. |
| 812 // To pass the test, the parser must call strtof directly. |
| 813 // This test case is carefully chosen to use only a 17-digit |
| 814 // number, since C does not guarantee to get the correctly |
| 815 // rounded answer for strtod and strtof unless the input is |
| 816 // short. |
| 817 CHECK(RE2::FullMatch("0.1", "(.*)", &v)); |
| 818 CHECK_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f); |
| 819 CHECK(RE2::FullMatch("6700000000081920.1", "(.*)", &v)); |
| 820 CHECK_EQ(v, 6700000000081920.1f) |
| 821 << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f); |
| 822 } |
| 823 { |
| 824 double v; |
| 825 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100); |
| 826 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100); |
| 827 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, 1e23); |
| 828 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v)); |
| 829 CHECK_EQ(v, double(1e23)); |
| 830 |
| 831 CHECK(RE2::FullMatch("0.1", "(.*)", &v)); |
| 832 CHECK_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1); |
| 833 CHECK(RE2::FullMatch("1.00000005960464485", "(.*)", &v)); |
| 834 CHECK_EQ(v, 1.0000000596046448) |
| 835 << StringPrintf("%.17g != %.17g", v, 1.0000000596046448); |
| 836 } |
| 837 } |
| 838 |
| 839 TEST(RE2, FullMatchAnchored) { |
| 840 int i; |
| 841 // Check that matching is fully anchored |
| 842 CHECK(!RE2::FullMatch("x1001", "(\\d+)", &i)); |
| 843 CHECK(!RE2::FullMatch("1001x", "(\\d+)", &i)); |
| 844 CHECK(RE2::FullMatch("x1001", "x(\\d+)", &i)); CHECK_EQ(i, 1001); |
| 845 CHECK(RE2::FullMatch("1001x", "(\\d+)x", &i)); CHECK_EQ(i, 1001); |
| 846 } |
| 847 |
| 848 TEST(RE2, FullMatchBraces) { |
| 849 // Braces |
| 850 CHECK(RE2::FullMatch("0abcd", "[0-9a-f+.-]{5,}")); |
| 851 CHECK(RE2::FullMatch("0abcde", "[0-9a-f+.-]{5,}")); |
| 852 CHECK(!RE2::FullMatch("0abc", "[0-9a-f+.-]{5,}")); |
| 853 } |
| 854 |
| 855 TEST(RE2, Complicated) { |
| 856 // Complicated RE2 |
| 857 CHECK(RE2::FullMatch("foo", "foo|bar|[A-Z]")); |
| 858 CHECK(RE2::FullMatch("bar", "foo|bar|[A-Z]")); |
| 859 CHECK(RE2::FullMatch("X", "foo|bar|[A-Z]")); |
| 860 CHECK(!RE2::FullMatch("XY", "foo|bar|[A-Z]")); |
| 861 } |
| 862 |
| 863 TEST(RE2, FullMatchEnd) { |
| 864 // Check full-match handling (needs '$' tacked on internally) |
| 865 CHECK(RE2::FullMatch("fo", "fo|foo")); |
| 866 CHECK(RE2::FullMatch("foo", "fo|foo")); |
| 867 CHECK(RE2::FullMatch("fo", "fo|foo$")); |
| 868 CHECK(RE2::FullMatch("foo", "fo|foo$")); |
| 869 CHECK(RE2::FullMatch("foo", "foo$")); |
| 870 CHECK(!RE2::FullMatch("foo$bar", "foo\\$")); |
| 871 CHECK(!RE2::FullMatch("fox", "fo|bar")); |
| 872 |
| 873 // Uncomment the following if we change the handling of '$' to |
| 874 // prevent it from matching a trailing newline |
| 875 if (false) { |
| 876 // Check that we don't get bitten by pcre's special handling of a |
| 877 // '\n' at the end of the string matching '$' |
| 878 CHECK(!RE2::PartialMatch("foo\n", "foo$")); |
| 879 } |
| 880 } |
| 881 |
| 882 TEST(RE2, FullMatchArgCount) { |
| 883 // Number of args |
| 884 int a[16]; |
| 885 CHECK(RE2::FullMatch("", "")); |
| 886 |
| 887 memset(a, 0, sizeof(0)); |
| 888 CHECK(RE2::FullMatch("1", |
| 889 "(\\d){1}", |
| 890 &a[0])); |
| 891 CHECK_EQ(a[0], 1); |
| 892 |
| 893 memset(a, 0, sizeof(0)); |
| 894 CHECK(RE2::FullMatch("12", |
| 895 "(\\d)(\\d)", |
| 896 &a[0], &a[1])); |
| 897 CHECK_EQ(a[0], 1); |
| 898 CHECK_EQ(a[1], 2); |
| 899 |
| 900 memset(a, 0, sizeof(0)); |
| 901 CHECK(RE2::FullMatch("123", |
| 902 "(\\d)(\\d)(\\d)", |
| 903 &a[0], &a[1], &a[2])); |
| 904 CHECK_EQ(a[0], 1); |
| 905 CHECK_EQ(a[1], 2); |
| 906 CHECK_EQ(a[2], 3); |
| 907 |
| 908 memset(a, 0, sizeof(0)); |
| 909 CHECK(RE2::FullMatch("1234", |
| 910 "(\\d)(\\d)(\\d)(\\d)", |
| 911 &a[0], &a[1], &a[2], &a[3])); |
| 912 CHECK_EQ(a[0], 1); |
| 913 CHECK_EQ(a[1], 2); |
| 914 CHECK_EQ(a[2], 3); |
| 915 CHECK_EQ(a[3], 4); |
| 916 |
| 917 memset(a, 0, sizeof(0)); |
| 918 CHECK(RE2::FullMatch("12345", |
| 919 "(\\d)(\\d)(\\d)(\\d)(\\d)", |
| 920 &a[0], &a[1], &a[2], &a[3], |
| 921 &a[4])); |
| 922 CHECK_EQ(a[0], 1); |
| 923 CHECK_EQ(a[1], 2); |
| 924 CHECK_EQ(a[2], 3); |
| 925 CHECK_EQ(a[3], 4); |
| 926 CHECK_EQ(a[4], 5); |
| 927 |
| 928 memset(a, 0, sizeof(0)); |
| 929 CHECK(RE2::FullMatch("123456", |
| 930 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", |
| 931 &a[0], &a[1], &a[2], &a[3], |
| 932 &a[4], &a[5])); |
| 933 CHECK_EQ(a[0], 1); |
| 934 CHECK_EQ(a[1], 2); |
| 935 CHECK_EQ(a[2], 3); |
| 936 CHECK_EQ(a[3], 4); |
| 937 CHECK_EQ(a[4], 5); |
| 938 CHECK_EQ(a[5], 6); |
| 939 |
| 940 memset(a, 0, sizeof(0)); |
| 941 CHECK(RE2::FullMatch("1234567", |
| 942 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", |
| 943 &a[0], &a[1], &a[2], &a[3], |
| 944 &a[4], &a[5], &a[6])); |
| 945 CHECK_EQ(a[0], 1); |
| 946 CHECK_EQ(a[1], 2); |
| 947 CHECK_EQ(a[2], 3); |
| 948 CHECK_EQ(a[3], 4); |
| 949 CHECK_EQ(a[4], 5); |
| 950 CHECK_EQ(a[5], 6); |
| 951 CHECK_EQ(a[6], 7); |
| 952 |
| 953 memset(a, 0, sizeof(0)); |
| 954 CHECK(RE2::FullMatch("1234567890123456", |
| 955 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)" |
| 956 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", |
| 957 &a[0], &a[1], &a[2], &a[3], |
| 958 &a[4], &a[5], &a[6], &a[7], |
| 959 &a[8], &a[9], &a[10], &a[11], |
| 960 &a[12], &a[13], &a[14], &a[15])); |
| 961 CHECK_EQ(a[0], 1); |
| 962 CHECK_EQ(a[1], 2); |
| 963 CHECK_EQ(a[2], 3); |
| 964 CHECK_EQ(a[3], 4); |
| 965 CHECK_EQ(a[4], 5); |
| 966 CHECK_EQ(a[5], 6); |
| 967 CHECK_EQ(a[6], 7); |
| 968 CHECK_EQ(a[7], 8); |
| 969 CHECK_EQ(a[8], 9); |
| 970 CHECK_EQ(a[9], 0); |
| 971 CHECK_EQ(a[10], 1); |
| 972 CHECK_EQ(a[11], 2); |
| 973 CHECK_EQ(a[12], 3); |
| 974 CHECK_EQ(a[13], 4); |
| 975 CHECK_EQ(a[14], 5); |
| 976 CHECK_EQ(a[15], 6); |
| 977 } |
| 978 |
| 979 TEST(RE2, Accessors) { |
| 980 // Check the pattern() accessor |
| 981 { |
| 982 const string kPattern = "http://([^/]+)/.*"; |
| 983 const RE2 re(kPattern); |
| 984 CHECK_EQ(kPattern, re.pattern()); |
| 985 } |
| 986 |
| 987 // Check RE2 error field. |
| 988 { |
| 989 RE2 re("foo"); |
| 990 CHECK(re.error().empty()); // Must have no error |
| 991 CHECK(re.ok()); |
| 992 CHECK(re.error_code() == RE2::NoError); |
| 993 } |
| 994 } |
| 995 |
| 996 TEST(RE2, UTF8) { |
| 997 // Check UTF-8 handling |
| 998 // Three Japanese characters (nihongo) |
| 999 const char utf8_string[] = { |
| 1000 0xe6, 0x97, 0xa5, // 65e5 |
| 1001 0xe6, 0x9c, 0xac, // 627c |
| 1002 0xe8, 0xaa, 0x9e, // 8a9e |
| 1003 0 |
| 1004 }; |
| 1005 const char utf8_pattern[] = { |
| 1006 '.', |
| 1007 0xe6, 0x9c, 0xac, // 627c |
| 1008 '.', |
| 1009 0 |
| 1010 }; |
| 1011 |
| 1012 // Both should match in either mode, bytes or UTF-8 |
| 1013 RE2 re_test1(".........", RE2::Latin1); |
| 1014 CHECK(RE2::FullMatch(utf8_string, re_test1)); |
| 1015 RE2 re_test2("..."); |
| 1016 CHECK(RE2::FullMatch(utf8_string, re_test2)); |
| 1017 |
| 1018 // Check that '.' matches one byte or UTF-8 character |
| 1019 // according to the mode. |
| 1020 string s; |
| 1021 RE2 re_test3("(.)", RE2::Latin1); |
| 1022 CHECK(RE2::PartialMatch(utf8_string, re_test3, &s)); |
| 1023 CHECK_EQ(s, string("\xe6")); |
| 1024 RE2 re_test4("(.)"); |
| 1025 CHECK(RE2::PartialMatch(utf8_string, re_test4, &s)); |
| 1026 CHECK_EQ(s, string("\xe6\x97\xa5")); |
| 1027 |
| 1028 // Check that string matches itself in either mode |
| 1029 RE2 re_test5(utf8_string, RE2::Latin1); |
| 1030 CHECK(RE2::FullMatch(utf8_string, re_test5)); |
| 1031 RE2 re_test6(utf8_string); |
| 1032 CHECK(RE2::FullMatch(utf8_string, re_test6)); |
| 1033 |
| 1034 // Check that pattern matches string only in UTF8 mode |
| 1035 RE2 re_test7(utf8_pattern, RE2::Latin1); |
| 1036 CHECK(!RE2::FullMatch(utf8_string, re_test7)); |
| 1037 RE2 re_test8(utf8_pattern); |
| 1038 CHECK(RE2::FullMatch(utf8_string, re_test8)); |
| 1039 } |
| 1040 |
| 1041 TEST(RE2, UngreedyUTF8) { |
| 1042 // Check that ungreedy, UTF8 regular expressions don't match when they |
| 1043 // oughtn't -- see bug 82246. |
| 1044 { |
| 1045 // This code always worked. |
| 1046 const char* pattern = "\\w+X"; |
| 1047 const string target = "a aX"; |
| 1048 RE2 match_sentence(pattern, RE2::Latin1); |
| 1049 RE2 match_sentence_re(pattern); |
| 1050 |
| 1051 CHECK(!RE2::FullMatch(target, match_sentence)); |
| 1052 CHECK(!RE2::FullMatch(target, match_sentence_re)); |
| 1053 } |
| 1054 { |
| 1055 const char* pattern = "(?U)\\w+X"; |
| 1056 const string target = "a aX"; |
| 1057 RE2 match_sentence(pattern, RE2::Latin1); |
| 1058 CHECK_EQ(match_sentence.error(), ""); |
| 1059 RE2 match_sentence_re(pattern); |
| 1060 |
| 1061 CHECK(!RE2::FullMatch(target, match_sentence)); |
| 1062 CHECK(!RE2::FullMatch(target, match_sentence_re)); |
| 1063 } |
| 1064 } |
| 1065 |
| 1066 TEST(RE2, Rejects) { |
| 1067 { RE2 re("a\\1", RE2::Quiet); CHECK(!re.ok()); } |
| 1068 { |
| 1069 RE2 re("a[x", RE2::Quiet); |
| 1070 CHECK(!re.ok()); |
| 1071 } |
| 1072 { |
| 1073 RE2 re("a[z-a]", RE2::Quiet); |
| 1074 CHECK(!re.ok()); |
| 1075 } |
| 1076 { |
| 1077 RE2 re("a[[:foobar:]]", RE2::Quiet); |
| 1078 CHECK(!re.ok()); |
| 1079 } |
| 1080 { |
| 1081 RE2 re("a(b", RE2::Quiet); |
| 1082 CHECK(!re.ok()); |
| 1083 } |
| 1084 { |
| 1085 RE2 re("a\\", RE2::Quiet); |
| 1086 CHECK(!re.ok()); |
| 1087 } |
| 1088 } |
| 1089 |
| 1090 TEST(RE2, NoCrash) { |
| 1091 // Test that using a bad regexp doesn't crash. |
| 1092 { |
| 1093 RE2 re("a\\", RE2::Quiet); |
| 1094 CHECK(!re.ok()); |
| 1095 CHECK(!RE2::PartialMatch("a\\b", re)); |
| 1096 } |
| 1097 |
| 1098 // Test that using an enormous regexp doesn't crash |
| 1099 { |
| 1100 RE2 re("(((.{100}){100}){100}){100}", RE2::Quiet); |
| 1101 CHECK(!re.ok()); |
| 1102 CHECK(!RE2::PartialMatch("aaa", re)); |
| 1103 } |
| 1104 |
| 1105 // Test that a crazy regexp still compiles and runs. |
| 1106 { |
| 1107 RE2 re(".{512}x", RE2::Quiet); |
| 1108 CHECK(re.ok()); |
| 1109 string s; |
| 1110 s.append(515, 'c'); |
| 1111 s.append("x"); |
| 1112 CHECK(RE2::PartialMatch(s, re)); |
| 1113 } |
| 1114 } |
| 1115 |
| 1116 TEST(RE2, Recursion) { |
| 1117 // Test that recursion is stopped. |
| 1118 // This test is PCRE-legacy -- there's no recursion in RE2. |
| 1119 int bytes = 15 * 1024; // enough to crash PCRE |
| 1120 TestRecursion(bytes, "."); |
| 1121 TestRecursion(bytes, "a"); |
| 1122 TestRecursion(bytes, "a."); |
| 1123 TestRecursion(bytes, "ab."); |
| 1124 TestRecursion(bytes, "abc."); |
| 1125 } |
| 1126 |
| 1127 TEST(RE2, BigCountedRepetition) { |
| 1128 // Test that counted repetition works, given tons of memory. |
| 1129 RE2::Options opt; |
| 1130 opt.set_max_mem(256<<20); |
| 1131 |
| 1132 RE2 re(".{512}x", opt); |
| 1133 CHECK(re.ok()); |
| 1134 string s; |
| 1135 s.append(515, 'c'); |
| 1136 s.append("x"); |
| 1137 CHECK(RE2::PartialMatch(s, re)); |
| 1138 } |
| 1139 |
| 1140 TEST(RE2, DeepRecursion) { |
| 1141 // Test for deep stack recursion. This would fail with a |
| 1142 // segmentation violation due to stack overflow before pcre was |
| 1143 // patched. |
| 1144 // Again, a PCRE legacy test. RE2 doesn't recurse. |
| 1145 string comment("x*"); |
| 1146 string a(131072, 'a'); |
| 1147 comment += a; |
| 1148 comment += "*x"; |
| 1149 RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)"); |
| 1150 CHECK(RE2::FullMatch(comment, re)); |
| 1151 } |
| 1152 |
| 1153 // Suggested by Josh Hyman. Failed when SearchOnePass was |
| 1154 // not implementing case-folding. |
| 1155 TEST(CaseInsensitive, MatchAndConsume) { |
| 1156 string result; |
| 1157 string text = "A fish named *Wanda*"; |
| 1158 StringPiece sp(text); |
| 1159 |
| 1160 EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result)); |
| 1161 EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result)); |
| 1162 } |
| 1163 |
| 1164 // RE2 should permit implicit conversions from string, StringPiece, const char*, |
| 1165 // and C string literals. |
| 1166 TEST(RE2, ImplicitConversions) { |
| 1167 string re_string("."); |
| 1168 StringPiece re_stringpiece("."); |
| 1169 const char* re_cstring = "."; |
| 1170 EXPECT_TRUE(RE2::PartialMatch("e", re_string)); |
| 1171 EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece)); |
| 1172 EXPECT_TRUE(RE2::PartialMatch("e", re_cstring)); |
| 1173 EXPECT_TRUE(RE2::PartialMatch("e", ".")); |
| 1174 } |
| 1175 |
| 1176 // Bugs introduced by 8622304 |
| 1177 TEST(RE2, CL8622304) { |
| 1178 // reported by ingow |
| 1179 string dir; |
| 1180 EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])")); // ok |
| 1181 EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir)); // fails |
| 1182 |
| 1183 // reported by jacobsa |
| 1184 string key, val; |
| 1185 EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true", |
| 1186 "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?", |
| 1187 &key, |
| 1188 &val)); |
| 1189 EXPECT_EQ(key, "bar"); |
| 1190 EXPECT_EQ(val, "1,0x2F,030,4,5"); |
| 1191 } |
| 1192 |
| 1193 |
// Table for the ErrorArgs test: each entry pairs a regexp that fails to
// parse with the offending piece RE2 is expected to report for it.
// The cases make sure that the reported piece is made of whole runes,
// that invalid UTF-8 is always reported, and that the piece reported
// for a bad Perl flag group is big enough.
static struct ErrorTest {
  const char *regexp;  // pattern that must fail to compile
  const char *error;   // expected value of error_arg()
} error_tests[] = {
  { "ab\\αcd", "\\α" },
  { "ef\\x☺01", "\\x☺0" },
  { "gh\\x1☺01", "\\x1☺" },
  { "ij\\x1", "\\x1" },
  { "kl\\x", "\\x" },
  { "uv\\x{0000☺}", "\\x{0000☺" },
  { "wx\\p{ABC", "\\p{ABC" },
  { "yz(?smiUX:abc)", "(?smiUX" },  // used to return (?s but the error is X
  { "aa(?sm☺i", "(?sm☺" },
  { "bb[abc", "[abc" },

  // Invalid UTF-8: no argument string is returned.
  { "mn\\x1\377", "" },
  { "op\377qr", "" },
  { "st\\x{00000\377", "" },
  { "zz\\p{\377}", "" },
  { "zz\\x{00\377}", "" },
  { "zz(?P<name\377>abc)", "" },
};
| 1220 TEST(RE2, ErrorArgs) { |
| 1221 for (int i = 0; i < arraysize(error_tests); i++) { |
| 1222 RE2 re(error_tests[i].regexp, RE2::Quiet); |
| 1223 EXPECT_FALSE(re.ok()); |
| 1224 EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error(); |
| 1225 } |
| 1226 } |
| 1227 |
// Table for the NeverNewline test, which checks that "never match \n"
// mode never matches \n.
static struct NeverTest {
  const char* regexp;  // pattern to compile with never_nl set
  const char* text;    // text to search
  const char* match;   // expected first capture, or NULL if no match
} never_tests[] = {
  { "(.*)", "abc\ndef\nghi\n", "abc" },
  { "(?s)(abc.*def)", "abc\ndef\n", NULL },
  { "(abc(.|\n)*def)", "abc\ndef\n", NULL },
  { "(abc[^x]*def)", "abc\ndef\n", NULL },
  { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
};
| 1240 TEST(RE2, NeverNewline) { |
| 1241 RE2::Options opt; |
| 1242 opt.set_never_nl(true); |
| 1243 for (int i = 0; i < arraysize(never_tests); i++) { |
| 1244 const NeverTest& t = never_tests[i]; |
| 1245 RE2 re(t.regexp, opt); |
| 1246 if (t.match == NULL) { |
| 1247 EXPECT_FALSE(re.PartialMatch(t.text, re)); |
| 1248 } else { |
| 1249 StringPiece m; |
| 1250 EXPECT_TRUE(re.PartialMatch(t.text, re, &m)); |
| 1251 EXPECT_EQ(m, t.match); |
| 1252 } |
| 1253 } |
| 1254 } |
| 1255 |
| 1256 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0. |
| 1257 // Triggered by a failed DFA search falling back to Bitstate when |
| 1258 // using Match with a NULL submatch set. Bitstate tried to read |
| 1259 // the submatch[0] entry even if nsubmatch was 0. |
| 1260 TEST(RE2, BitstateCaptureBug) { |
| 1261 RE2::Options opt; |
| 1262 opt.set_max_mem(20000); |
| 1263 RE2 re("(_________$)", opt); |
| 1264 StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x"; |
| 1265 EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0)); |
| 1266 } |
| 1267 |
| 1268 // C++ version of bug 609710. |
| 1269 TEST(RE2, UnicodeClasses) { |
| 1270 const string str = "ABCDEFGHI譚永鋒"; |
| 1271 string a, b, c; |
| 1272 |
| 1273 EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}")); |
| 1274 EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}")); |
| 1275 EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}")); |
| 1276 EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}")); |
| 1277 EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}")); |
| 1278 EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}")); |
| 1279 |
| 1280 EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}")); |
| 1281 EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}")); |
| 1282 EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}")); |
| 1283 EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}")); |
| 1284 EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}")); |
| 1285 EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}")); |
| 1286 |
| 1287 EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}")); |
| 1288 EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}")); |
| 1289 EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}")); |
| 1290 EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}")); |
| 1291 EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}")); |
| 1292 EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}")); |
| 1293 |
| 1294 EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}")); |
| 1295 EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}")); |
| 1296 EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}")); |
| 1297 EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}")); |
| 1298 EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}")); |
| 1299 EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}")); |
| 1300 |
| 1301 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c)); |
| 1302 EXPECT_EQ("A", a); |
| 1303 EXPECT_EQ("B", b); |
| 1304 EXPECT_EQ("C", c); |
| 1305 |
| 1306 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c)); |
| 1307 EXPECT_EQ("A", a); |
| 1308 EXPECT_EQ("B", b); |
| 1309 EXPECT_EQ("C", c); |
| 1310 |
| 1311 EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}")); |
| 1312 |
| 1313 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c)); |
| 1314 EXPECT_EQ("A", a); |
| 1315 EXPECT_EQ("B", b); |
| 1316 EXPECT_EQ("C", c); |
| 1317 |
| 1318 EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]")); |
| 1319 |
| 1320 EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b,
&c)); |
| 1321 EXPECT_EQ("譚", a); |
| 1322 EXPECT_EQ("永", b); |
| 1323 EXPECT_EQ("鋒", c); |
| 1324 } |
| 1325 |
| 1326 // Bug reported by saito. 2009/02/17 |
| 1327 TEST(RE2, NullVsEmptyString) { |
| 1328 RE2 re2(".*"); |
| 1329 StringPiece v1(""); |
| 1330 EXPECT_TRUE(RE2::FullMatch(v1, re2)); |
| 1331 |
| 1332 StringPiece v2; |
| 1333 EXPECT_TRUE(RE2::FullMatch(v2, re2)); |
| 1334 } |
| 1335 |
| 1336 // Issue 1816809 |
| 1337 TEST(RE2, Bug1816809) { |
| 1338 RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))"); |
| 1339 StringPiece piece("llx-3;llx4"); |
| 1340 string x; |
| 1341 EXPECT_TRUE(RE2::Consume(&piece, re, &x)); |
| 1342 } |
| 1343 |
| 1344 // Issue 3061120 |
| 1345 TEST(RE2, Bug3061120) { |
| 1346 RE2 re("(?i)\\W"); |
| 1347 EXPECT_FALSE(RE2::PartialMatch("x", re)); // always worked |
| 1348 EXPECT_FALSE(RE2::PartialMatch("k", re)); // broke because of kelvin |
| 1349 EXPECT_FALSE(RE2::PartialMatch("s", re)); // broke because of latin long s |
| 1350 } |
| 1351 |
| 1352 TEST(RE2, CapturingGroupNames) { |
| 1353 // Opening parentheses annotated with group IDs: |
| 1354 // 12 3 45 6 7 |
| 1355 RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))"); |
| 1356 EXPECT_TRUE(re.ok()); |
| 1357 const map<int, string>& have = re.CapturingGroupNames(); |
| 1358 map<int, string> want; |
| 1359 want[3] = "G2"; |
| 1360 want[6] = "G2"; |
| 1361 want[7] = "G1"; |
| 1362 EXPECT_EQ(want, have); |
| 1363 } |
| 1364 |
| 1365 TEST(RE2, RegexpToStringLossOfAnchor) { |
| 1366 EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at"); |
| 1367 EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at"); |
| 1368 EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$"); |
| 1369 EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)"); |
| 1370 } |
| 1371 |
| 1372 } // namespace re2 |
OLD | NEW |