Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(286)

Side by Side Diff: third_party/re2/re2/testing/re2_test.cc

Issue 1544433002: Replace RE2 import with a dependency (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Re-Added LICENSE and OWNERS file Created 4 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // -*- coding: utf-8 -*-
2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved.
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
5
6 // TODO: Test extractions for PartialMatch/Consume
7
8 #include <errno.h>
9 #ifndef _MSC_VER
10 #include <unistd.h> /* for sysconf */
11 #include <sys/mman.h>
12 #endif
13 #include <sys/stat.h>
14 #include <sys/types.h>
15 #include <vector>
16 #include "util/test.h"
17 #include "re2/re2.h"
18 #include "re2/regexp.h"
19
20 DECLARE_bool(logtostderr);
21
22 namespace re2 {
23
24 TEST(RE2, HexTests) {
25
26 VLOG(1) << "hex tests";
27
28 #define CHECK_HEX(type, value) \
29 do { \
30 type v; \
31 CHECK(RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \
32 CHECK_EQ(v, 0x ## value); \
33 CHECK(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v) )); \
34 CHECK_EQ(v, 0x ## value); \
35 } while(0)
36
37 CHECK_HEX(short, 2bad);
38 CHECK_HEX(unsigned short, 2badU);
39 CHECK_HEX(int, dead);
40 CHECK_HEX(unsigned int, deadU);
41 CHECK_HEX(long, 7eadbeefL);
42 CHECK_HEX(unsigned long, deadbeefUL);
43 CHECK_HEX(long long, 12345678deadbeefLL);
44 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
45
46 #undef CHECK_HEX
47 }
48
49 TEST(RE2, OctalTests) {
50 VLOG(1) << "octal tests";
51
52 #define CHECK_OCTAL(type, value) \
53 do { \
54 type v; \
55 CHECK(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \
56 CHECK_EQ(v, 0 ## value); \
57 CHECK(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v)) ); \
58 CHECK_EQ(v, 0 ## value); \
59 } while(0)
60
61 CHECK_OCTAL(short, 77777);
62 CHECK_OCTAL(unsigned short, 177777U);
63 CHECK_OCTAL(int, 17777777777);
64 CHECK_OCTAL(unsigned int, 37777777777U);
65 CHECK_OCTAL(long, 17777777777L);
66 CHECK_OCTAL(unsigned long, 37777777777UL);
67 CHECK_OCTAL(long long, 777777777777777777777LL);
68 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
69
70 #undef CHECK_OCTAL
71 }
72
73 TEST(RE2, DecimalTests) {
74 VLOG(1) << "decimal tests";
75
76 #define CHECK_DECIMAL(type, value) \
77 do { \
78 type v; \
79 CHECK(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \
80 CHECK_EQ(v, value); \
81 CHECK(RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
82 CHECK_EQ(v, value); \
83 } while(0)
84
85 CHECK_DECIMAL(short, -1);
86 CHECK_DECIMAL(unsigned short, 9999);
87 CHECK_DECIMAL(int, -1000);
88 CHECK_DECIMAL(unsigned int, 12345U);
89 CHECK_DECIMAL(long, -10000000L);
90 CHECK_DECIMAL(unsigned long, 3083324652U);
91 CHECK_DECIMAL(long long, -100000000000000LL);
92 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
93
94 #undef CHECK_DECIMAL
95 }
96
97 TEST(RE2, Replace) {
98 VLOG(1) << "TestReplace";
99
100 struct ReplaceTest {
101 const char *regexp;
102 const char *rewrite;
103 const char *original;
104 const char *single;
105 const char *global;
106 int greplace_count;
107 };
108 static const ReplaceTest tests[] = {
109 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
110 "\\2\\1ay",
111 "the quick brown fox jumps over the lazy dogs.",
112 "ethay quick brown fox jumps over the lazy dogs.",
113 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
114 9 },
115 { "\\w+",
116 "\\0-NOSPAM",
117 "abcd.efghi@google.com",
118 "abcd-NOSPAM.efghi@google.com",
119 "abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM",
120 4 },
121 { "^",
122 "(START)",
123 "foo",
124 "(START)foo",
125 "(START)foo",
126 1 },
127 { "^",
128 "(START)",
129 "",
130 "(START)",
131 "(START)",
132 1 },
133 { "$",
134 "(END)",
135 "",
136 "(END)",
137 "(END)",
138 1 },
139 { "b",
140 "bb",
141 "ababababab",
142 "abbabababab",
143 "abbabbabbabbabb",
144 5 },
145 { "b",
146 "bb",
147 "bbbbbb",
148 "bbbbbbb",
149 "bbbbbbbbbbbb",
150 6 },
151 { "b+",
152 "bb",
153 "bbbbbb",
154 "bb",
155 "bb",
156 1 },
157 { "b*",
158 "bb",
159 "bbbbbb",
160 "bb",
161 "bb",
162 1 },
163 { "b*",
164 "bb",
165 "aaaaa",
166 "bbaaaaa",
167 "bbabbabbabbabbabb",
168 6 },
169 // Check newline handling
170 { "a.*a",
171 "(\\0)",
172 "aba\naba",
173 "(aba)\naba",
174 "(aba)\n(aba)",
175 2 },
176 { "", NULL, NULL, NULL, NULL, 0 }
177 };
178
179 for (const ReplaceTest* t = tests; t->original != NULL; t++) {
180 VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->re write);
181 string one(t->original);
182 CHECK(RE2::Replace(&one, t->regexp, t->rewrite));
183 CHECK_EQ(one, t->single);
184 string all(t->original);
185 CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
186 << "Got: " << all;
187 CHECK_EQ(all, t->global);
188 }
189 }
190
191 static void TestCheckRewriteString(const char* regexp, const char* rewrite,
192 bool expect_ok) {
193 string error;
194 RE2 exp(regexp);
195 bool actual_ok = exp.CheckRewriteString(rewrite, &error);
196 EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
197 }
198
199 TEST(CheckRewriteString, all) {
200 TestCheckRewriteString("abc", "foo", true);
201 TestCheckRewriteString("abc", "foo\\", false);
202 TestCheckRewriteString("abc", "foo\\0bar", true);
203
204 TestCheckRewriteString("a(b)c", "foo", true);
205 TestCheckRewriteString("a(b)c", "foo\\0bar", true);
206 TestCheckRewriteString("a(b)c", "foo\\1bar", true);
207 TestCheckRewriteString("a(b)c", "foo\\2bar", false);
208 TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true);
209
210 TestCheckRewriteString("a(b)(c)", "foo\\12", true);
211 TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true);
212 TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false);
213 }
214
215 TEST(RE2, Extract) {
216 VLOG(1) << "TestExtract";
217
218 string s;
219
220 CHECK(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
221 CHECK_EQ(s, "kremvax!boris");
222
223 CHECK(RE2::Extract("foo", ".*", "'\\0'", &s));
224 CHECK_EQ(s, "'foo'");
225 // check that false match doesn't overwrite
226 CHECK(!RE2::Extract("baz", "bar", "'\\0'", &s));
227 CHECK_EQ(s, "'foo'");
228 }
229
230 TEST(RE2, Consume) {
231 VLOG(1) << "TestConsume";
232
233 RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
234 string word;
235
236 string s(" aaa b!@#$@#$cccc");
237 StringPiece input(s);
238
239 CHECK(RE2::Consume(&input, r, &word));
240 CHECK_EQ(word, "aaa") << " input: " << input;
241 CHECK(RE2::Consume(&input, r, &word));
242 CHECK_EQ(word, "b") << " input: " << input;
243 CHECK(! RE2::Consume(&input, r, &word)) << " input: " << input;
244 }
245
246 TEST(RE2, ConsumeN) {
247 const string s(" one two three 4");
248 StringPiece input(s);
249
250 RE2::Arg argv[2];
251 const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
252
253 // 0 arg
254 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0)); // Skips "one".
255
256 // 1 arg
257 string word;
258 argv[0] = &word;
259 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1));
260 EXPECT_EQ("two", word);
261
262 // Multi-args
263 int n;
264 argv[1] = &n;
265 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2));
266 EXPECT_EQ("three", word);
267 EXPECT_EQ(4, n);
268 }
269
270 TEST(RE2, FindAndConsume) {
271 VLOG(1) << "TestFindAndConsume";
272
273 RE2 r("(\\w+)"); // matches a word
274 string word;
275
276 string s(" aaa b!@#$@#$cccc");
277 StringPiece input(s);
278
279 CHECK(RE2::FindAndConsume(&input, r, &word));
280 CHECK_EQ(word, "aaa");
281 CHECK(RE2::FindAndConsume(&input, r, &word));
282 CHECK_EQ(word, "b");
283 CHECK(RE2::FindAndConsume(&input, r, &word));
284 CHECK_EQ(word, "cccc");
285 CHECK(! RE2::FindAndConsume(&input, r, &word));
286
287 // Check that FindAndConsume works without any submatches.
288 // Earlier version used uninitialized data for
289 // length to consume.
290 input = "aaa";
291 CHECK(RE2::FindAndConsume(&input, "aaa"));
292 CHECK_EQ(input, "");
293 }
294
295 TEST(RE2, FindAndConsumeN) {
296 const string s(" one two three 4");
297 StringPiece input(s);
298
299 RE2::Arg argv[2];
300 const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
301
302 // 0 arg
303 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0)); // Skips "one".
304
305 // 1 arg
306 string word;
307 argv[0] = &word;
308 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1));
309 EXPECT_EQ("two", word);
310
311 // Multi-args
312 int n;
313 argv[1] = &n;
314 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2));
315 EXPECT_EQ("three", word);
316 EXPECT_EQ(4, n);
317 }
318
319 TEST(RE2, MatchNumberPeculiarity) {
320 VLOG(1) << "TestMatchNumberPeculiarity";
321
322 RE2 r("(foo)|(bar)|(baz)");
323 string word1;
324 string word2;
325 string word3;
326
327 CHECK(RE2::PartialMatch("foo", r, &word1, &word2, &word3));
328 CHECK_EQ(word1, "foo");
329 CHECK_EQ(word2, "");
330 CHECK_EQ(word3, "");
331 CHECK(RE2::PartialMatch("bar", r, &word1, &word2, &word3));
332 CHECK_EQ(word1, "");
333 CHECK_EQ(word2, "bar");
334 CHECK_EQ(word3, "");
335 CHECK(RE2::PartialMatch("baz", r, &word1, &word2, &word3));
336 CHECK_EQ(word1, "");
337 CHECK_EQ(word2, "");
338 CHECK_EQ(word3, "baz");
339 CHECK(!RE2::PartialMatch("f", r, &word1, &word2, &word3));
340
341 string a;
342 CHECK(RE2::FullMatch("hello", "(foo)|hello", &a));
343 CHECK_EQ(a, "");
344 }
345
346 TEST(RE2, Match) {
347 RE2 re("((\\w+):([0-9]+))"); // extracts host and port
348 StringPiece group[4];
349
350 // No match.
351 StringPiece s = "zyzzyva";
352 CHECK(!re.Match(s, 0, s.size(), RE2::UNANCHORED,
353 group, arraysize(group)));
354
355 // Matches and extracts.
356 s = "a chrisr:9000 here";
357 CHECK(re.Match(s, 0, s.size(), RE2::UNANCHORED,
358 group, arraysize(group)));
359 CHECK_EQ(group[0], "chrisr:9000");
360 CHECK_EQ(group[1], "chrisr:9000");
361 CHECK_EQ(group[2], "chrisr");
362 CHECK_EQ(group[3], "9000");
363
364 string all, host;
365 int port;
366 CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
367 CHECK_EQ(all, "chrisr:9000");
368 CHECK_EQ(host, "chrisr");
369 CHECK_EQ(port, 9000);
370 }
371
372 static void TestRecursion(int size, const char* pattern) {
373 // Fill up a string repeating the pattern given
374 string domain;
375 domain.resize(size);
376 size_t patlen = strlen(pattern);
377 for (int i = 0; i < size; i++) {
378 domain[i] = pattern[i % patlen];
379 }
380 // Just make sure it doesn't crash due to too much recursion.
381 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
382 RE2::FullMatch(domain, re);
383 }
384
385 // A meta-quoted string, interpreted as a pattern, should always match
386 // the original unquoted string.
387 static void TestQuoteMeta(string unquoted,
388 const RE2::Options& options = RE2::DefaultOptions) {
389 string quoted = RE2::QuoteMeta(unquoted);
390 RE2 re(quoted, options);
391 EXPECT_TRUE(RE2::FullMatch(unquoted, re))
392 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
393 }
394
395 // A meta-quoted string, interpreted as a pattern, should always match
396 // the original unquoted string.
397 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
398 const RE2::Options& options = RE2::DefaultOpti ons) {
399 string quoted = RE2::QuoteMeta(unquoted);
400 RE2 re(quoted, options);
401 EXPECT_FALSE(RE2::FullMatch(should_not_match, re))
402 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
403 }
404
405 // Tests that quoted meta characters match their original strings,
406 // and that a few things that shouldn't match indeed do not.
407 TEST(QuoteMeta, Simple) {
408 TestQuoteMeta("foo");
409 TestQuoteMeta("foo.bar");
410 TestQuoteMeta("foo\\.bar");
411 TestQuoteMeta("[1-9]");
412 TestQuoteMeta("1.5-2.0?");
413 TestQuoteMeta("\\d");
414 TestQuoteMeta("Who doesn't like ice cream?");
415 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
416 TestQuoteMeta("((?!)xxx).*yyy");
417 TestQuoteMeta("([");
418 }
419 TEST(QuoteMeta, SimpleNegative) {
420 NegativeTestQuoteMeta("foo", "bar");
421 NegativeTestQuoteMeta("...", "bar");
422 NegativeTestQuoteMeta("\\.", ".");
423 NegativeTestQuoteMeta("\\.", "..");
424 NegativeTestQuoteMeta("(a)", "a");
425 NegativeTestQuoteMeta("(a|b)", "a");
426 NegativeTestQuoteMeta("(a|b)", "(a)");
427 NegativeTestQuoteMeta("(a|b)", "a|b");
428 NegativeTestQuoteMeta("[0-9]", "0");
429 NegativeTestQuoteMeta("[0-9]", "0-9");
430 NegativeTestQuoteMeta("[0-9]", "[9]");
431 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
432 }
433
434 TEST(QuoteMeta, Latin1) {
435 TestQuoteMeta("3\xb2 = 9", RE2::Latin1);
436 }
437
438 TEST(QuoteMeta, UTF8) {
439 TestQuoteMeta("Plácido Domingo");
440 TestQuoteMeta("xyz"); // No fancy utf8.
441 TestQuoteMeta("\xc2\xb0"); // 2-byte utf8 -- a degree symbol.
442 TestQuoteMeta("27\xc2\xb0 degrees"); // As a middle character.
443 TestQuoteMeta("\xe2\x80\xb3"); // 3-byte utf8 -- a double prime.
444 TestQuoteMeta("\xf0\x9d\x85\x9f"); // 4-byte utf8 -- a music note.
445 TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, this should
446 // still work.
447 NegativeTestQuoteMeta("27\xc2\xb0",
448 "27\\\xc2\\\xb0"); // 2-byte utf8 -- a degree symbol.
449 }
450
451 TEST(QuoteMeta, HasNull) {
452 string has_null;
453
454 // string with one null character
455 has_null += '\0';
456 TestQuoteMeta(has_null);
457 NegativeTestQuoteMeta(has_null, "");
458
459 // Don't want null-followed-by-'1' to be interpreted as '\01'.
460 has_null += '1';
461 TestQuoteMeta(has_null);
462 NegativeTestQuoteMeta(has_null, "\1");
463 }
464
465 TEST(ProgramSize, BigProgram) {
466 RE2 re_simple("simple regexp");
467 RE2 re_medium("medium.*regexp");
468 RE2 re_complex("complex.{1,128}regexp");
469
470 CHECK_GT(re_simple.ProgramSize(), 0);
471 CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
472 CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
473 }
474
475 TEST(ProgramFanout, BigProgram) {
476 RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)");
477 RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)");
478 RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
479 RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");
480
481 map<int, int> histogram;
482
483 // 3 is the largest non-empty bucket and has 1 element.
484 CHECK_EQ(3, re1.ProgramFanout(&histogram));
485 CHECK_EQ(1, histogram[3]);
486
487 // 7 is the largest non-empty bucket and has 10 elements.
488 CHECK_EQ(7, re10.ProgramFanout(&histogram));
489 CHECK_EQ(10, histogram[7]);
490
491 // 10 is the largest non-empty bucket and has 100 elements.
492 CHECK_EQ(10, re100.ProgramFanout(&histogram));
493 CHECK_EQ(100, histogram[10]);
494
495 // 13 is the largest non-empty bucket and has 1000 elements.
496 CHECK_EQ(13, re1000.ProgramFanout(&histogram));
497 CHECK_EQ(1000, histogram[13]);
498 }
499
500 // Issue 956519: handling empty character sets was
501 // causing NULL dereference. This tests a few empty character sets.
502 // (The way to get an empty character set is to negate a full one.)
503 TEST(EmptyCharset, Fuzz) {
504 static const char *empties[] = {
505 "[^\\S\\s]",
506 "[^\\S[:space:]]",
507 "[^\\D\\d]",
508 "[^\\D[:digit:]]"
509 };
510 for (int i = 0; i < arraysize(empties); i++)
511 CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
512 }
513
514 // Bitstate assumes that kInstFail instructions in
515 // alternations or capture groups have been "compiled away".
516 TEST(EmptyCharset, BitstateAssumptions) {
517 // Captures trigger use of Bitstate.
518 static const char *nop_empties[] = {
519 "((((()))))" "[^\\S\\s]?",
520 "((((()))))" "([^\\S\\s])?",
521 "((((()))))" "([^\\S\\s]|[^\\S\\s])?",
522 "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)"
523 };
524 StringPiece group[6];
525 for (int i = 0; i < arraysize(nop_empties); i++)
526 CHECK(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6));
527 }
528
529 // Test that named groups work correctly.
530 TEST(Capture, NamedGroups) {
531 {
532 RE2 re("(hello world)");
533 CHECK_EQ(re.NumberOfCapturingGroups(), 1);
534 const map<string, int>& m = re.NamedCapturingGroups();
535 CHECK_EQ(m.size(), 0);
536 }
537
538 {
539 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
540 CHECK_EQ(re.NumberOfCapturingGroups(), 6);
541 const map<string, int>& m = re.NamedCapturingGroups();
542 CHECK_EQ(m.size(), 4);
543 CHECK_EQ(m.find("A")->second, 1);
544 CHECK_EQ(m.find("B")->second, 2);
545 CHECK_EQ(m.find("C")->second, 3);
546 CHECK_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous
547 }
548 }
549
550 TEST(RE2, CapturedGroupTest) {
551 RE2 re("directions from (?P<S>.*) to (?P<D>.*)");
552 int num_groups = re.NumberOfCapturingGroups();
553 EXPECT_EQ(2, num_groups);
554 string args[4];
555 RE2::Arg arg0(&args[0]);
556 RE2::Arg arg1(&args[1]);
557 RE2::Arg arg2(&args[2]);
558 RE2::Arg arg3(&args[3]);
559
560 const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3};
561 EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose",
562 re, matches, num_groups));
563 const map<string, int>& named_groups = re.NamedCapturingGroups();
564 EXPECT_TRUE(named_groups.find("S") != named_groups.end());
565 EXPECT_TRUE(named_groups.find("D") != named_groups.end());
566
567 // The named group index is 1-based.
568 int source_group_index = named_groups.find("S")->second;
569 int destination_group_index = named_groups.find("D")->second;
570 EXPECT_EQ(1, source_group_index);
571 EXPECT_EQ(2, destination_group_index);
572
573 // The args is zero-based.
574 EXPECT_EQ("mountain view", args[source_group_index - 1]);
575 EXPECT_EQ("san jose", args[destination_group_index - 1]);
576 }
577
578 TEST(RE2, FullMatchWithNoArgs) {
579 CHECK(RE2::FullMatch("h", "h"));
580 CHECK(RE2::FullMatch("hello", "hello"));
581 CHECK(RE2::FullMatch("hello", "h.*o"));
582 CHECK(!RE2::FullMatch("othello", "h.*o")); // Must be anchored at front
583 CHECK(!RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end
584 }
585
586 TEST(RE2, PartialMatch) {
587 CHECK(RE2::PartialMatch("x", "x"));
588 CHECK(RE2::PartialMatch("hello", "h.*o"));
589 CHECK(RE2::PartialMatch("othello", "h.*o"));
590 CHECK(RE2::PartialMatch("hello!", "h.*o"));
591 CHECK(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))"));
592 }
593
594 TEST(RE2, PartialMatchN) {
595 RE2::Arg argv[2];
596 const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
597
598 // 0 arg
599 EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0));
600 EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0));
601
602 // 1 arg
603 int i;
604 argv[0] = &i;
605 EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1));
606 EXPECT_EQ(1001, i);
607 EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1));
608
609 // Multi-arg
610 string s;
611 argv[1] = &s;
612 EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2));
613 EXPECT_EQ(42, i);
614 EXPECT_EQ("life", s);
615 EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2));
616 }
617
618 TEST(RE2, FullMatchZeroArg) {
619 // Zero-arg
620 CHECK(RE2::FullMatch("1001", "\\d+"));
621 }
622
623 TEST(RE2, FullMatchOneArg) {
624 int i;
625
626 // Single-arg
627 CHECK(RE2::FullMatch("1001", "(\\d+)", &i));
628 CHECK_EQ(i, 1001);
629 CHECK(RE2::FullMatch("-123", "(-?\\d+)", &i));
630 CHECK_EQ(i, -123);
631 CHECK(!RE2::FullMatch("10", "()\\d+", &i));
632 CHECK(!RE2::FullMatch("1234567890123456789012345678901234567890",
633 "(\\d+)", &i));
634 }
635
636 TEST(RE2, FullMatchIntegerArg) {
637 int i;
638
639 // Digits surrounding integer-arg
640 CHECK(RE2::FullMatch("1234", "1(\\d*)4", &i));
641 CHECK_EQ(i, 23);
642 CHECK(RE2::FullMatch("1234", "(\\d)\\d+", &i));
643 CHECK_EQ(i, 1);
644 CHECK(RE2::FullMatch("-1234", "(-\\d)\\d+", &i));
645 CHECK_EQ(i, -1);
646 CHECK(RE2::PartialMatch("1234", "(\\d)", &i));
647 CHECK_EQ(i, 1);
648 CHECK(RE2::PartialMatch("-1234", "(-\\d)", &i));
649 CHECK_EQ(i, -1);
650 }
651
652 TEST(RE2, FullMatchStringArg) {
653 string s;
654 // String-arg
655 CHECK(RE2::FullMatch("hello", "h(.*)o", &s));
656 CHECK_EQ(s, string("ell"));
657 }
658
659 TEST(RE2, FullMatchStringPieceArg) {
660 int i;
661 // StringPiece-arg
662 StringPiece sp;
663 CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i));
664 CHECK_EQ(sp.size(), 4);
665 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
666 CHECK_EQ(i, 1234);
667 }
668
669 TEST(RE2, FullMatchMultiArg) {
670 int i;
671 string s;
672 // Multi-arg
673 CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
674 CHECK_EQ(s, string("ruby"));
675 CHECK_EQ(i, 1234);
676 }
677
678 TEST(RE2, FullMatchN) {
679 RE2::Arg argv[2];
680 const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
681
682 // 0 arg
683 EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0));
684 EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0));
685
686 // 1 arg
687 int i;
688 argv[0] = &i;
689 EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1));
690 EXPECT_EQ(1001, i);
691 EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1));
692
693 // Multi-arg
694 string s;
695 argv[1] = &s;
696 EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2));
697 EXPECT_EQ(42, i);
698 EXPECT_EQ("life", s);
699 EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2));
700 }
701
702 TEST(RE2, FullMatchIgnoredArg) {
703 int i;
704 string s;
705 // Ignored arg
706 CHECK(RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i));
707 CHECK_EQ(s, string("ruby"));
708 CHECK_EQ(i, 1234);
709 }
710
711 TEST(RE2, FullMatchTypedNullArg) {
712 string s;
713
714 // Ignore non-void* NULL arg
715 CHECK(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL));
716 CHECK(RE2::FullMatch("hello", "h(.*)o", (string*)NULL));
717 CHECK(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL));
718 CHECK(RE2::FullMatch("1234", "(.*)", (int*)NULL));
719 CHECK(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL));
720 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL));
721 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL));
722
723 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
724 CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL));
725 CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL));
726 CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL));
727 CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL));
728 CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL));
729 }
730
731 // Check that numeric parsing code does not read past the end of
732 // the number being parsed.
733 // This implementation requires mmap(2) et al. and thus cannot
734 // be used unless they are available.
735 TEST(RE2, NULTerminated) {
736 #if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0
737 char *v;
738 int x;
739 long pagesize = sysconf(_SC_PAGE_SIZE);
740
741 #ifndef MAP_ANONYMOUS
742 #define MAP_ANONYMOUS MAP_ANON
743 #endif
744 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
745 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
746 CHECK(v != reinterpret_cast<char*>(-1));
747 LOG(INFO) << "Memory at " << (void*)v;
748 CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
749 v[pagesize - 1] = '1';
750
751 x = 0;
752 CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
753 CHECK_EQ(x, 1);
754 #endif
755 }
756
757 TEST(RE2, FullMatchTypeTests) {
758 // Type tests
759 string zeros(1000, '0');
760 {
761 char c;
762 CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
763 CHECK_EQ(c, 'H');
764 }
765 {
766 unsigned char c;
767 CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
768 CHECK_EQ(c, static_cast<unsigned char>('H'));
769 }
770 {
771 int16 v;
772 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100);
773 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100);
774 CHECK(RE2::FullMatch("32767", "(-?\\d+)", &v)); CHECK_EQ(v, 32767);
775 CHECK(RE2::FullMatch("-32768", "(-?\\d+)", &v)); CHECK_EQ(v, -32768);
776 CHECK(!RE2::FullMatch("-32769", "(-?\\d+)", &v));
777 CHECK(!RE2::FullMatch("32768", "(-?\\d+)", &v));
778 }
779 {
780 uint16 v;
781 CHECK(RE2::FullMatch("100", "(\\d+)", &v)); CHECK_EQ(v, 100);
782 CHECK(RE2::FullMatch("32767", "(\\d+)", &v)); CHECK_EQ(v, 32767);
783 CHECK(RE2::FullMatch("65535", "(\\d+)", &v)); CHECK_EQ(v, 65535);
784 CHECK(!RE2::FullMatch("65536", "(\\d+)", &v));
785 }
786 {
787 int32 v;
788 static const int32 max = 0x7fffffff;
789 static const int32 min = -max - 1;
790 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100);
791 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100);
792 CHECK(RE2::FullMatch("2147483647", "(-?\\d+)", &v)); CHECK_EQ(v, max);
793 CHECK(RE2::FullMatch("-2147483648", "(-?\\d+)", &v)); CHECK_EQ(v, min);
794 CHECK(!RE2::FullMatch("-2147483649", "(-?\\d+)", &v));
795 CHECK(!RE2::FullMatch("2147483648", "(-?\\d+)", &v));
796
797 CHECK(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v));
798 CHECK_EQ(v, max);
799 CHECK(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v));
800 CHECK_EQ(v, min);
801
802 CHECK(!RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v));
803 CHECK(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v)));
804 CHECK_EQ(v, max);
805 CHECK(!RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v)));
806 }
807 {
808 uint32 v;
809 static const uint32 max = 0xfffffffful;
810 CHECK(RE2::FullMatch("100", "(\\d+)", &v)); CHECK_EQ(v, 100);
811 CHECK(RE2::FullMatch("4294967295", "(\\d+)", &v)); CHECK_EQ(v, max);
812 CHECK(!RE2::FullMatch("4294967296", "(\\d+)", &v));
813 CHECK(!RE2::FullMatch("-1", "(\\d+)", &v));
814
815 CHECK(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); CHECK_EQ(v, max);
816 }
817 {
818 int64 v;
819 static const int64 max = 0x7fffffffffffffffull;
820 static const int64 min = -max - 1;
821 char buf[32];
822
823 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100);
824 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100);
825
826 snprintf(buf, sizeof(buf), "%lld", (long long int)max);
827 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max);
828
829 snprintf(buf, sizeof(buf), "%lld", (long long int)min);
830 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, min);
831
832 snprintf(buf, sizeof(buf), "%lld", (long long int)max);
833 assert(buf[strlen(buf)-1] != '9');
834 buf[strlen(buf)-1]++;
835 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v));
836
837 snprintf(buf, sizeof(buf), "%lld", (long long int)min);
838 assert(buf[strlen(buf)-1] != '9');
839 buf[strlen(buf)-1]++;
840 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v));
841 }
842 {
843 uint64 v;
844 int64 v2;
845 static const uint64 max = 0xffffffffffffffffull;
846 char buf[32];
847
848 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100);
849 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v2)); CHECK_EQ(v2, -100);
850
851 snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max);
852 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max);
853
854 assert(buf[strlen(buf)-1] != '9');
855 buf[strlen(buf)-1]++;
856 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v));
857 }
858 }
859
860 TEST(RE2, FloatingPointFullMatchTypes) {
861 string zeros(1000, '0');
862 {
863 float v;
864 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100);
865 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100);
866 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23));
867 CHECK(RE2::FullMatch(" 100", "(.*)", &v)); CHECK_EQ(v, 100);
868
869 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
870 CHECK_EQ(v, float(1e23));
871
872 // 6700000000081920.1 is an edge case.
873 // 6700000000081920 is exactly halfway between
874 // two float32s, so the .1 should make it round up.
875 // However, the .1 is outside the precision possible with
876 // a float64: the nearest float64 is 6700000000081920.
877 // So if the code uses strtod and then converts to float32,
878 // round-to-even will make it round down instead of up.
879 // To pass the test, the parser must call strtof directly.
880 // This test case is carefully chosen to use only a 17-digit
881 // number, since C does not guarantee to get the correctly
882 // rounded answer for strtod and strtof unless the input is
883 // short.
884 CHECK(RE2::FullMatch("0.1", "(.*)", &v));
885 CHECK_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
886 CHECK(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
887 CHECK_EQ(v, 6700000000081920.1f)
888 << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
889 }
890 {
891 double v;
892 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100);
893 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100);
894 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, 1e23);
895 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
896 CHECK_EQ(v, double(1e23));
897
898 CHECK(RE2::FullMatch("0.1", "(.*)", &v));
899 CHECK_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1);
900 CHECK(RE2::FullMatch("1.00000005960464485", "(.*)", &v));
901 CHECK_EQ(v, 1.0000000596046448)
902 << StringPrintf("%.17g != %.17g", v, 1.0000000596046448);
903 }
904 }
905
906 TEST(RE2, FullMatchAnchored) {
907 int i;
908 // Check that matching is fully anchored
909 CHECK(!RE2::FullMatch("x1001", "(\\d+)", &i));
910 CHECK(!RE2::FullMatch("1001x", "(\\d+)", &i));
911 CHECK(RE2::FullMatch("x1001", "x(\\d+)", &i)); CHECK_EQ(i, 1001);
912 CHECK(RE2::FullMatch("1001x", "(\\d+)x", &i)); CHECK_EQ(i, 1001);
913 }
914
915 TEST(RE2, FullMatchBraces) {
916 // Braces
917 CHECK(RE2::FullMatch("0abcd", "[0-9a-f+.-]{5,}"));
918 CHECK(RE2::FullMatch("0abcde", "[0-9a-f+.-]{5,}"));
919 CHECK(!RE2::FullMatch("0abc", "[0-9a-f+.-]{5,}"));
920 }
921
922 TEST(RE2, Complicated) {
923 // Complicated RE2
924 CHECK(RE2::FullMatch("foo", "foo|bar|[A-Z]"));
925 CHECK(RE2::FullMatch("bar", "foo|bar|[A-Z]"));
926 CHECK(RE2::FullMatch("X", "foo|bar|[A-Z]"));
927 CHECK(!RE2::FullMatch("XY", "foo|bar|[A-Z]"));
928 }
929
930 TEST(RE2, FullMatchEnd) {
931 // Check full-match handling (needs '$' tacked on internally)
932 CHECK(RE2::FullMatch("fo", "fo|foo"));
933 CHECK(RE2::FullMatch("foo", "fo|foo"));
934 CHECK(RE2::FullMatch("fo", "fo|foo$"));
935 CHECK(RE2::FullMatch("foo", "fo|foo$"));
936 CHECK(RE2::FullMatch("foo", "foo$"));
937 CHECK(!RE2::FullMatch("foo$bar", "foo\\$"));
938 CHECK(!RE2::FullMatch("fox", "fo|bar"));
939
940 // Uncomment the following if we change the handling of '$' to
941 // prevent it from matching a trailing newline
942 if (false) {
943 // Check that we don't get bitten by pcre's special handling of a
944 // '\n' at the end of the string matching '$'
945 CHECK(!RE2::PartialMatch("foo\n", "foo$"));
946 }
947 }
948
949 TEST(RE2, FullMatchArgCount) {
950 // Number of args
951 int a[16];
952 CHECK(RE2::FullMatch("", ""));
953
954 memset(a, 0, sizeof(0));
955 CHECK(RE2::FullMatch("1",
956 "(\\d){1}",
957 &a[0]));
958 CHECK_EQ(a[0], 1);
959
960 memset(a, 0, sizeof(0));
961 CHECK(RE2::FullMatch("12",
962 "(\\d)(\\d)",
963 &a[0], &a[1]));
964 CHECK_EQ(a[0], 1);
965 CHECK_EQ(a[1], 2);
966
967 memset(a, 0, sizeof(0));
968 CHECK(RE2::FullMatch("123",
969 "(\\d)(\\d)(\\d)",
970 &a[0], &a[1], &a[2]));
971 CHECK_EQ(a[0], 1);
972 CHECK_EQ(a[1], 2);
973 CHECK_EQ(a[2], 3);
974
975 memset(a, 0, sizeof(0));
976 CHECK(RE2::FullMatch("1234",
977 "(\\d)(\\d)(\\d)(\\d)",
978 &a[0], &a[1], &a[2], &a[3]));
979 CHECK_EQ(a[0], 1);
980 CHECK_EQ(a[1], 2);
981 CHECK_EQ(a[2], 3);
982 CHECK_EQ(a[3], 4);
983
984 memset(a, 0, sizeof(0));
985 CHECK(RE2::FullMatch("12345",
986 "(\\d)(\\d)(\\d)(\\d)(\\d)",
987 &a[0], &a[1], &a[2], &a[3],
988 &a[4]));
989 CHECK_EQ(a[0], 1);
990 CHECK_EQ(a[1], 2);
991 CHECK_EQ(a[2], 3);
992 CHECK_EQ(a[3], 4);
993 CHECK_EQ(a[4], 5);
994
995 memset(a, 0, sizeof(0));
996 CHECK(RE2::FullMatch("123456",
997 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
998 &a[0], &a[1], &a[2], &a[3],
999 &a[4], &a[5]));
1000 CHECK_EQ(a[0], 1);
1001 CHECK_EQ(a[1], 2);
1002 CHECK_EQ(a[2], 3);
1003 CHECK_EQ(a[3], 4);
1004 CHECK_EQ(a[4], 5);
1005 CHECK_EQ(a[5], 6);
1006
1007 memset(a, 0, sizeof(0));
1008 CHECK(RE2::FullMatch("1234567",
1009 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
1010 &a[0], &a[1], &a[2], &a[3],
1011 &a[4], &a[5], &a[6]));
1012 CHECK_EQ(a[0], 1);
1013 CHECK_EQ(a[1], 2);
1014 CHECK_EQ(a[2], 3);
1015 CHECK_EQ(a[3], 4);
1016 CHECK_EQ(a[4], 5);
1017 CHECK_EQ(a[5], 6);
1018 CHECK_EQ(a[6], 7);
1019
1020 memset(a, 0, sizeof(0));
1021 CHECK(RE2::FullMatch("1234567890123456",
1022 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1023 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
1024 &a[0], &a[1], &a[2], &a[3],
1025 &a[4], &a[5], &a[6], &a[7],
1026 &a[8], &a[9], &a[10], &a[11],
1027 &a[12], &a[13], &a[14], &a[15]));
1028 CHECK_EQ(a[0], 1);
1029 CHECK_EQ(a[1], 2);
1030 CHECK_EQ(a[2], 3);
1031 CHECK_EQ(a[3], 4);
1032 CHECK_EQ(a[4], 5);
1033 CHECK_EQ(a[5], 6);
1034 CHECK_EQ(a[6], 7);
1035 CHECK_EQ(a[7], 8);
1036 CHECK_EQ(a[8], 9);
1037 CHECK_EQ(a[9], 0);
1038 CHECK_EQ(a[10], 1);
1039 CHECK_EQ(a[11], 2);
1040 CHECK_EQ(a[12], 3);
1041 CHECK_EQ(a[13], 4);
1042 CHECK_EQ(a[14], 5);
1043 CHECK_EQ(a[15], 6);
1044 }
1045
1046 TEST(RE2, Accessors) {
1047 // Check the pattern() accessor
1048 {
1049 const string kPattern = "http://([^/]+)/.*";
1050 const RE2 re(kPattern);
1051 CHECK_EQ(kPattern, re.pattern());
1052 }
1053
1054 // Check RE2 error field.
1055 {
1056 RE2 re("foo");
1057 CHECK(re.error().empty()); // Must have no error
1058 CHECK(re.ok());
1059 CHECK(re.error_code() == RE2::NoError);
1060 }
1061 }
1062
1063 TEST(RE2, UTF8) {
1064 // Check UTF-8 handling
1065 // Three Japanese characters (nihongo)
1066 const char utf8_string[] = {
1067 (char)0xe6, (char)0x97, (char)0xa5, // 65e5
1068 (char)0xe6, (char)0x9c, (char)0xac, // 627c
1069 (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e
1070 0
1071 };
1072 const char utf8_pattern[] = {
1073 '.',
1074 (char)0xe6, (char)0x9c, (char)0xac, // 627c
1075 '.',
1076 0
1077 };
1078
1079 // Both should match in either mode, bytes or UTF-8
1080 RE2 re_test1(".........", RE2::Latin1);
1081 CHECK(RE2::FullMatch(utf8_string, re_test1));
1082 RE2 re_test2("...");
1083 CHECK(RE2::FullMatch(utf8_string, re_test2));
1084
1085 // Check that '.' matches one byte or UTF-8 character
1086 // according to the mode.
1087 string s;
1088 RE2 re_test3("(.)", RE2::Latin1);
1089 CHECK(RE2::PartialMatch(utf8_string, re_test3, &s));
1090 CHECK_EQ(s, string("\xe6"));
1091 RE2 re_test4("(.)");
1092 CHECK(RE2::PartialMatch(utf8_string, re_test4, &s));
1093 CHECK_EQ(s, string("\xe6\x97\xa5"));
1094
1095 // Check that string matches itself in either mode
1096 RE2 re_test5(utf8_string, RE2::Latin1);
1097 CHECK(RE2::FullMatch(utf8_string, re_test5));
1098 RE2 re_test6(utf8_string);
1099 CHECK(RE2::FullMatch(utf8_string, re_test6));
1100
1101 // Check that pattern matches string only in UTF8 mode
1102 RE2 re_test7(utf8_pattern, RE2::Latin1);
1103 CHECK(!RE2::FullMatch(utf8_string, re_test7));
1104 RE2 re_test8(utf8_pattern);
1105 CHECK(RE2::FullMatch(utf8_string, re_test8));
1106 }
1107
1108 TEST(RE2, UngreedyUTF8) {
1109 // Check that ungreedy, UTF8 regular expressions don't match when they
1110 // oughtn't -- see bug 82246.
1111 {
1112 // This code always worked.
1113 const char* pattern = "\\w+X";
1114 const string target = "a aX";
1115 RE2 match_sentence(pattern, RE2::Latin1);
1116 RE2 match_sentence_re(pattern);
1117
1118 CHECK(!RE2::FullMatch(target, match_sentence));
1119 CHECK(!RE2::FullMatch(target, match_sentence_re));
1120 }
1121 {
1122 const char* pattern = "(?U)\\w+X";
1123 const string target = "a aX";
1124 RE2 match_sentence(pattern, RE2::Latin1);
1125 CHECK_EQ(match_sentence.error(), "");
1126 RE2 match_sentence_re(pattern);
1127
1128 CHECK(!RE2::FullMatch(target, match_sentence));
1129 CHECK(!RE2::FullMatch(target, match_sentence_re));
1130 }
1131 }
1132
1133 TEST(RE2, Rejects) {
1134 { RE2 re("a\\1", RE2::Quiet); CHECK(!re.ok()); }
1135 {
1136 RE2 re("a[x", RE2::Quiet);
1137 CHECK(!re.ok());
1138 }
1139 {
1140 RE2 re("a[z-a]", RE2::Quiet);
1141 CHECK(!re.ok());
1142 }
1143 {
1144 RE2 re("a[[:foobar:]]", RE2::Quiet);
1145 CHECK(!re.ok());
1146 }
1147 {
1148 RE2 re("a(b", RE2::Quiet);
1149 CHECK(!re.ok());
1150 }
1151 {
1152 RE2 re("a\\", RE2::Quiet);
1153 CHECK(!re.ok());
1154 }
1155 }
1156
1157 TEST(RE2, NoCrash) {
1158 // Test that using a bad regexp doesn't crash.
1159 {
1160 RE2 re("a\\", RE2::Quiet);
1161 CHECK(!re.ok());
1162 CHECK(!RE2::PartialMatch("a\\b", re));
1163 }
1164
1165 // Test that using an enormous regexp doesn't crash
1166 {
1167 RE2 re("(((.{100}){100}){100}){100}", RE2::Quiet);
1168 CHECK(!re.ok());
1169 CHECK(!RE2::PartialMatch("aaa", re));
1170 }
1171
1172 // Test that a crazy regexp still compiles and runs.
1173 {
1174 RE2 re(".{512}x", RE2::Quiet);
1175 CHECK(re.ok());
1176 string s;
1177 s.append(515, 'c');
1178 s.append("x");
1179 CHECK(RE2::PartialMatch(s, re));
1180 }
1181 }
1182
1183 TEST(RE2, Recursion) {
1184 // Test that recursion is stopped.
1185 // This test is PCRE-legacy -- there's no recursion in RE2.
1186 int bytes = 15 * 1024; // enough to crash PCRE
1187 TestRecursion(bytes, ".");
1188 TestRecursion(bytes, "a");
1189 TestRecursion(bytes, "a.");
1190 TestRecursion(bytes, "ab.");
1191 TestRecursion(bytes, "abc.");
1192 }
1193
1194 TEST(RE2, BigCountedRepetition) {
1195 // Test that counted repetition works, given tons of memory.
1196 RE2::Options opt;
1197 opt.set_max_mem(256<<20);
1198
1199 RE2 re(".{512}x", opt);
1200 CHECK(re.ok());
1201 string s;
1202 s.append(515, 'c');
1203 s.append("x");
1204 CHECK(RE2::PartialMatch(s, re));
1205 }
1206
1207 TEST(RE2, DeepRecursion) {
1208 // Test for deep stack recursion. This would fail with a
1209 // segmentation violation due to stack overflow before pcre was
1210 // patched.
1211 // Again, a PCRE legacy test. RE2 doesn't recurse.
1212 string comment("x*");
1213 string a(131072, 'a');
1214 comment += a;
1215 comment += "*x";
1216 RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
1217 CHECK(RE2::FullMatch(comment, re));
1218 }
1219
1220 // Suggested by Josh Hyman. Failed when SearchOnePass was
1221 // not implementing case-folding.
1222 TEST(CaseInsensitive, MatchAndConsume) {
1223 string result;
1224 string text = "A fish named *Wanda*";
1225 StringPiece sp(text);
1226
1227 EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result));
1228 EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
1229 }
1230
1231 // RE2 should permit implicit conversions from string, StringPiece, const char*,
1232 // and C string literals.
1233 TEST(RE2, ImplicitConversions) {
1234 string re_string(".");
1235 StringPiece re_stringpiece(".");
1236 const char* re_cstring = ".";
1237 EXPECT_TRUE(RE2::PartialMatch("e", re_string));
1238 EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece));
1239 EXPECT_TRUE(RE2::PartialMatch("e", re_cstring));
1240 EXPECT_TRUE(RE2::PartialMatch("e", "."));
1241 }
1242
1243 // Bugs introduced by 8622304
1244 TEST(RE2, CL8622304) {
1245 // reported by ingow
1246 string dir;
1247 EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])")); // ok
1248 EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir)); // fails
1249
1250 // reported by jacobsa
1251 string key, val;
1252 EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true",
1253 "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
1254 &key,
1255 &val));
1256 EXPECT_EQ(key, "bar");
1257 EXPECT_EQ(val, "1,0x2F,030,4,5");
1258 }
1259
1260
1261 // Check that RE2 returns correct regexp pieces on error.
1262 // In particular, make sure it returns whole runes
1263 // and that it always reports invalid UTF-8.
1264 // Also check that Perl error flag piece is big enough.
1265 static struct ErrorTest {
1266 const char *regexp;
1267 const char *error;
1268 } error_tests[] = {
1269 { "ab\\αcd", "\\α" },
1270 { "ef\\x☺01", "\\x☺0" },
1271 { "gh\\x1☺01", "\\x1☺" },
1272 { "ij\\x1", "\\x1" },
1273 { "kl\\x", "\\x" },
1274 { "uv\\x{0000☺}", "\\x{0000☺" },
1275 { "wx\\p{ABC", "\\p{ABC" },
1276 { "yz(?smiUX:abc)", "(?smiUX" }, // used to return (?s but the error is X
1277 { "aa(?sm☺i", "(?sm☺" },
1278 { "bb[abc", "[abc" },
1279
1280 { "mn\\x1\377", "" }, // no argument string returned for invalid UTF-8
1281 { "op\377qr", "" },
1282 { "st\\x{00000\377", "" },
1283 { "zz\\p{\377}", "" },
1284 { "zz\\x{00\377}", "" },
1285 { "zz(?P<name\377>abc)", "" },
1286 };
1287 TEST(RE2, ErrorArgs) {
1288 for (int i = 0; i < arraysize(error_tests); i++) {
1289 RE2 re(error_tests[i].regexp, RE2::Quiet);
1290 EXPECT_FALSE(re.ok());
1291 EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error();
1292 }
1293 }
1294
1295 // Check that "never match \n" mode never matches \n.
1296 static struct NeverTest {
1297 const char* regexp;
1298 const char* text;
1299 const char* match;
1300 } never_tests[] = {
1301 { "(.*)", "abc\ndef\nghi\n", "abc" },
1302 { "(?s)(abc.*def)", "abc\ndef\n", NULL },
1303 { "(abc(.|\n)*def)", "abc\ndef\n", NULL },
1304 { "(abc[^x]*def)", "abc\ndef\n", NULL },
1305 { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
1306 };
1307 TEST(RE2, NeverNewline) {
1308 RE2::Options opt;
1309 opt.set_never_nl(true);
1310 for (int i = 0; i < arraysize(never_tests); i++) {
1311 const NeverTest& t = never_tests[i];
1312 RE2 re(t.regexp, opt);
1313 if (t.match == NULL) {
1314 EXPECT_FALSE(re.PartialMatch(t.text, re));
1315 } else {
1316 StringPiece m;
1317 EXPECT_TRUE(re.PartialMatch(t.text, re, &m));
1318 EXPECT_EQ(m, t.match);
1319 }
1320 }
1321 }
1322
1323 // Check that dot_nl option works.
1324 TEST(RE2, DotNL) {
1325 RE2::Options opt;
1326 opt.set_dot_nl(true);
1327 EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt)));
1328 EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt)));
1329 opt.set_never_nl(true);
1330 EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt)));
1331 }
1332
1333 // Check that there are no capturing groups in "never capture" mode.
1334 TEST(RE2, NeverCapture) {
1335 RE2::Options opt;
1336 opt.set_never_capture(true);
1337 RE2 re("(r)(e)", opt);
1338 EXPECT_EQ(0, re.NumberOfCapturingGroups());
1339 }
1340
1341 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0.
1342 // Triggered by a failed DFA search falling back to Bitstate when
1343 // using Match with a NULL submatch set. Bitstate tried to read
1344 // the submatch[0] entry even if nsubmatch was 0.
1345 TEST(RE2, BitstateCaptureBug) {
1346 RE2::Options opt;
1347 opt.set_max_mem(20000);
1348 RE2 re("(_________$)", opt);
1349 StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
1350 EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0));
1351 }
1352
1353 // C++ version of bug 609710.
1354 TEST(RE2, UnicodeClasses) {
1355 const string str = "ABCDEFGHI譚永鋒";
1356 string a, b, c;
1357
1358 EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
1359 EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
1360 EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
1361 EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
1362 EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
1363 EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
1364
1365 EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}"));
1366 EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}"));
1367 EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}"));
1368 EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}"));
1369 EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}"));
1370 EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}"));
1371
1372 EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}"));
1373 EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}"));
1374 EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}"));
1375 EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}"));
1376 EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}"));
1377 EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}"));
1378
1379 EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}"));
1380 EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}"));
1381 EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}"));
1382 EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}"));
1383 EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}"));
1384 EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}"));
1385
1386 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
1387 EXPECT_EQ("A", a);
1388 EXPECT_EQ("B", b);
1389 EXPECT_EQ("C", c);
1390
1391 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
1392 EXPECT_EQ("A", a);
1393 EXPECT_EQ("B", b);
1394 EXPECT_EQ("C", c);
1395
1396 EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));
1397
1398 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
1399 EXPECT_EQ("A", a);
1400 EXPECT_EQ("B", b);
1401 EXPECT_EQ("C", c);
1402
1403 EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));
1404
1405 EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
1406 EXPECT_EQ("譚", a);
1407 EXPECT_EQ("永", b);
1408 EXPECT_EQ("鋒", c);
1409 }
1410
1411 // Bug reported by saito. 2009/02/17
1412 TEST(RE2, NullVsEmptyString) {
1413 RE2 re2(".*");
1414 StringPiece v1("");
1415 EXPECT_TRUE(RE2::FullMatch(v1, re2));
1416
1417 StringPiece v2;
1418 EXPECT_TRUE(RE2::FullMatch(v2, re2));
1419 }
1420
1421 // Issue 1816809
1422 TEST(RE2, Bug1816809) {
1423 RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
1424 StringPiece piece("llx-3;llx4");
1425 string x;
1426 EXPECT_TRUE(RE2::Consume(&piece, re, &x));
1427 }
1428
1429 // Issue 3061120
1430 TEST(RE2, Bug3061120) {
1431 RE2 re("(?i)\\W");
1432 EXPECT_FALSE(RE2::PartialMatch("x", re)); // always worked
1433 EXPECT_FALSE(RE2::PartialMatch("k", re)); // broke because of kelvin
1434 EXPECT_FALSE(RE2::PartialMatch("s", re)); // broke because of latin long s
1435 }
1436
1437 TEST(RE2, CapturingGroupNames) {
1438 // Opening parentheses annotated with group IDs:
1439 // 12 3 45 6 7
1440 RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
1441 EXPECT_TRUE(re.ok());
1442 const map<int, string>& have = re.CapturingGroupNames();
1443 map<int, string> want;
1444 want[3] = "G2";
1445 want[6] = "G2";
1446 want[7] = "G1";
1447 EXPECT_EQ(want, have);
1448 }
1449
1450 TEST(RE2, RegexpToStringLossOfAnchor) {
1451 EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at");
1452 EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at");
1453 EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$");
1454 EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
1455 }
1456
1457 // Issue 10131674
1458 TEST(RE2, Bug10131674) {
1459 // Some of these escapes describe values that do not fit in a byte.
1460 RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1);
1461 EXPECT_FALSE(re.ok());
1462 EXPECT_FALSE(RE2::FullMatch("hello world", re));
1463 }
1464
1465 TEST(RE2, Bug18391750) {
1466 // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanit izer.
1467 const char t[] = {
1468 (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08,
1469 (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5,
1470 (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69,
1471 (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31,
1472 (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29,
1473 (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00,
1474 };
1475 RE2::Options opt;
1476 opt.set_encoding(RE2::Options::EncodingLatin1);
1477 opt.set_longest_match(true);
1478 opt.set_dot_nl(true);
1479 opt.set_case_sensitive(false);
1480 RE2 re(t, opt);
1481 CHECK(re.ok());
1482 RE2::PartialMatch(t, re);
1483 }
1484
1485 TEST(RE2, Bug18458852) {
1486 // Bug in parser accepting invalid (too large) rune,
1487 // causing compiler to fail in DCHECK in UTF-8
1488 // character class code.
1489 const char b[] = {
1490 (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28,
1491 (char)0x24, (char)0x5b, (char)0x5e, (char)0xf5, (char)0x87, (char)0x87,
1492 (char)0x90, (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00,
1493 };
1494 RE2 re(b);
1495 CHECK(!re.ok());
1496 }
1497
1498 TEST(RE2, Bug18523943) {
1499 // Bug in bitstate: case kFailInst was merged into the default with LOG(DFATAL ).
1500
1501 RE2::Options opt;
1502 const char a[] = {
1503 (char)0x29, (char)0x29, (char)0x24, (char)0x00,
1504 };
1505 const char b[] = {
1506 (char)0x28, (char)0x0a, (char)0x2a, (char)0x2a, (char)0x29, (char)0x00,
1507 };
1508 opt.set_log_errors(false);
1509 opt.set_encoding(RE2::Options::EncodingLatin1);
1510 opt.set_posix_syntax(true);
1511 opt.set_longest_match(true);
1512 opt.set_literal(false);
1513 opt.set_never_nl(true);
1514
1515 RE2 re((const char*)b, opt);
1516 CHECK(re.ok());
1517 string s1;
1518 CHECK(!RE2::PartialMatch((const char*)a, re, &s1));
1519 }
1520
1521 TEST(RE2, Bug21371806) {
1522 // Bug in parser accepting Unicode groups in Latin-1 mode,
1523 // causing compiler to fail in DCHECK in prog.cc.
1524
1525 RE2::Options opt;
1526 opt.set_encoding(RE2::Options::EncodingLatin1);
1527
1528 RE2 re("g\\p{Zl}]", opt);
1529 CHECK(re.ok());
1530 }
1531
1532 } // namespace re2
OLDNEW
« no previous file with comments | « third_party/re2/re2/testing/re2_arg_test.cc ('k') | third_party/re2/re2/testing/regexp_benchmark.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698