Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(276)

Side by Side Diff: third_party/re2/re2/testing/tester.cc

Issue 1530113002: Revert of Update re2 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/re2/re2/testing/tester.h ('k') | third_party/re2/re2/testing/unicode_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2008 The RE2 Authors. All Rights Reserved. 1 // Copyright 2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 // Regular expression engine tester -- test all the implementations against each other. 5 // Regular expression engine tester -- test all the implementations against each other.
6 6
7 #include "util/util.h" 7 #include "util/util.h"
8 #include "util/flags.h" 8 #include "util/flags.h"
9 #include "re2/testing/tester.h" 9 #include "re2/testing/tester.h"
10 #include "re2/prog.h" 10 #include "re2/prog.h"
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
239 } 239 }
240 240
241 // Compile regexp to RE. 241 // Compile regexp to RE.
242 // PCRE as exposed by the RE interface isn't always usable. 242 // PCRE as exposed by the RE interface isn't always usable.
243 // 1. It disagrees about handling of empty-string repetitions 243 // 1. It disagrees about handling of empty-string repetitions
244 // like matching (a*)* against "b". PCRE treats the (a*) as 244 // like matching (a*)* against "b". PCRE treats the (a*) as
245 // occurring once, while we treat it as occurring not at all. 245 // occurring once, while we treat it as occurring not at all.
246 // 2. It treats $ as this weird thing meaning end of string 246 // 2. It treats $ as this weird thing meaning end of string
247 // or before the \n at the end of the string. 247 // or before the \n at the end of the string.
248 // 3. It doesn't implement POSIX leftmost-longest matching. 248 // 3. It doesn't implement POSIX leftmost-longest matching.
249 // 4. It lets \s match vertical tab.
250 // MimicsPCRE() detects 1 and 2. 249 // MimicsPCRE() detects 1 and 2.
251 if ((Engines() & (1<<kEnginePCRE)) && regexp_->MimicsPCRE() && 250 if ((Engines() & (1<<kEnginePCRE)) && regexp_->MimicsPCRE() &&
252 kind_ != Prog::kLongestMatch) { 251 kind_ != Prog::kLongestMatch) {
253 PCRE_Options o; 252 PCRE_Options o;
254 o.set_option(PCRE::UTF8); 253 o.set_option(PCRE::UTF8);
255 if (flags & Regexp::Latin1) 254 if (flags & Regexp::Latin1)
256 o.set_option(PCRE::None); 255 o.set_option(PCRE::None);
257 // PCRE has an interface bug keeping us from finding $0, so 256 // PCRE has an interface bug keeping us from finding $0, so
258 // add one more layer of parens. 257 // add one more layer of parens.
259 re_ = new PCRE("("+re+")", o); 258 re_ = new PCRE("("+re+")", o);
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
337 result->matched = 336 result->matched =
338 prog_->SearchDFA(text, context, anchor, kind_, result->submatch, 337 prog_->SearchDFA(text, context, anchor, kind_, result->submatch,
339 &result->skipped, NULL); 338 &result->skipped, NULL);
340 // If anchored, no need for second run, 339 // If anchored, no need for second run,
341 // but do it anyway to find more bugs. 340 // but do it anyway to find more bugs.
342 if (result->matched) { 341 if (result->matched) {
343 if (!rprog_->SearchDFA(result->submatch[0], context, 342 if (!rprog_->SearchDFA(result->submatch[0], context,
344 Prog::kAnchored, Prog::kLongestMatch, 343 Prog::kAnchored, Prog::kLongestMatch,
345 result->submatch, 344 result->submatch,
346 &result->skipped, NULL)) { 345 &result->skipped, NULL)) {
347 LOG(ERROR) << "Reverse DFA inconsistency: " 346 LOG(ERROR) << "Reverse DFA inconsistency: " << CEscape(regexp_str_)
348 << CEscape(regexp_str_)
349 << " on " << CEscape(text); 347 << " on " << CEscape(text);
350 result->matched = false; 348 result->matched = false;
351 } 349 }
352 } 350 }
353 result->have_submatch0 = true; 351 result->have_submatch0 = true;
354 break; 352 break;
355 353
356 case kEngineOnePass: 354 case kEngineOnePass:
357 if (prog_ == NULL || 355 if (prog_ == NULL ||
358 anchor == Prog::kUnanchored || 356 anchor == Prog::kUnanchored ||
(...skipping 26 matching lines...) Expand all
385 } 383 }
386 384
387 RE2::Anchor re_anchor; 385 RE2::Anchor re_anchor;
388 if (anchor == Prog::kAnchored) 386 if (anchor == Prog::kAnchored)
389 re_anchor = RE2::ANCHOR_START; 387 re_anchor = RE2::ANCHOR_START;
390 else 388 else
391 re_anchor = RE2::UNANCHORED; 389 re_anchor = RE2::UNANCHORED;
392 if (kind_ == Prog::kFullMatch) 390 if (kind_ == Prog::kFullMatch)
393 re_anchor = RE2::ANCHOR_BOTH; 391 re_anchor = RE2::ANCHOR_BOTH;
394 392
395 result->matched = re2_->Match( 393 result->matched = re2_->Match(context,
396 context, 394 text.begin() - context.begin(),
397 static_cast<int>(text.begin() - context.begin()), 395 text.end() - context.begin(),
398 static_cast<int>(text.end() - context.begin()), 396 re_anchor, result->submatch, nsubmatch);
399 re_anchor,
400 result->submatch,
401 nsubmatch);
402 result->have_submatch = nsubmatch > 0; 397 result->have_submatch = nsubmatch > 0;
403 break; 398 break;
404 } 399 }
405 400
406 case kEnginePCRE: { 401 case kEnginePCRE: {
407 if (!re_ || text.begin() != context.begin() || 402 if (!re_ || text.begin() != context.begin() ||
408 text.end() != context.end()) { 403 text.end() != context.end()) {
409 result->skipped = true; 404 result->skipped = true;
410 break; 405 break;
411 } 406 }
412
413 // PCRE 8.34 or so started allowing vertical tab to match \s,
414 // following a change made in Perl 5.18. RE2 does not.
415 if ((regexp_str_.contains("\\s") || regexp_str_.contains("\\S")) &&
416 text.contains("\v")) {
417 result->skipped = true;
418 break;
419 }
420 407
421 const PCRE::Arg **argptr = new const PCRE::Arg*[nsubmatch]; 408 const PCRE::Arg **argptr = new const PCRE::Arg*[nsubmatch];
422 PCRE::Arg *a = new PCRE::Arg[nsubmatch]; 409 PCRE::Arg *a = new PCRE::Arg[nsubmatch];
423 for (int i = 0; i < nsubmatch; i++) { 410 for (int i = 0; i < nsubmatch; i++) {
424 a[i] = PCRE::Arg(&result->submatch[i]); 411 a[i] = PCRE::Arg(&result->submatch[i]);
425 argptr[i] = &a[i]; 412 argptr[i] = &a[i];
426 } 413 }
427 int consumed; 414 int consumed;
428 PCRE::Anchor pcre_anchor; 415 PCRE::Anchor pcre_anchor;
429 if (anchor == Prog::kAnchored) 416 if (anchor == Prog::kAnchored)
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
511 498
512 Result r; 499 Result r;
513 RunSearch(i, text, context, anchor, &r); 500 RunSearch(i, text, context, anchor, &r);
514 if (ResultOkay(r, correct)) { 501 if (ResultOkay(r, correct)) {
515 if (FLAGS_log_okay) 502 if (FLAGS_log_okay)
516 LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor); 503 LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor);
517 continue; 504 continue;
518 } 505 }
519 506
520 // We disagree with PCRE on the meaning of some Unicode matches. 507 // We disagree with PCRE on the meaning of some Unicode matches.
521 // In particular, we treat non-ASCII UTF-8 as non-word characters. 508 // In particular, we treat all non-ASCII UTF-8 as word characters.
522 // We also treat "empty" character sets like [^\w\W] as being 509 // We also treat "empty" character sets like [^\w\W] as being
523 // impossible to match, while PCRE apparently excludes some code 510 // impossible to match, while PCRE apparently excludes some code
524 // points (e.g., 0x0080) from both \w and \W. 511 // points (e.g., 0x0080) from both \w and \W.
525 if (i == kEnginePCRE && NonASCII(text)) 512 if (i == kEnginePCRE && NonASCII(text))
526 continue; 513 continue;
527 514
528 if (!r.untrusted) 515 if (!r.untrusted)
529 all_okay = false; 516 all_okay = false;
530 517
531 LogMatch(r.untrusted ? "(Untrusted) Mismatch: " : "Mismatch: ", i, text, 518 LogMatch(r.untrusted ? "(Untrusted) Mismatch: " : "Mismatch: ", i, text,
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
598 for (int j = 0; j < arraysize(parse_modes); j++) { 585 for (int j = 0; j < arraysize(parse_modes); j++) {
599 TestInstance* t = new TestInstance(regexp, kinds[i], 586 TestInstance* t = new TestInstance(regexp, kinds[i],
600 parse_modes[j].parse_flags); 587 parse_modes[j].parse_flags);
601 error_ |= t->error(); 588 error_ |= t->error();
602 v_.push_back(t); 589 v_.push_back(t);
603 } 590 }
604 } 591 }
605 } 592 }
606 593
607 Tester::~Tester() { 594 Tester::~Tester() {
608 for (size_t i = 0; i < v_.size(); i++) 595 for (int i = 0; i < v_.size(); i++)
609 delete v_[i]; 596 delete v_[i];
610 } 597 }
611 598
612 bool Tester::TestCase(const StringPiece& text, const StringPiece& context, 599 bool Tester::TestCase(const StringPiece& text, const StringPiece& context,
613 Prog::Anchor anchor) { 600 Prog::Anchor anchor) {
614 bool okay = true; 601 bool okay = true;
615 for (size_t i = 0; i < v_.size(); i++) 602 for (int i = 0; i < v_.size(); i++)
616 okay &= (!v_[i]->error() && v_[i]->RunCase(text, context, anchor)); 603 okay &= (!v_[i]->error() && v_[i]->RunCase(text, context, anchor));
617 return okay; 604 return okay;
618 } 605 }
619 606
620 static Prog::Anchor anchors[] = { 607 static Prog::Anchor anchors[] = {
621 Prog::kAnchored, 608 Prog::kAnchored,
622 Prog::kUnanchored 609 Prog::kUnanchored
623 }; 610 };
624 611
625 bool Tester::TestInput(const StringPiece& text) { 612 bool Tester::TestInput(const StringPiece& text) {
(...skipping 18 matching lines...) Expand all
644 return okay; 631 return okay;
645 } 632 }
646 633
647 bool TestRegexpOnText(const StringPiece& regexp, 634 bool TestRegexpOnText(const StringPiece& regexp,
648 const StringPiece& text) { 635 const StringPiece& text) {
649 Tester t(regexp); 636 Tester t(regexp);
650 return t.TestInput(text); 637 return t.TestInput(text);
651 } 638 }
652 639
653 } // namespace re2 640 } // namespace re2
OLDNEW
« no previous file with comments | « third_party/re2/re2/testing/tester.h ('k') | third_party/re2/re2/testing/unicode_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698