third_party/re2/re2/testing/tester.cc - Issue 1530113002: Revert of Update re2

Side by Side Diff: third_party/re2/re2/testing/tester.cc

Issue 1530113002: Revert of Update re2 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2008 The RE2 Authors. All Rights Reserved.	1 // Copyright 2008 The RE2 Authors. All Rights Reserved.

2 // Use of this source code is governed by a BSD-style	2 // Use of this source code is governed by a BSD-style

3 // license that can be found in the LICENSE file.	3 // license that can be found in the LICENSE file.

4	4

5 // Regular expression engine tester -- test all the implementations against each other.	5 // Regular expression engine tester -- test all the implementations against each other.

6	6

7 #include "util/util.h"	7 #include "util/util.h"

8 #include "util/flags.h"	8 #include "util/flags.h"

9 #include "re2/testing/tester.h"	9 #include "re2/testing/tester.h"

10 #include "re2/prog.h"	10 #include "re2/prog.h"

(...skipping 228 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
239 }	239 }

240	240

241 // Compile regexp to RE.	241 // Compile regexp to RE.

242 // PCRE as exposed by the RE interface isn't always usable.	242 // PCRE as exposed by the RE interface isn't always usable.

243 // 1. It disagrees about handling of empty-string repetitions	243 // 1. It disagrees about handling of empty-string repetitions

244 // like matching (a*)* against "b". PCRE treats the (a*) as	244 // like matching (a*)* against "b". PCRE treats the (a*) as

245 // occurring once, while we treat it as occurring not at all.	245 // occurring once, while we treat it as occurring not at all.

246 // 2. It treats $ as this weird thing meaning end of string	246 // 2. It treats $ as this weird thing meaning end of string

247 // or before the \n at the end of the string.	247 // or before the \n at the end of the string.

248 // 3. It doesn't implement POSIX leftmost-longest matching.	248 // 3. It doesn't implement POSIX leftmost-longest matching.

249 // 4. It lets \s match vertical tab.

250 // MimicsPCRE() detects 1 and 2.	249 // MimicsPCRE() detects 1 and 2.

251 if ((Engines() & (1<<kEnginePCRE)) && regexp_->MimicsPCRE() &&	250 if ((Engines() & (1<<kEnginePCRE)) && regexp_->MimicsPCRE() &&

252 kind_ != Prog::kLongestMatch) {	251 kind_ != Prog::kLongestMatch) {

253 PCRE_Options o;	252 PCRE_Options o;

254 o.set_option(PCRE::UTF8);	253 o.set_option(PCRE::UTF8);

255 if (flags & Regexp::Latin1)	254 if (flags & Regexp::Latin1)

256 o.set_option(PCRE::None);	255 o.set_option(PCRE::None);

257 // PCRE has interface bug keeping us from finding $0, so	256 // PCRE has interface bug keeping us from finding $0, so

258 // add one more layer of parens.	257 // add one more layer of parens.

259 re_ = new PCRE("("+re+")", o);	258 re_ = new PCRE("("+re+")", o);

(...skipping 77 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
337 result->matched =	336 result->matched =

338 prog_->SearchDFA(text, context, anchor, kind_, result->submatch,	337 prog_->SearchDFA(text, context, anchor, kind_, result->submatch,

339 &result->skipped, NULL);	338 &result->skipped, NULL);

340 // If anchored, no need for second run,	339 // If anchored, no need for second run,

341 // but do it anyway to find more bugs.	340 // but do it anyway to find more bugs.

342 if (result->matched) {	341 if (result->matched) {

343 if (!rprog_->SearchDFA(result->submatch[0], context,	342 if (!rprog_->SearchDFA(result->submatch[0], context,

344 Prog::kAnchored, Prog::kLongestMatch,	343 Prog::kAnchored, Prog::kLongestMatch,

345 result->submatch,	344 result->submatch,

346 &result->skipped, NULL)) {	345 &result->skipped, NULL)) {

347 LOG(ERROR) << "Reverse DFA inconsistency: "	346 LOG(ERROR) << "Reverse DFA inconsistency: " << CEscape(regexp_str_)

348 << CEscape(regexp_str_)

349 << " on " << CEscape(text);	347 << " on " << CEscape(text);

350 result->matched = false;	348 result->matched = false;

351 }	349 }

352 }	350 }

353 result->have_submatch0 = true;	351 result->have_submatch0 = true;

354 break;	352 break;

355	353

356 case kEngineOnePass:	354 case kEngineOnePass:

357 if (prog_ == NULL ||	355 if (prog_ == NULL ||

358 anchor == Prog::kUnanchored ||	356 anchor == Prog::kUnanchored ||

(...skipping 26 matching lines...) Expand all Loading...
385 }	383 }

386	384

387 RE2::Anchor re_anchor;	385 RE2::Anchor re_anchor;

388 if (anchor == Prog::kAnchored)	386 if (anchor == Prog::kAnchored)

389 re_anchor = RE2::ANCHOR_START;	387 re_anchor = RE2::ANCHOR_START;

390 else	388 else

391 re_anchor = RE2::UNANCHORED;	389 re_anchor = RE2::UNANCHORED;

392 if (kind_ == Prog::kFullMatch)	390 if (kind_ == Prog::kFullMatch)

393 re_anchor = RE2::ANCHOR_BOTH;	391 re_anchor = RE2::ANCHOR_BOTH;

394	392

395 result->matched = re2_->Match(	393 result->matched = re2_->Match(context,

396 context,	394 text.begin() - context.begin(),

397 static_cast<int>(text.begin() - context.begin()),	395 text.end() - context.begin(),

398 static_cast<int>(text.end() - context.begin()),	396 re_anchor, result->submatch, nsubmatch);

399 re_anchor,

400 result->submatch,

401 nsubmatch);

402 result->have_submatch = nsubmatch > 0;	397 result->have_submatch = nsubmatch > 0;

403 break;	398 break;

404 }	399 }

405	400

406 case kEnginePCRE: {	401 case kEnginePCRE: {

407 if (!re_ || text.begin() != context.begin() ||	402 if (!re_ || text.begin() != context.begin() ||

408 text.end() != context.end()) {	403 text.end() != context.end()) {

409 result->skipped = true;	404 result->skipped = true;

410 break;	405 break;

411 }	406 }

412

413 // PCRE 8.34 or so started allowing vertical tab to match \s,

414 // following a change made in Perl 5.18. RE2 does not.

415 if ((regexp_str_.contains("\\s") || regexp_str_.contains("\\S")) &&

416 text.contains("\v")) {

417 result->skipped = true;

418 break;

419 }

420	407

421 const PCRE::Arg *argptr = new const PCRE::Arg[nsubmatch];	408 const PCRE::Arg *argptr = new const PCRE::Arg[nsubmatch];

422 PCRE::Arg *a = new PCRE::Arg[nsubmatch];	409 PCRE::Arg *a = new PCRE::Arg[nsubmatch];

423 for (int i = 0; i < nsubmatch; i++) {	410 for (int i = 0; i < nsubmatch; i++) {

424 a[i] = PCRE::Arg(&result->submatch[i]);	411 a[i] = PCRE::Arg(&result->submatch[i]);

425 argptr[i] = &a[i];	412 argptr[i] = &a[i];

426 }	413 }

427 int consumed;	414 int consumed;

428 PCRE::Anchor pcre_anchor;	415 PCRE::Anchor pcre_anchor;

429 if (anchor == Prog::kAnchored)	416 if (anchor == Prog::kAnchored)

(...skipping 81 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
511	498

512 Result r;	499 Result r;

513 RunSearch(i, text, context, anchor, &r);	500 RunSearch(i, text, context, anchor, &r);

514 if (ResultOkay(r, correct)) {	501 if (ResultOkay(r, correct)) {

515 if (FLAGS_log_okay)	502 if (FLAGS_log_okay)

516 LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor);	503 LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor);

517 continue;	504 continue;

518 }	505 }

519	506

520 // We disagree with PCRE on the meaning of some Unicode matches.	507 // We disagree with PCRE on the meaning of some Unicode matches.

521 // In particular, we treat non-ASCII UTF-8 as non-word characters.	508 // In particular, we treat all non-ASCII UTF-8 as word characters.

522 // We also treat "empty" character sets like [^\w\W] as being	509 // We also treat "empty" character sets like [^\w\W] as being

523 // impossible to match, while PCRE apparently excludes some code	510 // impossible to match, while PCRE apparently excludes some code

524 // points (e.g., 0x0080) from both \w and \W.	511 // points (e.g., 0x0080) from both \w and \W.

525 if (i == kEnginePCRE && NonASCII(text))	512 if (i == kEnginePCRE && NonASCII(text))

526 continue;	513 continue;

527	514

528 if (!r.untrusted)	515 if (!r.untrusted)

529 all_okay = false;	516 all_okay = false;

530	517

531 LogMatch(r.untrusted ? "(Untrusted) Mismatch: " : "Mismatch: ", i, text,	518 LogMatch(r.untrusted ? "(Untrusted) Mismatch: " : "Mismatch: ", i, text,

(...skipping 66 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
598 for (int j = 0; j < arraysize(parse_modes); j++) {	585 for (int j = 0; j < arraysize(parse_modes); j++) {

599 TestInstance* t = new TestInstance(regexp, kinds[i],	586 TestInstance* t = new TestInstance(regexp, kinds[i],

600 parse_modes[j].parse_flags);	587 parse_modes[j].parse_flags);

601 error_ |= t->error();	588 error_ |= t->error();

602 v_.push_back(t);	589 v_.push_back(t);

603 }	590 }

604 }	591 }

605 }	592 }

606	593

607 Tester::~Tester() {	594 Tester::~Tester() {

608 for (size_t i = 0; i < v_.size(); i++)	595 for (int i = 0; i < v_.size(); i++)

609 delete v_[i];	596 delete v_[i];

610 }	597 }

611	598

612 bool Tester::TestCase(const StringPiece& text, const StringPiece& context,	599 bool Tester::TestCase(const StringPiece& text, const StringPiece& context,

613 Prog::Anchor anchor) {	600 Prog::Anchor anchor) {

614 bool okay = true;	601 bool okay = true;

615 for (size_t i = 0; i < v_.size(); i++)	602 for (int i = 0; i < v_.size(); i++)

616 okay &= (!v_[i]->error() && v_[i]->RunCase(text, context, anchor));	603 okay &= (!v_[i]->error() && v_[i]->RunCase(text, context, anchor));

617 return okay;	604 return okay;

618 }	605 }

619	606

620 static Prog::Anchor anchors[] = {	607 static Prog::Anchor anchors[] = {

621 Prog::kAnchored,	608 Prog::kAnchored,

622 Prog::kUnanchored	609 Prog::kUnanchored

623 };	610 };

624	611

625 bool Tester::TestInput(const StringPiece& text) {	612 bool Tester::TestInput(const StringPiece& text) {

(...skipping 18 matching lines...) Expand all Loading...
644 return okay;	631 return okay;

645 }	632 }

646	633

647 bool TestRegexpOnText(const StringPiece& regexp,	634 bool TestRegexpOnText(const StringPiece& regexp,

648 const StringPiece& text) {	635 const StringPiece& text) {

649 Tester t(regexp);	636 Tester t(regexp);

650 return t.TestInput(text);	637 return t.TestInput(text);

651 }	638 }

652	639

653 } // namespace re2	640 } // namespace re2

OLD	NEW

« no previous file with comments | « third_party/re2/re2/testing/tester.h ('k') | third_party/re2/re2/testing/unicode_test.py » ('j') | no next file with comments »