Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(24)

Unified Diff: third_party/re2/re2/mimics_pcre.cc

Issue 1544433002: Replace RE2 import with a dependency (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Re-Added LICENSE and OWNERS file Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/re2/re2/make_unicode_groups.py ('k') | third_party/re2/re2/nfa.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/re2/re2/mimics_pcre.cc
diff --git a/third_party/re2/re2/mimics_pcre.cc b/third_party/re2/re2/mimics_pcre.cc
deleted file mode 100644
index 0a55004d1baeed6129b47e3c83645cc2b065a059..0000000000000000000000000000000000000000
--- a/third_party/re2/re2/mimics_pcre.cc
+++ /dev/null
@@ -1,185 +0,0 @@
-// Copyright 2008 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Determine whether this library should match PCRE exactly
-// for a particular Regexp. (If so, the testing framework can
-// check that it does.)
-//
-// This library matches PCRE except in these cases:
-// * the regexp contains a repetition of an empty string,
-// like (a*)* or (a*)+. In this case, PCRE will treat
-// the repetition sequence as ending with an empty string,
-// while this library does not.
-// * Perl and PCRE differ on whether \v matches \n.
-// For historical reasons, this library implements the Perl behavior.
-// * Perl and PCRE allow $ in one-line mode to match either the very
-// end of the text or just before a \n at the end of the text.
-// This library requires it to match only the end of the text.
-// * Similarly, Perl and PCRE do not allow ^ in multi-line mode to
-// match the end of the text if the last character is a \n.
-// This library does allow it.
-//
-// Regexp::MimicsPCRE checks for any of these conditions.
-
-#include "util/util.h"
-#include "re2/regexp.h"
-#include "re2/walker-inl.h"
-
-namespace re2 {
-
-// Returns whether re might match an empty string.
-static bool CanBeEmptyString(Regexp *re);
-
-// Walker class to compute whether library handles a regexp
-// exactly as PCRE would. See comment at top for conditions.
-
-class PCREWalker : public Regexp::Walker<bool> {
- public:
- PCREWalker() {}
- bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg, bool* child_args,
- int nchild_args);
-
- bool ShortVisit(Regexp* re, bool a) {
- // Should never be called: we use Walk not WalkExponential.
- LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
- return a;
- }
-};
-
-// Called after visiting each of re's children and accumulating
-// the return values in child_args. So child_args contains whether
-// this library mimics PCRE for those subexpressions.
-bool PCREWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
- bool* child_args, int nchild_args) {
- // If children failed, so do we.
- for (int i = 0; i < nchild_args; i++)
- if (!child_args[i])
- return false;
-
- // Otherwise look for other reasons to fail.
- switch (re->op()) {
- // Look for repeated empty string.
- case kRegexpStar:
- case kRegexpPlus:
- case kRegexpQuest:
- if (CanBeEmptyString(re->sub()[0]))
- return false;
- break;
- case kRegexpRepeat:
- if (re->max() == -1 && CanBeEmptyString(re->sub()[0]))
- return false;
- break;
-
- // Look for \v
- case kRegexpLiteral:
- if (re->rune() == '\v')
- return false;
- break;
-
- // Look for $ in single-line mode.
- case kRegexpEndText:
- case kRegexpEmptyMatch:
- if (re->parse_flags() & Regexp::WasDollar)
- return false;
- break;
-
- // Look for ^ in multi-line mode.
- case kRegexpBeginLine:
- // No condition: in single-line mode ^ becomes kRegexpBeginText.
- return false;
-
- default:
- break;
- }
-
- // Not proven guilty.
- return true;
-}
-
-// Returns whether this regexp's behavior will mimic PCRE's exactly.
-bool Regexp::MimicsPCRE() {
- PCREWalker w;
- return w.Walk(this, true);
-}
-
-
-// Walker class to compute whether a Regexp can match an empty string.
-// It is okay to overestimate. For example, \b\B cannot match an empty
-// string, because \b and \B are mutually exclusive, but this isn't
-// that smart and will say it can. Spurious empty strings
-// will reduce the number of regexps we sanity check against PCRE,
-// but they won't break anything.
-
-class EmptyStringWalker : public Regexp::Walker<bool> {
- public:
- EmptyStringWalker() { }
- bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
- bool* child_args, int nchild_args);
-
- bool ShortVisit(Regexp* re, bool a) {
- // Should never be called: we use Walk not WalkExponential.
- LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
- return a;
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(EmptyStringWalker);
-};
-
-// Called after visiting re's children. child_args contains the return
-// value from each of the children's PostVisits (i.e., whether each child
-// can match an empty string). Returns whether this clause can match an
-// empty string.
-bool EmptyStringWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
- bool* child_args, int nchild_args) {
- switch (re->op()) {
- case kRegexpNoMatch: // never empty
- case kRegexpLiteral:
- case kRegexpAnyChar:
- case kRegexpAnyByte:
- case kRegexpCharClass:
- case kRegexpLiteralString:
- return false;
-
- case kRegexpEmptyMatch: // always empty
- case kRegexpBeginLine: // always empty, when they match
- case kRegexpEndLine:
- case kRegexpNoWordBoundary:
- case kRegexpWordBoundary:
- case kRegexpBeginText:
- case kRegexpEndText:
- case kRegexpStar: // can always be empty
- case kRegexpQuest:
- case kRegexpHaveMatch:
- return true;
-
- case kRegexpConcat: // can be empty if all children can
- for (int i = 0; i < nchild_args; i++)
- if (!child_args[i])
- return false;
- return true;
-
- case kRegexpAlternate: // can be empty if any child can
- for (int i = 0; i < nchild_args; i++)
- if (child_args[i])
- return true;
- return false;
-
- case kRegexpPlus: // can be empty if the child can
- case kRegexpCapture:
- return child_args[0];
-
- case kRegexpRepeat: // can be empty if child can or is x{0}
- return child_args[0] || re->min() == 0;
- }
- return false;
-}
-
-// Returns whether re can match an empty string.
-static bool CanBeEmptyString(Regexp* re) {
- EmptyStringWalker w;
- return w.Walk(re, true);
-}
-
-} // namespace re2
« no previous file with comments | « third_party/re2/re2/make_unicode_groups.py ('k') | third_party/re2/re2/nfa.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698