| Index: third_party/re2/re2/mimics_pcre.cc
|
| diff --git a/third_party/re2/re2/mimics_pcre.cc b/third_party/re2/re2/mimics_pcre.cc
|
| deleted file mode 100644
|
| index 0a55004d1baeed6129b47e3c83645cc2b065a059..0000000000000000000000000000000000000000
|
| --- a/third_party/re2/re2/mimics_pcre.cc
|
| +++ /dev/null
|
| @@ -1,185 +0,0 @@
|
| -// Copyright 2008 The RE2 Authors. All Rights Reserved.
|
| -// Use of this source code is governed by a BSD-style
|
| -// license that can be found in the LICENSE file.
|
| -
|
| -// Determine whether this library should match PCRE exactly
|
| -// for a particular Regexp. (If so, the testing framework can
|
| -// check that it does.)
|
| -//
|
| -// This library matches PCRE except in these cases:
|
| -// * the regexp contains a repetition of an empty string,
|
| -// like (a*)* or (a*)+. In this case, PCRE will treat
|
| -// the repetition sequence as ending with an empty string,
|
| -// while this library does not.
|
| -// * Perl and PCRE differ on whether \v matches \n.
|
| -// For historical reasons, this library implements the Perl behavior.
|
| -// * Perl and PCRE allow $ in one-line mode to match either the very
|
| -// end of the text or just before a \n at the end of the text.
|
| -// This library requires it to match only the end of the text.
|
| -// * Similarly, Perl and PCRE do not allow ^ in multi-line mode to
|
| -// match the end of the text if the last character is a \n.
|
| -// This library does allow it.
|
| -//
|
| -// Regexp::MimicsPCRE checks for any of these conditions.
|
| -
|
| -#include "util/util.h"
|
| -#include "re2/regexp.h"
|
| -#include "re2/walker-inl.h"
|
| -
|
| -namespace re2 {
|
| -
|
| -// Returns whether re might match an empty string.
|
| -static bool CanBeEmptyString(Regexp *re);
|
| -
|
| -// Walker class to compute whether library handles a regexp
|
| -// exactly as PCRE would. See comment at top for conditions.
|
| -
|
| -class PCREWalker : public Regexp::Walker<bool> {
|
| - public:
|
| - PCREWalker() {}
|
| - bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg, bool* child_args,
|
| - int nchild_args);
|
| -
|
| - bool ShortVisit(Regexp* re, bool a) {
|
| - // Should never be called: we use Walk not WalkExponential.
|
| - LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
|
| - return a;
|
| - }
|
| -};
|
| -
|
| -// Called after visiting each of re's children and accumulating
|
| -// the return values in child_args. So child_args contains whether
|
| -// this library mimics PCRE for those subexpressions.
|
| -bool PCREWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
|
| - bool* child_args, int nchild_args) {
|
| - // If children failed, so do we.
|
| - for (int i = 0; i < nchild_args; i++)
|
| - if (!child_args[i])
|
| - return false;
|
| -
|
| - // Otherwise look for other reasons to fail.
|
| - switch (re->op()) {
|
| - // Look for repeated empty string.
|
| - case kRegexpStar:
|
| - case kRegexpPlus:
|
| - case kRegexpQuest:
|
| - if (CanBeEmptyString(re->sub()[0]))
|
| - return false;
|
| - break;
|
| - case kRegexpRepeat:
|
| - if (re->max() == -1 && CanBeEmptyString(re->sub()[0]))
|
| - return false;
|
| - break;
|
| -
|
| - // Look for \v
|
| - case kRegexpLiteral:
|
| - if (re->rune() == '\v')
|
| - return false;
|
| - break;
|
| -
|
| - // Look for $ in single-line mode.
|
| - case kRegexpEndText:
|
| - case kRegexpEmptyMatch:
|
| - if (re->parse_flags() & Regexp::WasDollar)
|
| - return false;
|
| - break;
|
| -
|
| - // Look for ^ in multi-line mode.
|
| - case kRegexpBeginLine:
|
| - // No condition: in single-line mode ^ becomes kRegexpBeginText.
|
| - return false;
|
| -
|
| - default:
|
| - break;
|
| - }
|
| -
|
| - // Not proven guilty.
|
| - return true;
|
| -}
|
| -
|
| -// Returns whether this regexp's behavior will mimic PCRE's exactly.
|
| -bool Regexp::MimicsPCRE() {
|
| - PCREWalker w;
|
| - return w.Walk(this, true);
|
| -}
|
| -
|
| -
|
| -// Walker class to compute whether a Regexp can match an empty string.
|
| -// It is okay to overestimate. For example, \b\B cannot match an empty
|
| -// string, because \b and \B are mutually exclusive, but this isn't
|
| -// that smart and will say it can. Spurious empty strings
|
| -// will reduce the number of regexps we sanity check against PCRE,
|
| -// but they won't break anything.
|
| -
|
| -class EmptyStringWalker : public Regexp::Walker<bool> {
|
| - public:
|
| - EmptyStringWalker() { }
|
| - bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
|
| - bool* child_args, int nchild_args);
|
| -
|
| - bool ShortVisit(Regexp* re, bool a) {
|
| - // Should never be called: we use Walk not WalkExponential.
|
| - LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
|
| - return a;
|
| - }
|
| -
|
| - private:
|
| - DISALLOW_COPY_AND_ASSIGN(EmptyStringWalker);
|
| -};
|
| -
|
| -// Called after visiting re's children. child_args contains the return
|
| -// value from each of the children's PostVisits (i.e., whether each child
|
| -// can match an empty string). Returns whether this clause can match an
|
| -// empty string.
|
| -bool EmptyStringWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
|
| - bool* child_args, int nchild_args) {
|
| - switch (re->op()) {
|
| - case kRegexpNoMatch: // never empty
|
| - case kRegexpLiteral:
|
| - case kRegexpAnyChar:
|
| - case kRegexpAnyByte:
|
| - case kRegexpCharClass:
|
| - case kRegexpLiteralString:
|
| - return false;
|
| -
|
| - case kRegexpEmptyMatch: // always empty
|
| - case kRegexpBeginLine: // always empty, when they match
|
| - case kRegexpEndLine:
|
| - case kRegexpNoWordBoundary:
|
| - case kRegexpWordBoundary:
|
| - case kRegexpBeginText:
|
| - case kRegexpEndText:
|
| - case kRegexpStar: // can always be empty
|
| - case kRegexpQuest:
|
| - case kRegexpHaveMatch:
|
| - return true;
|
| -
|
| - case kRegexpConcat: // can be empty if all children can
|
| - for (int i = 0; i < nchild_args; i++)
|
| - if (!child_args[i])
|
| - return false;
|
| - return true;
|
| -
|
| - case kRegexpAlternate: // can be empty if any child can
|
| - for (int i = 0; i < nchild_args; i++)
|
| - if (child_args[i])
|
| - return true;
|
| - return false;
|
| -
|
| - case kRegexpPlus: // can be empty if the child can
|
| - case kRegexpCapture:
|
| - return child_args[0];
|
| -
|
| - case kRegexpRepeat: // can be empty if child can or is x{0}
|
| - return child_args[0] || re->min() == 0;
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -// Returns whether re can match an empty string.
|
| -static bool CanBeEmptyString(Regexp* re) {
|
| - EmptyStringWalker w;
|
| - return w.Walk(re, true);
|
| -}
|
| -
|
| -} // namespace re2
|
|
|