| Index: third_party/re2/re2/tostring.cc
|
| diff --git a/third_party/re2/re2/tostring.cc b/third_party/re2/re2/tostring.cc
|
| deleted file mode 100644
|
| index 0230c8c9ca141ea18c5b7ff273ccb45bce2c70df..0000000000000000000000000000000000000000
|
| --- a/third_party/re2/re2/tostring.cc
|
| +++ /dev/null
|
| @@ -1,345 +0,0 @@
|
| -// Copyright 2006 The RE2 Authors. All Rights Reserved.
|
| -// Use of this source code is governed by a BSD-style
|
| -// license that can be found in the LICENSE file.
|
| -
|
| -// Format a regular expression structure as a string.
|
| -// Tested by parse_test.cc
|
| -
|
| -#include "util/util.h"
|
| -#include "re2/regexp.h"
|
| -#include "re2/walker-inl.h"
|
| -
|
| -namespace re2 {
|
| -
|
| -enum {
|
| - PrecAtom,
|
| - PrecUnary,
|
| - PrecConcat,
|
| - PrecAlternate,
|
| - PrecEmpty,
|
| - PrecParen,
|
| - PrecToplevel,
|
| -};
|
| -
|
| -// Helper function. See description below.
|
| -static void AppendCCRange(string* t, Rune lo, Rune hi);
|
| -
|
| -// Walker to generate string in s_.
|
| -// The arg pointers are actually integers giving the
|
| -// context precedence.
|
| -// The child_args are always NULL.
|
| -class ToStringWalker : public Regexp::Walker<int> {
|
| - public:
|
| - explicit ToStringWalker(string* t) : t_(t) {}
|
| -
|
| - virtual int PreVisit(Regexp* re, int parent_arg, bool* stop);
|
| - virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg,
|
| - int* child_args, int nchild_args);
|
| - virtual int ShortVisit(Regexp* re, int parent_arg) {
|
| - return 0;
|
| - }
|
| -
|
| - private:
|
| - string* t_; // The string the walker appends to.
|
| -
|
| - DISALLOW_COPY_AND_ASSIGN(ToStringWalker);
|
| -};
|
| -
|
| -string Regexp::ToString() {
|
| - string t;
|
| - ToStringWalker w(&t);
|
| - w.WalkExponential(this, PrecToplevel, 100000);
|
| - if (w.stopped_early())
|
| - t += " [truncated]";
|
| - return t;
|
| -}
|
| -
|
| -#define ToString DontCallToString // Avoid accidental recursion.
|
| -
|
| -// Visits re before children are processed.
|
| -// Appends ( if needed and passes new precedence to children.
|
| -int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
|
| - int prec = parent_arg;
|
| - int nprec = PrecAtom;
|
| -
|
| - switch (re->op()) {
|
| - case kRegexpNoMatch:
|
| - case kRegexpEmptyMatch:
|
| - case kRegexpLiteral:
|
| - case kRegexpAnyChar:
|
| - case kRegexpAnyByte:
|
| - case kRegexpBeginLine:
|
| - case kRegexpEndLine:
|
| - case kRegexpBeginText:
|
| - case kRegexpEndText:
|
| - case kRegexpWordBoundary:
|
| - case kRegexpNoWordBoundary:
|
| - case kRegexpCharClass:
|
| - case kRegexpHaveMatch:
|
| - nprec = PrecAtom;
|
| - break;
|
| -
|
| - case kRegexpConcat:
|
| - case kRegexpLiteralString:
|
| - if (prec < PrecConcat)
|
| - t_->append("(?:");
|
| - nprec = PrecConcat;
|
| - break;
|
| -
|
| - case kRegexpAlternate:
|
| - if (prec < PrecAlternate)
|
| - t_->append("(?:");
|
| - nprec = PrecAlternate;
|
| - break;
|
| -
|
| - case kRegexpCapture:
|
| - t_->append("(");
|
| - if (re->cap() == 0)
|
| - LOG(DFATAL) << "kRegexpCapture cap() == 0";
|
| - if (re->name()) {
|
| - t_->append("?P<");
|
| - t_->append(*re->name());
|
| - t_->append(">");
|
| - }
|
| - nprec = PrecParen;
|
| - break;
|
| -
|
| - case kRegexpStar:
|
| - case kRegexpPlus:
|
| - case kRegexpQuest:
|
| - case kRegexpRepeat:
|
| - if (prec < PrecUnary)
|
| - t_->append("(?:");
|
| - // The subprecedence here is PrecAtom instead of PrecUnary
|
| - // because PCRE treats two unary ops in a row as a parse error.
|
| - nprec = PrecAtom;
|
| - break;
|
| - }
|
| -
|
| - return nprec;
|
| -}
|
| -
|
| -static void AppendLiteral(string *t, Rune r, bool foldcase) {
|
| - if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) {
|
| - t->append(1, '\\');
|
| - t->append(1, static_cast<char>(r));
|
| - } else if (foldcase && 'a' <= r && r <= 'z') {
|
| - if ('a' <= r && r <= 'z')
|
| - r += 'A' - 'a';
|
| - t->append(1, '[');
|
| - t->append(1, static_cast<char>(r));
|
| - t->append(1, static_cast<char>(r) + 'a' - 'A');
|
| - t->append(1, ']');
|
| - } else {
|
| - AppendCCRange(t, r, r);
|
| - }
|
| -}
|
| -
|
| -// Visits re after children are processed.
|
| -// For childless regexps, all the work is done here.
|
| -// For regexps with children, append any unary suffixes or ).
|
| -int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
|
| - int* child_args, int nchild_args) {
|
| - int prec = parent_arg;
|
| - switch (re->op()) {
|
| - case kRegexpNoMatch:
|
| - // There's no simple symbol for "no match", but
|
| - // [^0-Runemax] excludes everything.
|
| - t_->append("[^\\x00-\\x{10ffff}]");
|
| - break;
|
| -
|
| - case kRegexpEmptyMatch:
|
| - // Append (?:) to make empty string visible,
|
| - // unless this is already being parenthesized.
|
| - if (prec < PrecEmpty)
|
| - t_->append("(?:)");
|
| - break;
|
| -
|
| - case kRegexpLiteral:
|
| - AppendLiteral(t_, re->rune(),
|
| - (re->parse_flags() & Regexp::FoldCase) != 0);
|
| - break;
|
| -
|
| - case kRegexpLiteralString:
|
| - for (int i = 0; i < re->nrunes(); i++)
|
| - AppendLiteral(t_, re->runes()[i],
|
| - (re->parse_flags() & Regexp::FoldCase) != 0);
|
| - if (prec < PrecConcat)
|
| - t_->append(")");
|
| - break;
|
| -
|
| - case kRegexpConcat:
|
| - if (prec < PrecConcat)
|
| - t_->append(")");
|
| - break;
|
| -
|
| - case kRegexpAlternate:
|
| - // Clumsy but workable: the children all appended |
|
| - // at the end of their strings, so just remove the last one.
|
| - if ((*t_)[t_->size()-1] == '|')
|
| - t_->erase(t_->size()-1);
|
| - else
|
| - LOG(DFATAL) << "Bad final char: " << t_;
|
| - if (prec < PrecAlternate)
|
| - t_->append(")");
|
| - break;
|
| -
|
| - case kRegexpStar:
|
| - t_->append("*");
|
| - if (re->parse_flags() & Regexp::NonGreedy)
|
| - t_->append("?");
|
| - if (prec < PrecUnary)
|
| - t_->append(")");
|
| - break;
|
| -
|
| - case kRegexpPlus:
|
| - t_->append("+");
|
| - if (re->parse_flags() & Regexp::NonGreedy)
|
| - t_->append("?");
|
| - if (prec < PrecUnary)
|
| - t_->append(")");
|
| - break;
|
| -
|
| - case kRegexpQuest:
|
| - t_->append("?");
|
| - if (re->parse_flags() & Regexp::NonGreedy)
|
| - t_->append("?");
|
| - if (prec < PrecUnary)
|
| - t_->append(")");
|
| - break;
|
| -
|
| - case kRegexpRepeat:
|
| - if (re->max() == -1)
|
| - t_->append(StringPrintf("{%d,}", re->min()));
|
| - else if (re->min() == re->max())
|
| - t_->append(StringPrintf("{%d}", re->min()));
|
| - else
|
| - t_->append(StringPrintf("{%d,%d}", re->min(), re->max()));
|
| - if (re->parse_flags() & Regexp::NonGreedy)
|
| - t_->append("?");
|
| - if (prec < PrecUnary)
|
| - t_->append(")");
|
| - break;
|
| -
|
| - case kRegexpAnyChar:
|
| - t_->append(".");
|
| - break;
|
| -
|
| - case kRegexpAnyByte:
|
| - t_->append("\\C");
|
| - break;
|
| -
|
| - case kRegexpBeginLine:
|
| - t_->append("^");
|
| - break;
|
| -
|
| - case kRegexpEndLine:
|
| - t_->append("$");
|
| - break;
|
| -
|
| - case kRegexpBeginText:
|
| - t_->append("(?-m:^)");
|
| - break;
|
| -
|
| - case kRegexpEndText:
|
| - if (re->parse_flags() & Regexp::WasDollar)
|
| - t_->append("(?-m:$)");
|
| - else
|
| - t_->append("\\z");
|
| - break;
|
| -
|
| - case kRegexpWordBoundary:
|
| - t_->append("\\b");
|
| - break;
|
| -
|
| - case kRegexpNoWordBoundary:
|
| - t_->append("\\B");
|
| - break;
|
| -
|
| - case kRegexpCharClass: {
|
| - if (re->cc()->size() == 0) {
|
| - t_->append("[^\\x00-\\x{10ffff}]");
|
| - break;
|
| - }
|
| - t_->append("[");
|
| - // Heuristic: show class as negated if it contains the
|
| - // non-character 0xFFFE.
|
| - CharClass* cc = re->cc();
|
| - if (cc->Contains(0xFFFE)) {
|
| - cc = cc->Negate();
|
| - t_->append("^");
|
| - }
|
| - for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i)
|
| - AppendCCRange(t_, i->lo, i->hi);
|
| - if (cc != re->cc())
|
| - cc->Delete();
|
| - t_->append("]");
|
| - break;
|
| - }
|
| -
|
| - case kRegexpCapture:
|
| - t_->append(")");
|
| - break;
|
| -
|
| - case kRegexpHaveMatch:
|
| - // There's no syntax accepted by the parser to generate
|
| - // this node (it is generated by RE2::Set) so make something
|
| - // up that is readable but won't compile.
|
| - t_->append("(?HaveMatch:%d)", re->match_id());
|
| - break;
|
| - }
|
| -
|
| - // If the parent is an alternation, append the | for it.
|
| - if (prec == PrecAlternate)
|
| - t_->append("|");
|
| -
|
| - return 0;
|
| -}
|
| -
|
| -// Appends a rune for use in a character class to the string t.
|
| -static void AppendCCChar(string* t, Rune r) {
|
| - if (0x20 <= r && r <= 0x7E) {
|
| - if (strchr("[]^-\\", r))
|
| - t->append("\\");
|
| - t->append(1, static_cast<char>(r));
|
| - return;
|
| - }
|
| - switch (r) {
|
| - default:
|
| - break;
|
| -
|
| - case '\r':
|
| - t->append("\\r");
|
| - return;
|
| -
|
| - case '\t':
|
| - t->append("\\t");
|
| - return;
|
| -
|
| - case '\n':
|
| - t->append("\\n");
|
| - return;
|
| -
|
| - case '\f':
|
| - t->append("\\f");
|
| - return;
|
| - }
|
| -
|
| - if (r < 0x100) {
|
| - StringAppendF(t, "\\x%02x", static_cast<int>(r));
|
| - return;
|
| - }
|
| - StringAppendF(t, "\\x{%x}", static_cast<int>(r));
|
| -}
|
| -
|
| -static void AppendCCRange(string* t, Rune lo, Rune hi) {
|
| - if (lo > hi)
|
| - return;
|
| - AppendCCChar(t, lo);
|
| - if (lo < hi) {
|
| - t->append("-");
|
| - AppendCCChar(t, hi);
|
| - }
|
| -}
|
| -
|
| -} // namespace re2
|
|
|