third_party/re2/re2/prefilter.cc - Issue 1544433002: Replace RE2 import with a dependency

Unified Diff: third_party/re2/re2/prefilter.cc

Issue 1544433002: Replace RE2 import with a dependency (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Re-Added LICENSE and OWNERS file Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/re2/re2/prefilter.cc

diff --git a/third_party/re2/re2/prefilter.cc b/third_party/re2/re2/prefilter.cc

deleted file mode 100644

index 45e43c9c095a251993f7eaa95ca242e007fefb5e..0000000000000000000000000000000000000000

--- a/third_party/re2/re2/prefilter.cc

+++ /dev/null

@@ -1,709 +0,0 @@

-// Use of this source code is governed by a BSD-style

-// license that can be found in the LICENSE file.

-#include "util/util.h"

-#include "re2/prefilter.h"

-#include "re2/re2.h"

-#include "re2/unicode_casefold.h"

-#include "re2/walker-inl.h"

-namespace re2 {

-static const int Trace = false;

-typedef set<string>::iterator SSIter;

-typedef set<string>::const_iterator ConstSSIter;

-GLOBAL_MUTEX(alloc_id_mutex);

-static int alloc_id = 100000; // Used for debugging.

-// Initializes a Prefilter, allocating subs_ as necessary.

-Prefilter::Prefilter(Op op) {

- op_ = op;

- subs_ = NULL;

- if (op_ == AND || op_ == OR)

- subs_ = new vector<Prefilter*>;

- GLOBAL_MUTEX_LOCK(alloc_id_mutex);

- alloc_id_ = alloc_id++;

- GLOBAL_MUTEX_UNLOCK(alloc_id_mutex);

- VLOG(10) << "alloc_id: " << alloc_id_;

-// Destroys a Prefilter.

-Prefilter::~Prefilter() {

- VLOG(10) << "Deleted: " << alloc_id_;

- if (subs_) {

- for (size_t i = 0; i < subs_->size(); i++)

- delete (*subs_)[i];

- delete subs_;

- subs_ = NULL;

- }

-// Simplify if the node is an empty Or or And.

-Prefilter* Prefilter::Simplify() {

- if (op_ != AND && op_ != OR) {

- return this;

- }

- // Nothing left in the AND/OR.

- if (subs_->size() == 0) {

- if (op_ == AND)

- op_ = ALL; // AND of nothing is true

- else

- op_ = NONE; // OR of nothing is false

- return this;

- }

- // Just one subnode: throw away wrapper.

- if (subs_->size() == 1) {

- Prefilter* a = (*subs_)[0];

- subs_->clear();

- delete this;

- return a->Simplify();

- }

- return this;

-// Combines two Prefilters together to create an "op" (AND or OR).

-// The passed Prefilters will be part of the returned Prefilter or deleted.

-// Does lots of work to avoid creating unnecessarily complicated structures.

-Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) {

- // If a, b can be rewritten as op, do so.

- a = a->Simplify();

- b = b->Simplify();

- // Canonicalize: a->op <= b->op.

- if (a->op() > b->op()) {

- Prefilter* t = a;

- a = b;

- b = t;

- }

- // Trivial cases.

- // ALL AND b = b

- // NONE OR b = b

- // ALL OR b = ALL

- // NONE AND b = NONE

- // Don't need to look at b, because of canonicalization above.

- // ALL and NONE are smallest opcodes.

- if (a->op() == ALL || a->op() == NONE) {

- if ((a->op() == ALL && op == AND) ||

- (a->op() == NONE && op == OR)) {

- delete a;

- return b;

- } else {

- delete b;

- return a;

- }

- // If a and b match op, merge their contents.

- if (a->op() == op && b->op() == op) {

- for (size_t i = 0; i < b->subs()->size(); i++) {

- Prefilter* bb = (*b->subs())[i];

- a->subs()->push_back(bb);

- }

- b->subs()->clear();

- delete b;

- return a;

- }

- // If a already has the same op as the op that is under construction

- // add in b (similarly if b already has the same op, add in a).

- if (b->op() == op) {

- Prefilter* t = a;

- a = b;

- b = t;

- }

- if (a->op() == op) {

- a->subs()->push_back(b);

- return a;

- }

- // Otherwise just return the op.

- Prefilter* c = new Prefilter(op);

- c->subs()->push_back(a);

- c->subs()->push_back(b);

- return c;

-Prefilter* Prefilter::And(Prefilter* a, Prefilter* b) {

- return AndOr(AND, a, b);

-Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {

- return AndOr(OR, a, b);

-static void SimplifyStringSet(set<string> *ss) {

- // Now make sure that the strings aren't redundant. For example, if

- // we know "ab" is a required string, then it doesn't help at all to

- // know that "abc" is also a required string, so delete "abc". This

- // is because, when we are performing a string search to filter

- // regexps, matching ab will already allow this regexp to be a

- // candidate for match, so further matching abc is redundant.

- for (SSIter i = ss->begin(); i != ss->end(); ++i) {

- SSIter j = i;

- ++j;

- while (j != ss->end()) {

- // Increment j early so that we can erase the element it points to.

- SSIter old_j = j;

- ++j;

- if (old_j->find(*i) != string::npos)

- ss->erase(old_j);

- }

-Prefilter* Prefilter::OrStrings(set<string>* ss) {

- SimplifyStringSet(ss);

- Prefilter* or_prefilter = NULL;

- if (!ss->empty()) {

- or_prefilter = new Prefilter(NONE);

- for (SSIter i = ss->begin(); i != ss->end(); ++i)

- or_prefilter = Or(or_prefilter, FromString(*i));

- }

- return or_prefilter;

-static Rune ToLowerRune(Rune r) {

- if (r < Runeself) {

- if ('A' <= r && r <= 'Z')

- r += 'a' - 'A';

- return r;

- }

- const CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r);

- if (f == NULL || r < f->lo)

- return r;

- return ApplyFold(f, r);

-static Rune ToLowerRuneLatin1(Rune r) {

- if ('A' <= r && r <= 'Z')

- r += 'a' - 'A';

- return r;

-Prefilter* Prefilter::FromString(const string& str) {

- Prefilter* m = new Prefilter(Prefilter::ATOM);

- m->atom_ = str;

- return m;

-// Information about a regexp used during computation of Prefilter.

-// Can be thought of as information about the set of strings matching

-// the given regular expression.

-class Prefilter::Info {

- public:

- Info();

- ~Info();

- // More constructors. They delete their Info* arguments.

- static Info* Alt(Info* a, Info* b);

- static Info* Concat(Info* a, Info* b);

- static Info* And(Info* a, Info* b);

- static Info* Star(Info* a);

- static Info* Plus(Info* a);

- static Info* Quest(Info* a);

- static Info* EmptyString();

- static Info* NoMatch();

- static Info* AnyChar();

- static Info* CClass(CharClass* cc, bool latin1);

- static Info* Literal(Rune r);

- static Info* LiteralLatin1(Rune r);

- static Info* AnyMatch();

- // Format Info as a string.

- string ToString();

- // Caller takes ownership of the Prefilter.

- Prefilter* TakeMatch();

- set<string>& exact() { return exact_; }

- bool is_exact() const { return is_exact_; }

- class Walker;

- private:

- set<string> exact_;

- // When is_exact_ is true, the strings that match

- // are placed in exact_. When it is no longer an exact

- // set of strings that match this RE, then is_exact_

- // is false and the match_ contains the required match

- // criteria.

- bool is_exact_;

- // Accumulated Prefilter query that any

- // match for this regexp is guaranteed to match.

- Prefilter* match_;

-};

-Prefilter::Info::Info()

- : is_exact_(false),

- match_(NULL) {

-Prefilter::Info::~Info() {

- delete match_;

-Prefilter* Prefilter::Info::TakeMatch() {

- if (is_exact_) {

- match_ = Prefilter::OrStrings(&exact_);

- is_exact_ = false;

- }

- Prefilter* m = match_;

- match_ = NULL;

- return m;

-// Format a Info in string form.

-string Prefilter::Info::ToString() {

- if (is_exact_) {

- int n = 0;

- string s;

- for (set<string>::iterator i = exact_.begin(); i != exact_.end(); ++i) {

- if (n++ > 0)

- s += ",";

- s += *i;

- }

- return s;

- }

- if (match_)

- return match_->DebugString();

- return "";

-// Add the strings from src to dst.

-static void CopyIn(const set<string>& src, set<string>* dst) {

- for (ConstSSIter i = src.begin(); i != src.end(); ++i)

- dst->insert(*i);

-// Add the cross-product of a and b to dst.

-// (For each string i in a and j in b, add i+j.)

-static void CrossProduct(const set<string>& a,

- const set<string>& b,

- set<string>* dst) {

- for (ConstSSIter i = a.begin(); i != a.end(); ++i)

- for (ConstSSIter j = b.begin(); j != b.end(); ++j)

- dst->insert(*i + *j);

-// Concats a and b. Requires that both are exact sets.

-// Forms an exact set that is a crossproduct of a and b.

-Prefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) {

- if (a == NULL)

- return b;

- DCHECK(a->is_exact_);

- DCHECK(b && b->is_exact_);

- Info *ab = new Info();

- CrossProduct(a->exact_, b->exact_, &ab->exact_);

- ab->is_exact_ = true;

- delete a;

- delete b;

- return ab;

-// Constructs an inexact Info for ab given a and b.

-// Used only when a or b is not exact or when the

-// exact cross product is likely to be too big.

-Prefilter::Info* Prefilter::Info::And(Info* a, Info* b) {

- if (a == NULL)

- return b;

- if (b == NULL)

- return a;

- Info *ab = new Info();

- ab->match_ = Prefilter::And(a->TakeMatch(), b->TakeMatch());

- ab->is_exact_ = false;

- delete a;

- delete b;

- return ab;

-// Constructs Info for a|b given a and b.

-Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) {

- Info *ab = new Info();

- if (a->is_exact_ && b->is_exact_) {

- CopyIn(a->exact_, &ab->exact_);

- CopyIn(b->exact_, &ab->exact_);

- ab->is_exact_ = true;

- } else {

- // Either a or b has is_exact_ = false. If the other

- // one has is_exact_ = true, we move it to match_ and

- // then create a OR of a,b. The resulting Info has

- // is_exact_ = false.

- ab->match_ = Prefilter::Or(a->TakeMatch(), b->TakeMatch());

- ab->is_exact_ = false;

- }

- delete a;

- delete b;

- return ab;

-// Constructs Info for a? given a.

-Prefilter::Info* Prefilter::Info::Quest(Info *a) {

- Info *ab = new Info();

- ab->is_exact_ = false;

- ab->match_ = new Prefilter(ALL);

- delete a;

- return ab;

-// Constructs Info for a* given a.

-// Same as a? -- not much to do.

-Prefilter::Info* Prefilter::Info::Star(Info *a) {

- return Quest(a);

-// Constructs Info for a+ given a. If a was exact set, it isn't

-// anymore.

-Prefilter::Info* Prefilter::Info::Plus(Info *a) {

- Info *ab = new Info();

- ab->match_ = a->TakeMatch();

- ab->is_exact_ = false;

- delete a;

- return ab;

-static string RuneToString(Rune r) {

- char buf[UTFmax];

- int n = runetochar(buf, &r);

- return string(buf, n);

-static string RuneToStringLatin1(Rune r) {

- char c = r & 0xff;

- return string(&c, 1);

-// Constructs Info for literal rune.

-Prefilter::Info* Prefilter::Info::Literal(Rune r) {

- Info* info = new Info();

- info->exact_.insert(RuneToString(ToLowerRune(r)));

- info->is_exact_ = true;

- return info;

-// Constructs Info for literal rune for Latin1 encoded string.

-Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) {

- Info* info = new Info();

- info->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));

- info->is_exact_ = true;

- return info;

-// Constructs Info for dot (any character).

-Prefilter::Info* Prefilter::Info::AnyChar() {

- Prefilter::Info* info = new Prefilter::Info();

- info->match_ = new Prefilter(ALL);

- return info;

-// Constructs Prefilter::Info for no possible match.

-Prefilter::Info* Prefilter::Info::NoMatch() {

- Prefilter::Info* info = new Prefilter::Info();

- info->match_ = new Prefilter(NONE);

- return info;

-// Constructs Prefilter::Info for any possible match.

-// This Prefilter::Info is valid for any regular expression,

-// since it makes no assertions whatsoever about the

-// strings being matched.

-Prefilter::Info* Prefilter::Info::AnyMatch() {

- Prefilter::Info *info = new Prefilter::Info();

- info->match_ = new Prefilter(ALL);

- return info;

-// Constructs Prefilter::Info for just the empty string.

-Prefilter::Info* Prefilter::Info::EmptyString() {

- Prefilter::Info* info = new Prefilter::Info();

- info->is_exact_ = true;

- info->exact_.insert("");

- return info;

-// Constructs Prefilter::Info for a character class.

-typedef CharClass::iterator CCIter;

-Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,

- bool latin1) {

- if (Trace) {

- VLOG(0) << "CharClassInfo:";

- for (CCIter i = cc->begin(); i != cc->end(); ++i)

- VLOG(0) << " " << i->lo << "-" << i->hi;

- }

- // If the class is too large, it's okay to overestimate.

- if (cc->size() > 10)

- return AnyChar();

- Prefilter::Info *a = new Prefilter::Info();

- for (CCIter i = cc->begin(); i != cc->end(); ++i)

- for (Rune r = i->lo; r <= i->hi; r++) {

- if (latin1) {

- a->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));

- } else {

- a->exact_.insert(RuneToString(ToLowerRune(r)));

- }

- a->is_exact_ = true;

- if (Trace) {

- VLOG(0) << " = " << a->ToString();

- }

- return a;

-class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {

- public:

- Walker(bool latin1) : latin1_(latin1) {}

- virtual Info* PostVisit(

- Regexp* re, Info* parent_arg,

- Info* pre_arg,

- Info** child_args, int nchild_args);

- virtual Info* ShortVisit(

- Regexp* re,

- Info* parent_arg);

- bool latin1() { return latin1_; }

- private:

- bool latin1_;

- DISALLOW_COPY_AND_ASSIGN(Walker);

-};

-Prefilter::Info* Prefilter::BuildInfo(Regexp* re) {

- if (Trace) {

- LOG(INFO) << "BuildPrefilter::Info: " << re->ToString();

- }

- bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0;

- Prefilter::Info::Walker w(latin1);

- Prefilter::Info* info = w.WalkExponential(re, NULL, 100000);

- if (w.stopped_early()) {

- delete info;

- return NULL;

- }

- return info;

-Prefilter::Info* Prefilter::Info::Walker::ShortVisit(

- Regexp* re, Prefilter::Info* parent_arg) {

- return AnyMatch();

-// Constructs the Prefilter::Info for the given regular expression.

-// Assumes re is simplified.

-Prefilter::Info* Prefilter::Info::Walker::PostVisit(

- Regexp* re, Prefilter::Info* parent_arg,

- Prefilter::Info* pre_arg, Prefilter::Info** child_args,

- int nchild_args) {

- Prefilter::Info *info;

- switch (re->op()) {

- default:

- case kRegexpRepeat:

- LOG(DFATAL) << "Bad regexp op " << re->op();

- info = EmptyString();

- break;

- case kRegexpNoMatch:

- info = NoMatch();

- break;

- // These ops match the empty string:

- case kRegexpEmptyMatch: // anywhere

- case kRegexpBeginLine: // at beginning of line

- case kRegexpEndLine: // at end of line

- case kRegexpBeginText: // at beginning of text

- case kRegexpEndText: // at end of text

- case kRegexpWordBoundary: // at word boundary

- case kRegexpNoWordBoundary: // not at word boundary

- info = EmptyString();

- break;

- case kRegexpLiteral:

- if (latin1()) {

- info = LiteralLatin1(re->rune());

- }

- else {

- info = Literal(re->rune());

- }

- break;

- case kRegexpLiteralString:

- if (re->nrunes() == 0) {

- info = NoMatch();

- break;

- }

- if (latin1()) {

- info = LiteralLatin1(re->runes()[0]);

- for (int i = 1; i < re->nrunes(); i++) {

- info = Concat(info, LiteralLatin1(re->runes()[i]));

- }

- } else {

- info = Literal(re->runes()[0]);

- for (int i = 1; i < re->nrunes(); i++) {

- info = Concat(info, Literal(re->runes()[i]));

- }

- break;

- case kRegexpConcat: {

- // Accumulate in info.

- // Exact is concat of recent contiguous exact nodes.

- info = NULL;

- Info* exact = NULL;

- for (int i = 0; i < nchild_args; i++) {

- Info* ci = child_args[i]; // child info

- if (!ci->is_exact() ||

- (exact && ci->exact().size() * exact->exact().size() > 16)) {

- // Exact run is over.

- info = And(info, exact);

- exact = NULL;

- // Add this child's info.

- info = And(info, ci);

- } else {

- // Append to exact run.

- exact = Concat(exact, ci);

- }

- info = And(info, exact);

- }

- break;

- case kRegexpAlternate:

- info = child_args[0];

- for (int i = 1; i < nchild_args; i++)

- info = Alt(info, child_args[i]);

- VLOG(10) << "Alt: " << info->ToString();

- break;

- case kRegexpStar:

- info = Star(child_args[0]);

- break;

- case kRegexpQuest:

- info = Quest(child_args[0]);

- break;

- case kRegexpPlus:

- info = Plus(child_args[0]);

- break;

- case kRegexpAnyChar:

- // Claim nothing, except that it's not empty.

- info = AnyChar();

- break;

- case kRegexpCharClass:

- info = CClass(re->cc(), latin1());

- break;

- case kRegexpCapture:

- // These don't affect the set of matching strings.

- info = child_args[0];

- break;

- }

- if (Trace) {

- VLOG(0) << "BuildInfo " << re->ToString()

- << ": " << (info ? info->ToString() : "");

- }

- return info;

-Prefilter* Prefilter::FromRegexp(Regexp* re) {

- if (re == NULL)

- return NULL;

- Regexp* simple = re->Simplify();

- Prefilter::Info *info = BuildInfo(simple);

- simple->Decref();

- if (info == NULL)

- return NULL;

- Prefilter* m = info->TakeMatch();

- delete info;

- return m;

-string Prefilter::DebugString() const {

- switch (op_) {

- default:

- LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;

- return StringPrintf("op%d", op_);

- case NONE:

- return "*no-matches*";

- case ATOM:

- return atom_;

- case ALL:

- return "";

- case AND: {

- string s = "";

- for (size_t i = 0; i < subs_->size(); i++) {

- if (i > 0)

- s += " ";

- Prefilter* sub = (*subs_)[i];

- s += sub ? sub->DebugString() : "<nil>";

- }

- return s;

- }

- case OR: {

- string s = "(";

- for (size_t i = 0; i < subs_->size(); i++) {

- if (i > 0)

- s += "|";

- Prefilter* sub = (*subs_)[i];

- s += sub ? sub->DebugString() : "<nil>";

- }

- s += ")";

- return s;

- }

-Prefilter* Prefilter::FromRE2(const RE2* re2) {

- if (re2 == NULL)

- return NULL;

- Regexp* regexp = re2->Regexp();

- if (regexp == NULL)

- return NULL;

- return FromRegexp(regexp);

-} // namespace re2

« no previous file with comments | « third_party/re2/re2/prefilter.h ('k') | third_party/re2/re2/prefilter_tree.h » ('j') | no next file with comments »