Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(177)

Unified Diff: third_party/re2/re2/prefilter.cc

Issue 1544433002: Replace RE2 import with a dependency (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Re-Added LICENSE and OWNERS file Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/re2/re2/prefilter.h ('k') | third_party/re2/re2/prefilter_tree.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/re2/re2/prefilter.cc
diff --git a/third_party/re2/re2/prefilter.cc b/third_party/re2/re2/prefilter.cc
deleted file mode 100644
index 45e43c9c095a251993f7eaa95ca242e007fefb5e..0000000000000000000000000000000000000000
--- a/third_party/re2/re2/prefilter.cc
+++ /dev/null
@@ -1,709 +0,0 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "util/util.h"
-#include "re2/prefilter.h"
-#include "re2/re2.h"
-#include "re2/unicode_casefold.h"
-#include "re2/walker-inl.h"
-
-namespace re2 {
-
-static const int Trace = false;
-
-typedef set<string>::iterator SSIter;
-typedef set<string>::const_iterator ConstSSIter;
-
-GLOBAL_MUTEX(alloc_id_mutex);
-static int alloc_id = 100000; // Used for debugging.
-// Initializes a Prefilter, allocating subs_ as necessary.
-Prefilter::Prefilter(Op op) {
- op_ = op;
- subs_ = NULL;
- if (op_ == AND || op_ == OR)
- subs_ = new vector<Prefilter*>;
-
- GLOBAL_MUTEX_LOCK(alloc_id_mutex);
- alloc_id_ = alloc_id++;
- GLOBAL_MUTEX_UNLOCK(alloc_id_mutex);
- VLOG(10) << "alloc_id: " << alloc_id_;
-}
-
-// Destroys a Prefilter.
-Prefilter::~Prefilter() {
- VLOG(10) << "Deleted: " << alloc_id_;
- if (subs_) {
- for (size_t i = 0; i < subs_->size(); i++)
- delete (*subs_)[i];
- delete subs_;
- subs_ = NULL;
- }
-}
-
-// Simplify if the node is an empty Or or And.
-Prefilter* Prefilter::Simplify() {
- if (op_ != AND && op_ != OR) {
- return this;
- }
-
- // Nothing left in the AND/OR.
- if (subs_->size() == 0) {
- if (op_ == AND)
- op_ = ALL; // AND of nothing is true
- else
- op_ = NONE; // OR of nothing is false
-
- return this;
- }
-
- // Just one subnode: throw away wrapper.
- if (subs_->size() == 1) {
- Prefilter* a = (*subs_)[0];
- subs_->clear();
- delete this;
- return a->Simplify();
- }
-
- return this;
-}
-
-// Combines two Prefilters together to create an "op" (AND or OR).
-// The passed Prefilters will be part of the returned Prefilter or deleted.
-// Does lots of work to avoid creating unnecessarily complicated structures.
-Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) {
- // If a, b can be rewritten as op, do so.
- a = a->Simplify();
- b = b->Simplify();
-
- // Canonicalize: a->op <= b->op.
- if (a->op() > b->op()) {
- Prefilter* t = a;
- a = b;
- b = t;
- }
-
- // Trivial cases.
- // ALL AND b = b
- // NONE OR b = b
- // ALL OR b = ALL
- // NONE AND b = NONE
- // Don't need to look at b, because of canonicalization above.
- // ALL and NONE are smallest opcodes.
- if (a->op() == ALL || a->op() == NONE) {
- if ((a->op() == ALL && op == AND) ||
- (a->op() == NONE && op == OR)) {
- delete a;
- return b;
- } else {
- delete b;
- return a;
- }
- }
-
- // If a and b match op, merge their contents.
- if (a->op() == op && b->op() == op) {
- for (size_t i = 0; i < b->subs()->size(); i++) {
- Prefilter* bb = (*b->subs())[i];
- a->subs()->push_back(bb);
- }
- b->subs()->clear();
- delete b;
- return a;
- }
-
- // If a already has the same op as the op that is under construction
- // add in b (similarly if b already has the same op, add in a).
- if (b->op() == op) {
- Prefilter* t = a;
- a = b;
- b = t;
- }
- if (a->op() == op) {
- a->subs()->push_back(b);
- return a;
- }
-
- // Otherwise just return the op.
- Prefilter* c = new Prefilter(op);
- c->subs()->push_back(a);
- c->subs()->push_back(b);
- return c;
-}
-
-Prefilter* Prefilter::And(Prefilter* a, Prefilter* b) {
- return AndOr(AND, a, b);
-}
-
-Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {
- return AndOr(OR, a, b);
-}
-
-static void SimplifyStringSet(set<string> *ss) {
- // Now make sure that the strings aren't redundant. For example, if
- // we know "ab" is a required string, then it doesn't help at all to
- // know that "abc" is also a required string, so delete "abc". This
- // is because, when we are performing a string search to filter
- // regexps, matching ab will already allow this regexp to be a
- // candidate for match, so further matching abc is redundant.
-
- for (SSIter i = ss->begin(); i != ss->end(); ++i) {
- SSIter j = i;
- ++j;
- while (j != ss->end()) {
- // Increment j early so that we can erase the element it points to.
- SSIter old_j = j;
- ++j;
- if (old_j->find(*i) != string::npos)
- ss->erase(old_j);
- }
- }
-}
-
-Prefilter* Prefilter::OrStrings(set<string>* ss) {
- SimplifyStringSet(ss);
- Prefilter* or_prefilter = NULL;
- if (!ss->empty()) {
- or_prefilter = new Prefilter(NONE);
- for (SSIter i = ss->begin(); i != ss->end(); ++i)
- or_prefilter = Or(or_prefilter, FromString(*i));
- }
- return or_prefilter;
-}
-
-static Rune ToLowerRune(Rune r) {
- if (r < Runeself) {
- if ('A' <= r && r <= 'Z')
- r += 'a' - 'A';
- return r;
- }
-
- const CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r);
- if (f == NULL || r < f->lo)
- return r;
- return ApplyFold(f, r);
-}
-
-static Rune ToLowerRuneLatin1(Rune r) {
- if ('A' <= r && r <= 'Z')
- r += 'a' - 'A';
- return r;
-}
-
-Prefilter* Prefilter::FromString(const string& str) {
- Prefilter* m = new Prefilter(Prefilter::ATOM);
- m->atom_ = str;
- return m;
-}
-
-// Information about a regexp used during computation of Prefilter.
-// Can be thought of as information about the set of strings matching
-// the given regular expression.
-class Prefilter::Info {
- public:
- Info();
- ~Info();
-
- // More constructors. They delete their Info* arguments.
- static Info* Alt(Info* a, Info* b);
- static Info* Concat(Info* a, Info* b);
- static Info* And(Info* a, Info* b);
- static Info* Star(Info* a);
- static Info* Plus(Info* a);
- static Info* Quest(Info* a);
- static Info* EmptyString();
- static Info* NoMatch();
- static Info* AnyChar();
- static Info* CClass(CharClass* cc, bool latin1);
- static Info* Literal(Rune r);
- static Info* LiteralLatin1(Rune r);
- static Info* AnyMatch();
-
- // Format Info as a string.
- string ToString();
-
- // Caller takes ownership of the Prefilter.
- Prefilter* TakeMatch();
-
- set<string>& exact() { return exact_; }
-
- bool is_exact() const { return is_exact_; }
-
- class Walker;
-
- private:
- set<string> exact_;
-
- // When is_exact_ is true, the strings that match
- // are placed in exact_. When it is no longer an exact
- // set of strings that match this RE, then is_exact_
- // is false and the match_ contains the required match
- // criteria.
- bool is_exact_;
-
- // Accumulated Prefilter query that any
- // match for this regexp is guaranteed to match.
- Prefilter* match_;
-};
-
-
-Prefilter::Info::Info()
- : is_exact_(false),
- match_(NULL) {
-}
-
-Prefilter::Info::~Info() {
- delete match_;
-}
-
-Prefilter* Prefilter::Info::TakeMatch() {
- if (is_exact_) {
- match_ = Prefilter::OrStrings(&exact_);
- is_exact_ = false;
- }
- Prefilter* m = match_;
- match_ = NULL;
- return m;
-}
-
-// Format a Info in string form.
-string Prefilter::Info::ToString() {
- if (is_exact_) {
- int n = 0;
- string s;
- for (set<string>::iterator i = exact_.begin(); i != exact_.end(); ++i) {
- if (n++ > 0)
- s += ",";
- s += *i;
- }
- return s;
- }
-
- if (match_)
- return match_->DebugString();
-
- return "";
-}
-
-// Add the strings from src to dst.
-static void CopyIn(const set<string>& src, set<string>* dst) {
- for (ConstSSIter i = src.begin(); i != src.end(); ++i)
- dst->insert(*i);
-}
-
-// Add the cross-product of a and b to dst.
-// (For each string i in a and j in b, add i+j.)
-static void CrossProduct(const set<string>& a,
- const set<string>& b,
- set<string>* dst) {
- for (ConstSSIter i = a.begin(); i != a.end(); ++i)
- for (ConstSSIter j = b.begin(); j != b.end(); ++j)
- dst->insert(*i + *j);
-}
-
-// Concats a and b. Requires that both are exact sets.
-// Forms an exact set that is a crossproduct of a and b.
-Prefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) {
- if (a == NULL)
- return b;
- DCHECK(a->is_exact_);
- DCHECK(b && b->is_exact_);
- Info *ab = new Info();
-
- CrossProduct(a->exact_, b->exact_, &ab->exact_);
- ab->is_exact_ = true;
-
- delete a;
- delete b;
- return ab;
-}
-
-// Constructs an inexact Info for ab given a and b.
-// Used only when a or b is not exact or when the
-// exact cross product is likely to be too big.
-Prefilter::Info* Prefilter::Info::And(Info* a, Info* b) {
- if (a == NULL)
- return b;
- if (b == NULL)
- return a;
-
- Info *ab = new Info();
-
- ab->match_ = Prefilter::And(a->TakeMatch(), b->TakeMatch());
- ab->is_exact_ = false;
- delete a;
- delete b;
- return ab;
-}
-
-// Constructs Info for a|b given a and b.
-Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) {
- Info *ab = new Info();
-
- if (a->is_exact_ && b->is_exact_) {
- CopyIn(a->exact_, &ab->exact_);
- CopyIn(b->exact_, &ab->exact_);
- ab->is_exact_ = true;
- } else {
- // Either a or b has is_exact_ = false. If the other
- // one has is_exact_ = true, we move it to match_ and
- // then create a OR of a,b. The resulting Info has
- // is_exact_ = false.
- ab->match_ = Prefilter::Or(a->TakeMatch(), b->TakeMatch());
- ab->is_exact_ = false;
- }
-
- delete a;
- delete b;
- return ab;
-}
-
-// Constructs Info for a? given a.
-Prefilter::Info* Prefilter::Info::Quest(Info *a) {
- Info *ab = new Info();
-
- ab->is_exact_ = false;
- ab->match_ = new Prefilter(ALL);
- delete a;
- return ab;
-}
-
-// Constructs Info for a* given a.
-// Same as a? -- not much to do.
-Prefilter::Info* Prefilter::Info::Star(Info *a) {
- return Quest(a);
-}
-
-// Constructs Info for a+ given a. If a was exact set, it isn't
-// anymore.
-Prefilter::Info* Prefilter::Info::Plus(Info *a) {
- Info *ab = new Info();
-
- ab->match_ = a->TakeMatch();
- ab->is_exact_ = false;
-
- delete a;
- return ab;
-}
-
-static string RuneToString(Rune r) {
- char buf[UTFmax];
- int n = runetochar(buf, &r);
- return string(buf, n);
-}
-
-static string RuneToStringLatin1(Rune r) {
- char c = r & 0xff;
- return string(&c, 1);
-}
-
-// Constructs Info for literal rune.
-Prefilter::Info* Prefilter::Info::Literal(Rune r) {
- Info* info = new Info();
- info->exact_.insert(RuneToString(ToLowerRune(r)));
- info->is_exact_ = true;
- return info;
-}
-
-// Constructs Info for literal rune for Latin1 encoded string.
-Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) {
- Info* info = new Info();
- info->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));
- info->is_exact_ = true;
- return info;
-}
-
-// Constructs Info for dot (any character).
-Prefilter::Info* Prefilter::Info::AnyChar() {
- Prefilter::Info* info = new Prefilter::Info();
- info->match_ = new Prefilter(ALL);
- return info;
-}
-
-// Constructs Prefilter::Info for no possible match.
-Prefilter::Info* Prefilter::Info::NoMatch() {
- Prefilter::Info* info = new Prefilter::Info();
- info->match_ = new Prefilter(NONE);
- return info;
-}
-
-// Constructs Prefilter::Info for any possible match.
-// This Prefilter::Info is valid for any regular expression,
-// since it makes no assertions whatsoever about the
-// strings being matched.
-Prefilter::Info* Prefilter::Info::AnyMatch() {
- Prefilter::Info *info = new Prefilter::Info();
- info->match_ = new Prefilter(ALL);
- return info;
-}
-
-// Constructs Prefilter::Info for just the empty string.
-Prefilter::Info* Prefilter::Info::EmptyString() {
- Prefilter::Info* info = new Prefilter::Info();
- info->is_exact_ = true;
- info->exact_.insert("");
- return info;
-}
-
-// Constructs Prefilter::Info for a character class.
-typedef CharClass::iterator CCIter;
-Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
- bool latin1) {
- if (Trace) {
- VLOG(0) << "CharClassInfo:";
- for (CCIter i = cc->begin(); i != cc->end(); ++i)
- VLOG(0) << " " << i->lo << "-" << i->hi;
- }
-
- // If the class is too large, it's okay to overestimate.
- if (cc->size() > 10)
- return AnyChar();
-
- Prefilter::Info *a = new Prefilter::Info();
- for (CCIter i = cc->begin(); i != cc->end(); ++i)
- for (Rune r = i->lo; r <= i->hi; r++) {
- if (latin1) {
- a->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));
- } else {
- a->exact_.insert(RuneToString(ToLowerRune(r)));
- }
- }
-
-
- a->is_exact_ = true;
-
- if (Trace) {
- VLOG(0) << " = " << a->ToString();
- }
-
- return a;
-}
-
-class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {
- public:
- Walker(bool latin1) : latin1_(latin1) {}
-
- virtual Info* PostVisit(
- Regexp* re, Info* parent_arg,
- Info* pre_arg,
- Info** child_args, int nchild_args);
-
- virtual Info* ShortVisit(
- Regexp* re,
- Info* parent_arg);
-
- bool latin1() { return latin1_; }
- private:
- bool latin1_;
- DISALLOW_COPY_AND_ASSIGN(Walker);
-};
-
-Prefilter::Info* Prefilter::BuildInfo(Regexp* re) {
- if (Trace) {
- LOG(INFO) << "BuildPrefilter::Info: " << re->ToString();
- }
-
- bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0;
- Prefilter::Info::Walker w(latin1);
- Prefilter::Info* info = w.WalkExponential(re, NULL, 100000);
-
- if (w.stopped_early()) {
- delete info;
- return NULL;
- }
-
- return info;
-}
-
-Prefilter::Info* Prefilter::Info::Walker::ShortVisit(
- Regexp* re, Prefilter::Info* parent_arg) {
- return AnyMatch();
-}
-
-// Constructs the Prefilter::Info for the given regular expression.
-// Assumes re is simplified.
-Prefilter::Info* Prefilter::Info::Walker::PostVisit(
- Regexp* re, Prefilter::Info* parent_arg,
- Prefilter::Info* pre_arg, Prefilter::Info** child_args,
- int nchild_args) {
- Prefilter::Info *info;
- switch (re->op()) {
- default:
- case kRegexpRepeat:
- LOG(DFATAL) << "Bad regexp op " << re->op();
- info = EmptyString();
- break;
-
- case kRegexpNoMatch:
- info = NoMatch();
- break;
-
- // These ops match the empty string:
- case kRegexpEmptyMatch: // anywhere
- case kRegexpBeginLine: // at beginning of line
- case kRegexpEndLine: // at end of line
- case kRegexpBeginText: // at beginning of text
- case kRegexpEndText: // at end of text
- case kRegexpWordBoundary: // at word boundary
- case kRegexpNoWordBoundary: // not at word boundary
- info = EmptyString();
- break;
-
- case kRegexpLiteral:
- if (latin1()) {
- info = LiteralLatin1(re->rune());
- }
- else {
- info = Literal(re->rune());
- }
- break;
-
- case kRegexpLiteralString:
- if (re->nrunes() == 0) {
- info = NoMatch();
- break;
- }
- if (latin1()) {
- info = LiteralLatin1(re->runes()[0]);
- for (int i = 1; i < re->nrunes(); i++) {
- info = Concat(info, LiteralLatin1(re->runes()[i]));
- }
- } else {
- info = Literal(re->runes()[0]);
- for (int i = 1; i < re->nrunes(); i++) {
- info = Concat(info, Literal(re->runes()[i]));
- }
- }
- break;
-
- case kRegexpConcat: {
- // Accumulate in info.
- // Exact is concat of recent contiguous exact nodes.
- info = NULL;
- Info* exact = NULL;
- for (int i = 0; i < nchild_args; i++) {
- Info* ci = child_args[i]; // child info
- if (!ci->is_exact() ||
- (exact && ci->exact().size() * exact->exact().size() > 16)) {
- // Exact run is over.
- info = And(info, exact);
- exact = NULL;
- // Add this child's info.
- info = And(info, ci);
- } else {
- // Append to exact run.
- exact = Concat(exact, ci);
- }
- }
- info = And(info, exact);
- }
- break;
-
- case kRegexpAlternate:
- info = child_args[0];
- for (int i = 1; i < nchild_args; i++)
- info = Alt(info, child_args[i]);
- VLOG(10) << "Alt: " << info->ToString();
- break;
-
- case kRegexpStar:
- info = Star(child_args[0]);
- break;
-
- case kRegexpQuest:
- info = Quest(child_args[0]);
- break;
-
- case kRegexpPlus:
- info = Plus(child_args[0]);
- break;
-
- case kRegexpAnyChar:
- // Claim nothing, except that it's not empty.
- info = AnyChar();
- break;
-
- case kRegexpCharClass:
- info = CClass(re->cc(), latin1());
- break;
-
- case kRegexpCapture:
- // These don't affect the set of matching strings.
- info = child_args[0];
- break;
- }
-
- if (Trace) {
- VLOG(0) << "BuildInfo " << re->ToString()
- << ": " << (info ? info->ToString() : "");
- }
-
- return info;
-}
-
-
-Prefilter* Prefilter::FromRegexp(Regexp* re) {
- if (re == NULL)
- return NULL;
-
- Regexp* simple = re->Simplify();
- Prefilter::Info *info = BuildInfo(simple);
-
- simple->Decref();
- if (info == NULL)
- return NULL;
-
- Prefilter* m = info->TakeMatch();
-
- delete info;
- return m;
-}
-
-string Prefilter::DebugString() const {
- switch (op_) {
- default:
- LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
- return StringPrintf("op%d", op_);
- case NONE:
- return "*no-matches*";
- case ATOM:
- return atom_;
- case ALL:
- return "";
- case AND: {
- string s = "";
- for (size_t i = 0; i < subs_->size(); i++) {
- if (i > 0)
- s += " ";
- Prefilter* sub = (*subs_)[i];
- s += sub ? sub->DebugString() : "<nil>";
- }
- return s;
- }
- case OR: {
- string s = "(";
- for (size_t i = 0; i < subs_->size(); i++) {
- if (i > 0)
- s += "|";
- Prefilter* sub = (*subs_)[i];
- s += sub ? sub->DebugString() : "<nil>";
- }
- s += ")";
- return s;
- }
- }
-}
-
-Prefilter* Prefilter::FromRE2(const RE2* re2) {
- if (re2 == NULL)
- return NULL;
-
- Regexp* regexp = re2->Regexp();
- if (regexp == NULL)
- return NULL;
-
- return FromRegexp(regexp);
-}
-
-
-} // namespace re2
« no previous file with comments | « third_party/re2/re2/prefilter.h ('k') | third_party/re2/re2/prefilter_tree.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698