third_party/re2/re2/regexp.cc - Issue 1544433002: Replace RE2 import with a dependency

Unified Diff: third_party/re2/re2/regexp.cc

Issue 1544433002: Replace RE2 import with a dependency (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Re-Added LICENSE and OWNERS file Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/re2/re2/regexp.cc

diff --git a/third_party/re2/re2/regexp.cc b/third_party/re2/re2/regexp.cc

deleted file mode 100644

index 99e72e50d55f350ace2ad9162365ac198e1b09a8..0000000000000000000000000000000000000000

--- a/third_party/re2/re2/regexp.cc

+++ /dev/null

@@ -1,938 +0,0 @@

-// Use of this source code is governed by a BSD-style

-// license that can be found in the LICENSE file.

-// Regular expression representation.

-// Tested by parse_test.cc

-#include "util/util.h"

-#include "re2/regexp.h"

-#include "re2/stringpiece.h"

-#include "re2/walker-inl.h"

-namespace re2 {

-// Constructor. Allocates vectors as appropriate for operator.

-Regexp::Regexp(RegexpOp op, ParseFlags parse_flags)

- : op_(static_cast<uint8>(op)),

- simple_(false),

- parse_flags_(static_cast<uint16>(parse_flags)),

- ref_(1),

- nsub_(0),

- down_(NULL) {

- subone_ = NULL;

- memset(the_union_, 0, sizeof the_union_);

-// Destructor. Assumes already cleaned up children.

-// Private: use Decref() instead of delete to destroy Regexps.

-// Can't call Decref on the sub-Regexps here because

-// that could cause arbitrarily deep recursion, so

-// required Decref() to have handled them for us.

-Regexp::~Regexp() {

- if (nsub_ > 0)

- LOG(DFATAL) << "Regexp not destroyed.";

- switch (op_) {

- default:

- break;

- case kRegexpCapture:

- delete name_;

- break;

- case kRegexpLiteralString:

- delete[] runes_;

- break;

- case kRegexpCharClass:

- if (cc_)

- cc_->Delete();

- delete ccb_;

- break;

- }

-// If it's possible to destroy this regexp without recurring,

-// do so and return true. Else return false.

-bool Regexp::QuickDestroy() {

- if (nsub_ == 0) {

- delete this;

- return true;

- }

- return false;

-static map<Regexp*, int> *ref_map;

-GLOBAL_MUTEX(ref_mutex);

-int Regexp::Ref() {

- if (ref_ < kMaxRef)

- return ref_;

- GLOBAL_MUTEX_LOCK(ref_mutex);

- int r = 0;

- if (ref_map != NULL) {

- r = (*ref_map)[this];

- }

- GLOBAL_MUTEX_UNLOCK(ref_mutex);

- return r;

-// Increments reference count, returns object as convenience.

-Regexp* Regexp::Incref() {

- if (ref_ >= kMaxRef-1) {

- // Store ref count in overflow map.

- GLOBAL_MUTEX_LOCK(ref_mutex);

- if (ref_map == NULL) {

- ref_map = new map<Regexp*, int>;

- }

- if (ref_ == kMaxRef) {

- // already overflowed

- (*ref_map)[this]++;

- } else {

- // overflowing now

- (*ref_map)[this] = kMaxRef;

- ref_ = kMaxRef;

- }

- GLOBAL_MUTEX_UNLOCK(ref_mutex);

- return this;

- }

- ref_++;

- return this;

-// Decrements reference count and deletes this object if count reaches 0.

-void Regexp::Decref() {

- if (ref_ == kMaxRef) {

- // Ref count is stored in overflow map.

- GLOBAL_MUTEX_LOCK(ref_mutex);

- int r = (*ref_map)[this] - 1;

- if (r < kMaxRef) {

- ref_ = static_cast<uint16>(r);

- ref_map->erase(this);

- } else {

- (*ref_map)[this] = r;

- }

- GLOBAL_MUTEX_UNLOCK(ref_mutex);

- return;

- }

- ref_--;

- if (ref_ == 0)

- Destroy();

-// Deletes this object; ref count has count reached 0.

-void Regexp::Destroy() {

- if (QuickDestroy())

- return;

- // Handle recursive Destroy with explicit stack

- // to avoid arbitrarily deep recursion on process stack [sigh].

- down_ = NULL;

- Regexp* stack = this;

- while (stack != NULL) {

- Regexp* re = stack;

- stack = re->down_;

- if (re->ref_ != 0)

- LOG(DFATAL) << "Bad reference count " << re->ref_;

- if (re->nsub_ > 0) {

- Regexp** subs = re->sub();

- for (int i = 0; i < re->nsub_; i++) {

- Regexp* sub = subs[i];

- if (sub == NULL)

- continue;

- if (sub->ref_ == kMaxRef)

- sub->Decref();

- else

- --sub->ref_;

- if (sub->ref_ == 0 && !sub->QuickDestroy()) {

- sub->down_ = stack;

- stack = sub;

- }

- if (re->nsub_ > 1)

- delete[] subs;

- re->nsub_ = 0;

- }

- delete re;

- }

-void Regexp::AddRuneToString(Rune r) {

- DCHECK(op_ == kRegexpLiteralString);

- if (nrunes_ == 0) {

- // start with 8

- runes_ = new Rune[8];

- } else if (nrunes_ >= 8 && (nrunes_ & (nrunes_ - 1)) == 0) {

- // double on powers of two

- Rune *old = runes_;

- runes_ = new Rune[nrunes_ * 2];

- for (int i = 0; i < nrunes_; i++)

- runes_[i] = old[i];

- delete[] old;

- }

- runes_[nrunes_++] = r;

-Regexp* Regexp::HaveMatch(int match_id, ParseFlags flags) {

- Regexp* re = new Regexp(kRegexpHaveMatch, flags);

- re->match_id_ = match_id;

- return re;

-Regexp* Regexp::Plus(Regexp* sub, ParseFlags flags) {

- if (sub->op() == kRegexpPlus && sub->parse_flags() == flags)

- return sub;

- Regexp* re = new Regexp(kRegexpPlus, flags);

- re->AllocSub(1);

- re->sub()[0] = sub;

- return re;

-Regexp* Regexp::Star(Regexp* sub, ParseFlags flags) {

- if (sub->op() == kRegexpStar && sub->parse_flags() == flags)

- return sub;

- Regexp* re = new Regexp(kRegexpStar, flags);

- re->AllocSub(1);

- re->sub()[0] = sub;

- return re;

-Regexp* Regexp::Quest(Regexp* sub, ParseFlags flags) {

- if (sub->op() == kRegexpQuest && sub->parse_flags() == flags)

- return sub;

- Regexp* re = new Regexp(kRegexpQuest, flags);

- re->AllocSub(1);

- re->sub()[0] = sub;

- return re;

-Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,

- ParseFlags flags, bool can_factor) {

- if (nsub == 1)

- return sub[0];

- if (nsub == 0) {

- if (op == kRegexpAlternate)

- return new Regexp(kRegexpNoMatch, flags);

- else

- return new Regexp(kRegexpEmptyMatch, flags);

- }

- Regexp** subcopy = NULL;

- if (op == kRegexpAlternate && can_factor) {

- // Going to edit sub; make a copy so we don't step on caller.

- subcopy = new Regexp*[nsub];

- memmove(subcopy, sub, nsub * sizeof sub[0]);

- sub = subcopy;

- nsub = FactorAlternation(sub, nsub, flags);

- if (nsub == 1) {

- Regexp* re = sub[0];

- delete[] subcopy;

- return re;

- }

- if (nsub > kMaxNsub) {

- // Too many subexpressions to fit in a single Regexp.

- // Make a two-level tree. Two levels gets us to 65535^2.

- int nbigsub = (nsub+kMaxNsub-1)/kMaxNsub;

- Regexp* re = new Regexp(op, flags);

- re->AllocSub(nbigsub);

- Regexp** subs = re->sub();

- for (int i = 0; i < nbigsub - 1; i++)

- subs[i] = ConcatOrAlternate(op, sub+i*kMaxNsub, kMaxNsub, flags, false);

- subs[nbigsub - 1] = ConcatOrAlternate(op, sub+(nbigsub-1)*kMaxNsub,

- nsub - (nbigsub-1)*kMaxNsub, flags,

- false);

- delete[] subcopy;

- return re;

- }

- Regexp* re = new Regexp(op, flags);

- re->AllocSub(nsub);

- Regexp** subs = re->sub();

- for (int i = 0; i < nsub; i++)

- subs[i] = sub[i];

- delete[] subcopy;

- return re;

-Regexp* Regexp::Concat(Regexp** sub, int nsub, ParseFlags flags) {

- return ConcatOrAlternate(kRegexpConcat, sub, nsub, flags, false);

-Regexp* Regexp::Alternate(Regexp** sub, int nsub, ParseFlags flags) {

- return ConcatOrAlternate(kRegexpAlternate, sub, nsub, flags, true);

-Regexp* Regexp::AlternateNoFactor(Regexp** sub, int nsub, ParseFlags flags) {

- return ConcatOrAlternate(kRegexpAlternate, sub, nsub, flags, false);

-Regexp* Regexp::Capture(Regexp* sub, ParseFlags flags, int cap) {

- Regexp* re = new Regexp(kRegexpCapture, flags);

- re->AllocSub(1);

- re->sub()[0] = sub;

- re->cap_ = cap;

- return re;

-Regexp* Regexp::Repeat(Regexp* sub, ParseFlags flags, int min, int max) {

- Regexp* re = new Regexp(kRegexpRepeat, flags);

- re->AllocSub(1);

- re->sub()[0] = sub;

- re->min_ = min;

- re->max_ = max;

- return re;

-Regexp* Regexp::NewLiteral(Rune rune, ParseFlags flags) {

- Regexp* re = new Regexp(kRegexpLiteral, flags);

- re->rune_ = rune;

- return re;

-Regexp* Regexp::LiteralString(Rune* runes, int nrunes, ParseFlags flags) {

- if (nrunes <= 0)

- return new Regexp(kRegexpEmptyMatch, flags);

- if (nrunes == 1)

- return NewLiteral(runes[0], flags);

- Regexp* re = new Regexp(kRegexpLiteralString, flags);

- for (int i = 0; i < nrunes; i++)

- re->AddRuneToString(runes[i]);

- return re;

-Regexp* Regexp::NewCharClass(CharClass* cc, ParseFlags flags) {

- Regexp* re = new Regexp(kRegexpCharClass, flags);

- re->cc_ = cc;

- return re;

-// Swaps this and that in place.

-void Regexp::Swap(Regexp* that) {

- // Can use memmove because Regexp is just a struct (no vtable).

- char tmp[sizeof *this];

- memmove(tmp, this, sizeof tmp);

- memmove(this, that, sizeof tmp);

- memmove(that, tmp, sizeof tmp);

-// Tests equality of all top-level structure but not subregexps.

-static bool TopEqual(Regexp* a, Regexp* b) {

- if (a->op() != b->op())

- return false;

- switch (a->op()) {

- case kRegexpNoMatch:

- case kRegexpEmptyMatch:

- case kRegexpAnyChar:

- case kRegexpAnyByte:

- case kRegexpBeginLine:

- case kRegexpEndLine:

- case kRegexpWordBoundary:

- case kRegexpNoWordBoundary:

- case kRegexpBeginText:

- return true;

- case kRegexpEndText:

- // The parse flags remember whether it's \z or (?-m:$),

- // which matters when testing against PCRE.

- return ((a->parse_flags() ^ b->parse_flags()) & Regexp::WasDollar) == 0;

- case kRegexpLiteral:

- return a->rune() == b->rune() &&

- ((a->parse_flags() ^ b->parse_flags()) & Regexp::FoldCase) == 0;

- case kRegexpLiteralString:

- return a->nrunes() == b->nrunes() &&

- ((a->parse_flags() ^ b->parse_flags()) & Regexp::FoldCase) == 0 &&

- memcmp(a->runes(), b->runes(),

- a->nrunes() * sizeof a->runes()[0]) == 0;

- case kRegexpAlternate:

- case kRegexpConcat:

- return a->nsub() == b->nsub();

- case kRegexpStar:

- case kRegexpPlus:

- case kRegexpQuest:

- return ((a->parse_flags() ^ b->parse_flags()) & Regexp::NonGreedy) == 0;

- case kRegexpRepeat:

- return ((a->parse_flags() ^ b->parse_flags()) & Regexp::NonGreedy) == 0 &&

- a->min() == b->min() &&

- a->max() == b->max();

- case kRegexpCapture:

- return a->cap() == b->cap() && a->name() == b->name();

- case kRegexpHaveMatch:

- return a->match_id() == b->match_id();

- case kRegexpCharClass: {

- CharClass* acc = a->cc();

- CharClass* bcc = b->cc();

- return acc->size() == bcc->size() &&

- acc->end() - acc->begin() == bcc->end() - bcc->begin() &&

- memcmp(acc->begin(), bcc->begin(),

- (acc->end() - acc->begin()) * sizeof acc->begin()[0]) == 0;

- }

- LOG(DFATAL) << "Unexpected op in Regexp::Equal: " << a->op();

- return 0;

-bool Regexp::Equal(Regexp* a, Regexp* b) {

- if (a == NULL || b == NULL)

- return a == b;

- if (!TopEqual(a, b))

- return false;

- // Fast path:

- // return without allocating vector if there are no subregexps.

- switch (a->op()) {

- case kRegexpAlternate:

- case kRegexpConcat:

- case kRegexpStar:

- case kRegexpPlus:

- case kRegexpQuest:

- case kRegexpRepeat:

- case kRegexpCapture:

- break;

- default:

- return true;

- }

- // Committed to doing real work.

- // The stack (vector) has pairs of regexps waiting to

- // be compared. The regexps are only equal if

- // all the pairs end up being equal.

- vector<Regexp*> stk;

- for (;;) {

- // Invariant: TopEqual(a, b) == true.

- Regexp* a2;

- Regexp* b2;

- switch (a->op()) {

- default:

- break;

- case kRegexpAlternate:

- case kRegexpConcat:

- for (int i = 0; i < a->nsub(); i++) {

- a2 = a->sub()[i];

- b2 = b->sub()[i];

- if (!TopEqual(a2, b2))

- return false;

- stk.push_back(a2);

- stk.push_back(b2);

- }

- break;

- case kRegexpStar:

- case kRegexpPlus:

- case kRegexpQuest:

- case kRegexpRepeat:

- case kRegexpCapture:

- a2 = a->sub()[0];

- b2 = b->sub()[0];

- if (!TopEqual(a2, b2))

- return false;

- // Really:

- // stk.push_back(a2);

- // stk.push_back(b2);

- // break;

- // but faster to assign directly and loop.

- a = a2;

- b = b2;

- continue;

- }

- size_t n = stk.size();

- if (n == 0)

- break;

- DCHECK_GE(n, 2);

- a = stk[n-2];

- b = stk[n-1];

- stk.resize(n-2);

- }

- return true;

-// Keep in sync with enum RegexpStatusCode in regexp.h

-static const char *kErrorStrings[] = {

- "no error",

- "unexpected error",

- "invalid escape sequence",

- "invalid character class",

- "invalid character class range",

- "missing ]",

- "missing )",

- "trailing \\",

- "no argument for repetition operator",

- "invalid repetition size",

- "bad repetition operator",

- "invalid perl operator",

- "invalid UTF-8",

- "invalid named capture group",

-};

-string RegexpStatus::CodeText(enum RegexpStatusCode code) {

- if (code < 0 || code >= arraysize(kErrorStrings))

- code = kRegexpInternalError;

- return kErrorStrings[code];

-string RegexpStatus::Text() const {

- if (error_arg_.empty())

- return CodeText(code_);

- string s;

- s.append(CodeText(code_));

- s.append(": ");

- s.append(error_arg_.data(), error_arg_.size());

- return s;

-void RegexpStatus::Copy(const RegexpStatus& status) {

- code_ = status.code_;

- error_arg_ = status.error_arg_;

-typedef int Ignored; // Walker<void> doesn't exist

-// Walker subclass to count capturing parens in regexp.

-class NumCapturesWalker : public Regexp::Walker<Ignored> {

- public:

- NumCapturesWalker() : ncapture_(0) {}

- int ncapture() { return ncapture_; }

- virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {

- if (re->op() == kRegexpCapture)

- ncapture_++;

- return ignored;

- }

- virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {

- // Should never be called: we use Walk not WalkExponential.

- LOG(DFATAL) << "NumCapturesWalker::ShortVisit called";

- return ignored;

- }

- private:

- int ncapture_;

- DISALLOW_COPY_AND_ASSIGN(NumCapturesWalker);

-};

-int Regexp::NumCaptures() {

- NumCapturesWalker w;

- w.Walk(this, 0);

- return w.ncapture();

-// Walker class to build map of named capture groups and their indices.

-class NamedCapturesWalker : public Regexp::Walker<Ignored> {

- public:

- NamedCapturesWalker() : map_(NULL) {}

- ~NamedCapturesWalker() { delete map_; }

- map<string, int>* TakeMap() {

- map<string, int>* m = map_;

- map_ = NULL;

- return m;

- }

- Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {

- if (re->op() == kRegexpCapture && re->name() != NULL) {

- // Allocate map once we find a name.

- if (map_ == NULL)

- map_ = new map<string, int>;

- // Record first occurrence of each name.

- // (The rule is that if you have the same name

- // multiple times, only the leftmost one counts.)

- if (map_->find(*re->name()) == map_->end())

- (*map_)[*re->name()] = re->cap();

- }

- return ignored;

- }

- virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {

- // Should never be called: we use Walk not WalkExponential.

- LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called";

- return ignored;

- }

- private:

- map<string, int>* map_;

- DISALLOW_COPY_AND_ASSIGN(NamedCapturesWalker);

-};

-map<string, int>* Regexp::NamedCaptures() {

- NamedCapturesWalker w;

- w.Walk(this, 0);

- return w.TakeMap();

-// Walker class to build map from capture group indices to their names.

-class CaptureNamesWalker : public Regexp::Walker<Ignored> {

- public:

- CaptureNamesWalker() : map_(NULL) {}

- ~CaptureNamesWalker() { delete map_; }

- map<int, string>* TakeMap() {

- map<int, string>* m = map_;

- map_ = NULL;

- return m;

- }

- Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {

- if (re->op() == kRegexpCapture && re->name() != NULL) {

- // Allocate map once we find a name.

- if (map_ == NULL)

- map_ = new map<int, string>;

- (*map_)[re->cap()] = *re->name();

- }

- return ignored;

- }

- virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {

- // Should never be called: we use Walk not WalkExponential.

- LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called";

- return ignored;

- }

- private:

- map<int, string>* map_;

- DISALLOW_COPY_AND_ASSIGN(CaptureNamesWalker);

-};

-map<int, string>* Regexp::CaptureNames() {

- CaptureNamesWalker w;

- w.Walk(this, 0);

- return w.TakeMap();

-// Determines whether regexp matches must be anchored

-// with a fixed string prefix. If so, returns the prefix and

-// the regexp that remains after the prefix. The prefix might

-// be ASCII case-insensitive.

-bool Regexp::RequiredPrefix(string *prefix, bool *foldcase, Regexp** suffix) {

- // No need for a walker: the regexp must be of the form

- // 1. some number of ^ anchors

- // 2. a literal char or string

- // 3. the rest

- prefix->clear();

- *foldcase = false;

- *suffix = NULL;

- if (op_ != kRegexpConcat)

- return false;

- // Some number of anchors, then a literal or concatenation.

- int i = 0;

- Regexp** sub = this->sub();

- while (i < nsub_ && sub[i]->op_ == kRegexpBeginText)

- i++;

- if (i == 0 || i >= nsub_)

- return false;

- Regexp* re = sub[i];

- switch (re->op_) {

- default:

- return false;

- case kRegexpLiteralString:

- // Convert to string in proper encoding.

- if (re->parse_flags() & Latin1) {

- prefix->resize(re->nrunes_);

- for (int j = 0; j < re->nrunes_; j++)

- (*prefix)[j] = static_cast<char>(re->runes_[j]);

- } else {

- // Convert to UTF-8 in place.

- // Assume worst-case space and then trim.

- prefix->resize(re->nrunes_ * UTFmax);

- char *p = &(*prefix)[0];

- for (int j = 0; j < re->nrunes_; j++) {

- Rune r = re->runes_[j];

- if (r < Runeself)

- *p++ = static_cast<char>(r);

- else

- p += runetochar(p, &r);

- }

- prefix->resize(p - &(*prefix)[0]);

- }

- break;

- case kRegexpLiteral:

- if ((re->parse_flags() & Latin1) || re->rune_ < Runeself) {

- prefix->append(1, static_cast<char>(re->rune_));

- } else {

- char buf[UTFmax];

- prefix->append(buf, runetochar(buf, &re->rune_));

- }

- break;

- }

- *foldcase = (sub[i]->parse_flags() & FoldCase) != 0;

- i++;

- // The rest.

- if (i < nsub_) {

- for (int j = i; j < nsub_; j++)

- sub[j]->Incref();

- re = Concat(sub + i, nsub_ - i, parse_flags());

- } else {

- re = new Regexp(kRegexpEmptyMatch, parse_flags());

- }

- *suffix = re;

- return true;

-// Character class builder is a balanced binary tree (STL set)

-// containing non-overlapping, non-abutting RuneRanges.

-// The less-than operator used in the tree treats two

-// ranges as equal if they overlap at all, so that

-// lookups for a particular Rune are possible.

-CharClassBuilder::CharClassBuilder() {

- nrunes_ = 0;

- upper_ = 0;

- lower_ = 0;

-// Add lo-hi to the class; return whether class got bigger.

-bool CharClassBuilder::AddRange(Rune lo, Rune hi) {

- if (hi < lo)

- return false;

- if (lo <= 'z' && hi >= 'A') {

- // Overlaps some alpha, maybe not all.

- // Update bitmaps telling which ASCII letters are in the set.

- Rune lo1 = max<Rune>(lo, 'A');

- Rune hi1 = min<Rune>(hi, 'Z');

- if (lo1 <= hi1)

- upper_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'A');

- lo1 = max<Rune>(lo, 'a');

- hi1 = min<Rune>(hi, 'z');

- if (lo1 <= hi1)

- lower_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'a');

- }

- { // Check whether lo, hi is already in the class.

- iterator it = ranges_.find(RuneRange(lo, lo));

- if (it != end() && it->lo <= lo && hi <= it->hi)

- return false;

- }

- // Look for a range abutting lo on the left.

- // If it exists, take it out and increase our range.

- if (lo > 0) {

- iterator it = ranges_.find(RuneRange(lo-1, lo-1));

- if (it != end()) {

- lo = it->lo;

- if (it->hi > hi)

- hi = it->hi;

- nrunes_ -= it->hi - it->lo + 1;

- ranges_.erase(it);

- }

- // Look for a range abutting hi on the right.

- // If it exists, take it out and increase our range.

- if (hi < Runemax) {

- iterator it = ranges_.find(RuneRange(hi+1, hi+1));

- if (it != end()) {

- hi = it->hi;

- nrunes_ -= it->hi - it->lo + 1;

- ranges_.erase(it);

- }

- // Look for ranges between lo and hi. Take them out.

- // This is only safe because the set has no overlapping ranges.

- // We've already removed any ranges abutting lo and hi, so

- // any that overlap [lo, hi] must be contained within it.

- for (;;) {

- iterator it = ranges_.find(RuneRange(lo, hi));

- if (it == end())

- break;

- nrunes_ -= it->hi - it->lo + 1;

- ranges_.erase(it);

- }

- // Finally, add [lo, hi].

- nrunes_ += hi - lo + 1;

- ranges_.insert(RuneRange(lo, hi));

- return true;

-void CharClassBuilder::AddCharClass(CharClassBuilder *cc) {

- for (iterator it = cc->begin(); it != cc->end(); ++it)

- AddRange(it->lo, it->hi);

-bool CharClassBuilder::Contains(Rune r) {

- return ranges_.find(RuneRange(r, r)) != end();

-// Does the character class behave the same on A-Z as on a-z?

-bool CharClassBuilder::FoldsASCII() {

- return ((upper_ ^ lower_) & AlphaMask) == 0;

-CharClassBuilder* CharClassBuilder::Copy() {

- CharClassBuilder* cc = new CharClassBuilder;

- for (iterator it = begin(); it != end(); ++it)

- cc->ranges_.insert(RuneRange(it->lo, it->hi));

- cc->upper_ = upper_;

- cc->lower_ = lower_;

- cc->nrunes_ = nrunes_;

- return cc;

-void CharClassBuilder::RemoveAbove(Rune r) {

- if (r >= Runemax)

- return;

- if (r < 'z') {

- if (r < 'a')

- lower_ = 0;

- else

- lower_ &= AlphaMask >> ('z' - r);

- }

- if (r < 'Z') {

- if (r < 'A')

- upper_ = 0;

- else

- upper_ &= AlphaMask >> ('Z' - r);

- }

- for (;;) {

- iterator it = ranges_.find(RuneRange(r + 1, Runemax));

- if (it == end())

- break;

- RuneRange rr = *it;

- ranges_.erase(it);

- nrunes_ -= rr.hi - rr.lo + 1;

- if (rr.lo <= r) {

- rr.hi = r;

- ranges_.insert(rr);

- nrunes_ += rr.hi - rr.lo + 1;

- }

-void CharClassBuilder::Negate() {

- // Build up negation and then copy in.

- // Could edit ranges in place, but C++ won't let me.

- vector<RuneRange> v;

- v.reserve(ranges_.size() + 1);

- // In negation, first range begins at 0, unless

- // the current class begins at 0.

- iterator it = begin();

- if (it == end()) {

- v.push_back(RuneRange(0, Runemax));

- } else {

- int nextlo = 0;

- if (it->lo == 0) {

- nextlo = it->hi + 1;

- ++it;

- }

- for (; it != end(); ++it) {

- v.push_back(RuneRange(nextlo, it->lo - 1));

- nextlo = it->hi + 1;

- }

- if (nextlo <= Runemax)

- v.push_back(RuneRange(nextlo, Runemax));

- }

- ranges_.clear();

- for (size_t i = 0; i < v.size(); i++)

- ranges_.insert(v[i]);

- upper_ = AlphaMask & ~upper_;

- lower_ = AlphaMask & ~lower_;

- nrunes_ = Runemax+1 - nrunes_;

-// Character class is a sorted list of ranges.

-// The ranges are allocated in the same block as the header,

-// necessitating a special allocator and Delete method.

-CharClass* CharClass::New(int maxranges) {

- CharClass* cc;

- uint8* data = new uint8[sizeof *cc + maxranges*sizeof cc->ranges_[0]];

- cc = reinterpret_cast<CharClass*>(data);

- cc->ranges_ = reinterpret_cast<RuneRange*>(data + sizeof *cc);

- cc->nranges_ = 0;

- cc->folds_ascii_ = false;

- cc->nrunes_ = 0;

- return cc;

-void CharClass::Delete() {

- uint8 *data = reinterpret_cast<uint8*>(this);

- delete[] data;

-CharClass* CharClass::Negate() {

- CharClass* cc = CharClass::New(nranges_+1);

- cc->folds_ascii_ = folds_ascii_;

- cc->nrunes_ = Runemax + 1 - nrunes_;

- int n = 0;

- int nextlo = 0;

- for (CharClass::iterator it = begin(); it != end(); ++it) {

- if (it->lo == nextlo) {

- nextlo = it->hi + 1;

- } else {

- cc->ranges_[n++] = RuneRange(nextlo, it->lo - 1);

- nextlo = it->hi + 1;

- }

- if (nextlo <= Runemax)

- cc->ranges_[n++] = RuneRange(nextlo, Runemax);

- cc->nranges_ = n;

- return cc;

-bool CharClass::Contains(Rune r) {

- RuneRange* rr = ranges_;

- int n = nranges_;

- while (n > 0) {

- int m = n/2;

- if (rr[m].hi < r) {

- rr += m+1;

- n -= m+1;

- } else if (r < rr[m].lo) {

- n = m;

- } else { // rr[m].lo <= r && r <= rr[m].hi

- return true;

- }

- return false;

-CharClass* CharClassBuilder::GetCharClass() {

- CharClass* cc = CharClass::New(static_cast<int>(ranges_.size()));

- int n = 0;

- for (iterator it = begin(); it != end(); ++it)

- cc->ranges_[n++] = *it;

- cc->nranges_ = n;

- DCHECK_LE(n, static_cast<int>(ranges_.size()));

- cc->nrunes_ = nrunes_;

- cc->folds_ascii_ = FoldsASCII();

- return cc;

-} // namespace re2

« no previous file with comments | « third_party/re2/re2/regexp.h ('k') | third_party/re2/re2/set.h » ('j') | no next file with comments »