| Index: third_party/re2/re2/prog.cc
|
| diff --git a/third_party/re2/re2/prog.cc b/third_party/re2/re2/prog.cc
|
| deleted file mode 100644
|
| index 499f560f8d64fc01752a3501f1f0bd9f14c3b803..0000000000000000000000000000000000000000
|
| --- a/third_party/re2/re2/prog.cc
|
| +++ /dev/null
|
| @@ -1,343 +0,0 @@
|
| -// Copyright 2007 The RE2 Authors. All Rights Reserved.
|
| -// Use of this source code is governed by a BSD-style
|
| -// license that can be found in the LICENSE file.
|
| -
|
| -// Compiled regular expression representation.
|
| -// Tested by compile_test.cc
|
| -
|
| -#include "util/util.h"
|
| -#include "util/sparse_set.h"
|
| -#include "re2/prog.h"
|
| -#include "re2/stringpiece.h"
|
| -
|
| -namespace re2 {
|
| -
|
| -// Constructors per Inst opcode
|
| -
|
| -void Prog::Inst::InitAlt(uint32 out, uint32 out1) {
|
| - DCHECK_EQ(out_opcode_, 0);
|
| - set_out_opcode(out, kInstAlt);
|
| - out1_ = out1;
|
| -}
|
| -
|
| -void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32 out) {
|
| - DCHECK_EQ(out_opcode_, 0);
|
| - set_out_opcode(out, kInstByteRange);
|
| - lo_ = lo & 0xFF;
|
| - hi_ = hi & 0xFF;
|
| - foldcase_ = foldcase & 0xFF;
|
| -}
|
| -
|
| -void Prog::Inst::InitCapture(int cap, uint32 out) {
|
| - DCHECK_EQ(out_opcode_, 0);
|
| - set_out_opcode(out, kInstCapture);
|
| - cap_ = cap;
|
| -}
|
| -
|
| -void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32 out) {
|
| - DCHECK_EQ(out_opcode_, 0);
|
| - set_out_opcode(out, kInstEmptyWidth);
|
| - empty_ = empty;
|
| -}
|
| -
|
| -void Prog::Inst::InitMatch(int32 id) {
|
| - DCHECK_EQ(out_opcode_, 0);
|
| - set_opcode(kInstMatch);
|
| - match_id_ = id;
|
| -}
|
| -
|
| -void Prog::Inst::InitNop(uint32 out) {
|
| - DCHECK_EQ(out_opcode_, 0);
|
| - set_opcode(kInstNop);
|
| -}
|
| -
|
| -void Prog::Inst::InitFail() {
|
| - DCHECK_EQ(out_opcode_, 0);
|
| - set_opcode(kInstFail);
|
| -}
|
| -
|
| -string Prog::Inst::Dump() {
|
| - switch (opcode()) {
|
| - default:
|
| - return StringPrintf("opcode %d", static_cast<int>(opcode()));
|
| -
|
| - case kInstAlt:
|
| - return StringPrintf("alt -> %d | %d", out(), out1_);
|
| -
|
| - case kInstAltMatch:
|
| - return StringPrintf("altmatch -> %d | %d", out(), out1_);
|
| -
|
| - case kInstByteRange:
|
| - return StringPrintf("byte%s [%02x-%02x] -> %d",
|
| - foldcase_ ? "/i" : "",
|
| - lo_, hi_, out());
|
| -
|
| - case kInstCapture:
|
| - return StringPrintf("capture %d -> %d", cap_, out());
|
| -
|
| - case kInstEmptyWidth:
|
| - return StringPrintf("emptywidth %#x -> %d",
|
| - static_cast<int>(empty_), out());
|
| -
|
| - case kInstMatch:
|
| - return StringPrintf("match! %d", match_id());
|
| -
|
| - case kInstNop:
|
| - return StringPrintf("nop -> %d", out());
|
| -
|
| - case kInstFail:
|
| - return StringPrintf("fail");
|
| - }
|
| -}
|
| -
|
| -Prog::Prog()
|
| - : anchor_start_(false),
|
| - anchor_end_(false),
|
| - reversed_(false),
|
| - did_onepass_(false),
|
| - start_(0),
|
| - start_unanchored_(0),
|
| - size_(0),
|
| - byte_inst_count_(0),
|
| - bytemap_range_(0),
|
| - flags_(0),
|
| - onepass_statesize_(0),
|
| - inst_(NULL),
|
| - dfa_first_(NULL),
|
| - dfa_longest_(NULL),
|
| - dfa_mem_(0),
|
| - delete_dfa_(NULL),
|
| - unbytemap_(NULL),
|
| - onepass_nodes_(NULL),
|
| - onepass_start_(NULL) {
|
| -}
|
| -
|
| -Prog::~Prog() {
|
| - if (delete_dfa_) {
|
| - if (dfa_first_)
|
| - delete_dfa_(dfa_first_);
|
| - if (dfa_longest_)
|
| - delete_dfa_(dfa_longest_);
|
| - }
|
| - delete[] onepass_nodes_;
|
| - delete[] inst_;
|
| - delete[] unbytemap_;
|
| -}
|
| -
|
| -typedef SparseSet Workq;
|
| -
|
| -static inline void AddToQueue(Workq* q, int id) {
|
| - if (id != 0)
|
| - q->insert(id);
|
| -}
|
| -
|
| -static string ProgToString(Prog* prog, Workq* q) {
|
| - string s;
|
| -
|
| - for (Workq::iterator i = q->begin(); i != q->end(); ++i) {
|
| - int id = *i;
|
| - Prog::Inst* ip = prog->inst(id);
|
| - StringAppendF(&s, "%d. %s\n", id, ip->Dump().c_str());
|
| - AddToQueue(q, ip->out());
|
| - if (ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch)
|
| - AddToQueue(q, ip->out1());
|
| - }
|
| - return s;
|
| -}
|
| -
|
| -string Prog::Dump() {
|
| - string map;
|
| - if (false) { // Debugging
|
| - int lo = 0;
|
| - StringAppendF(&map, "byte map:\n");
|
| - for (int i = 0; i < bytemap_range_; i++) {
|
| - StringAppendF(&map, "\t%d. [%02x-%02x]\n", i, lo, unbytemap_[i]);
|
| - lo = unbytemap_[i] + 1;
|
| - }
|
| - StringAppendF(&map, "\n");
|
| - }
|
| -
|
| - Workq q(size_);
|
| - AddToQueue(&q, start_);
|
| - return map + ProgToString(this, &q);
|
| -}
|
| -
|
| -string Prog::DumpUnanchored() {
|
| - Workq q(size_);
|
| - AddToQueue(&q, start_unanchored_);
|
| - return ProgToString(this, &q);
|
| -}
|
| -
|
| -static bool IsMatch(Prog*, Prog::Inst*);
|
| -
|
| -// Peep-hole optimizer.
|
| -void Prog::Optimize() {
|
| - Workq q(size_);
|
| -
|
| - // Eliminate nops. Most are taken out during compilation
|
| - // but a few are hard to avoid.
|
| - q.clear();
|
| - AddToQueue(&q, start_);
|
| - for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
|
| - int id = *i;
|
| -
|
| - Inst* ip = inst(id);
|
| - int j = ip->out();
|
| - Inst* jp;
|
| - while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
|
| - j = jp->out();
|
| - }
|
| - ip->set_out(j);
|
| - AddToQueue(&q, ip->out());
|
| -
|
| - if (ip->opcode() == kInstAlt) {
|
| - j = ip->out1();
|
| - while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
|
| - j = jp->out();
|
| - }
|
| - ip->out1_ = j;
|
| - AddToQueue(&q, ip->out1());
|
| - }
|
| - }
|
| -
|
| - // Insert kInstAltMatch instructions
|
| - // Look for
|
| - // ip: Alt -> j | k
|
| - // j: ByteRange [00-FF] -> ip
|
| - // k: Match
|
| - // or the reverse (the above is the greedy one).
|
| - // Rewrite Alt to AltMatch.
|
| - q.clear();
|
| - AddToQueue(&q, start_);
|
| - for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
|
| - int id = *i;
|
| - Inst* ip = inst(id);
|
| - AddToQueue(&q, ip->out());
|
| - if (ip->opcode() == kInstAlt)
|
| - AddToQueue(&q, ip->out1());
|
| -
|
| - if (ip->opcode() == kInstAlt) {
|
| - Inst* j = inst(ip->out());
|
| - Inst* k = inst(ip->out1());
|
| - if (j->opcode() == kInstByteRange && j->out() == id &&
|
| - j->lo() == 0x00 && j->hi() == 0xFF &&
|
| - IsMatch(this, k)) {
|
| - ip->set_opcode(kInstAltMatch);
|
| - continue;
|
| - }
|
| - if (IsMatch(this, j) &&
|
| - k->opcode() == kInstByteRange && k->out() == id &&
|
| - k->lo() == 0x00 && k->hi() == 0xFF) {
|
| - ip->set_opcode(kInstAltMatch);
|
| - }
|
| - }
|
| - }
|
| -}
|
| -
|
| -// Is ip a guaranteed match at end of text, perhaps after some capturing?
|
| -static bool IsMatch(Prog* prog, Prog::Inst* ip) {
|
| - for (;;) {
|
| - switch (ip->opcode()) {
|
| - default:
|
| - LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
|
| - return false;
|
| -
|
| - case kInstAlt:
|
| - case kInstAltMatch:
|
| - case kInstByteRange:
|
| - case kInstFail:
|
| - case kInstEmptyWidth:
|
| - return false;
|
| -
|
| - case kInstCapture:
|
| - case kInstNop:
|
| - ip = prog->inst(ip->out());
|
| - break;
|
| -
|
| - case kInstMatch:
|
| - return true;
|
| - }
|
| - }
|
| -}
|
| -
|
| -uint32 Prog::EmptyFlags(const StringPiece& text, const char* p) {
|
| - int flags = 0;
|
| -
|
| - // ^ and \A
|
| - if (p == text.begin())
|
| - flags |= kEmptyBeginText | kEmptyBeginLine;
|
| - else if (p[-1] == '\n')
|
| - flags |= kEmptyBeginLine;
|
| -
|
| - // $ and \z
|
| - if (p == text.end())
|
| - flags |= kEmptyEndText | kEmptyEndLine;
|
| - else if (p < text.end() && p[0] == '\n')
|
| - flags |= kEmptyEndLine;
|
| -
|
| - // \b and \B
|
| - if (p == text.begin() && p == text.end()) {
|
| - // no word boundary here
|
| - } else if (p == text.begin()) {
|
| - if (IsWordChar(p[0]))
|
| - flags |= kEmptyWordBoundary;
|
| - } else if (p == text.end()) {
|
| - if (IsWordChar(p[-1]))
|
| - flags |= kEmptyWordBoundary;
|
| - } else {
|
| - if (IsWordChar(p[-1]) != IsWordChar(p[0]))
|
| - flags |= kEmptyWordBoundary;
|
| - }
|
| - if (!(flags & kEmptyWordBoundary))
|
| - flags |= kEmptyNonWordBoundary;
|
| -
|
| - return flags;
|
| -}
|
| -
|
| -void Prog::MarkByteRange(int lo, int hi) {
|
| - DCHECK_GE(lo, 0);
|
| - DCHECK_GE(hi, 0);
|
| - DCHECK_LE(lo, 255);
|
| - DCHECK_LE(hi, 255);
|
| - DCHECK_LE(lo, hi);
|
| - if (0 < lo && lo <= 255)
|
| - byterange_.Set(lo - 1);
|
| - if (0 <= hi && hi <= 255)
|
| - byterange_.Set(hi);
|
| -}
|
| -
|
| -void Prog::ComputeByteMap() {
|
| - // Fill in bytemap with byte classes for prog_.
|
| - // Ranges of bytes that are treated as indistinguishable
|
| - // by the regexp program are mapped to a single byte class.
|
| - // The vector prog_->byterange() marks the end of each
|
| - // such range.
|
| - const Bitmap<256>& v = byterange();
|
| -
|
| - COMPILE_ASSERT(8*sizeof(v.Word(0)) == 32, wordsize);
|
| - uint8 n = 0;
|
| - uint32 bits = 0;
|
| - for (int i = 0; i < 256; i++) {
|
| - if ((i&31) == 0)
|
| - bits = v.Word(i >> 5);
|
| - bytemap_[i] = n;
|
| - n += bits & 1;
|
| - bits >>= 1;
|
| - }
|
| - bytemap_range_ = bytemap_[255] + 1;
|
| - unbytemap_ = new uint8[bytemap_range_];
|
| - for (int i = 0; i < 256; i++)
|
| - unbytemap_[bytemap_[i]] = static_cast<uint8>(i);
|
| -
|
| - if (0) { // For debugging: use trivial byte map.
|
| - for (int i = 0; i < 256; i++) {
|
| - bytemap_[i] = static_cast<uint8>(i);
|
| - unbytemap_[i] = static_cast<uint8>(i);
|
| - }
|
| - bytemap_range_ = 256;
|
| - LOG(INFO) << "Using trivial bytemap.";
|
| - }
|
| -}
|
| -
|
| -} // namespace re2
|
| -
|
|
|