Index: third_party/re2/re2/prog.cc |
diff --git a/third_party/re2/re2/prog.cc b/third_party/re2/re2/prog.cc |
deleted file mode 100644 |
index 499f560f8d64fc01752a3501f1f0bd9f14c3b803..0000000000000000000000000000000000000000 |
--- a/third_party/re2/re2/prog.cc |
+++ /dev/null |
@@ -1,343 +0,0 @@ |
-// Copyright 2007 The RE2 Authors. All Rights Reserved. |
-// Use of this source code is governed by a BSD-style |
-// license that can be found in the LICENSE file. |
- |
-// Compiled regular expression representation. |
-// Tested by compile_test.cc |
- |
-#include "util/util.h" |
-#include "util/sparse_set.h" |
-#include "re2/prog.h" |
-#include "re2/stringpiece.h" |
- |
-namespace re2 { |
- |
-// Constructors per Inst opcode |
- |
-void Prog::Inst::InitAlt(uint32 out, uint32 out1) { |
- DCHECK_EQ(out_opcode_, 0); |
- set_out_opcode(out, kInstAlt); |
- out1_ = out1; |
-} |
- |
-void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32 out) { |
- DCHECK_EQ(out_opcode_, 0); |
- set_out_opcode(out, kInstByteRange); |
- lo_ = lo & 0xFF; |
- hi_ = hi & 0xFF; |
- foldcase_ = foldcase & 0xFF; |
-} |
- |
-void Prog::Inst::InitCapture(int cap, uint32 out) { |
- DCHECK_EQ(out_opcode_, 0); |
- set_out_opcode(out, kInstCapture); |
- cap_ = cap; |
-} |
- |
-void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32 out) { |
- DCHECK_EQ(out_opcode_, 0); |
- set_out_opcode(out, kInstEmptyWidth); |
- empty_ = empty; |
-} |
- |
-void Prog::Inst::InitMatch(int32 id) { |
- DCHECK_EQ(out_opcode_, 0); |
- set_opcode(kInstMatch); |
- match_id_ = id; |
-} |
- |
-void Prog::Inst::InitNop(uint32 out) { |
- DCHECK_EQ(out_opcode_, 0); |
- set_opcode(kInstNop); |
-} |
- |
-void Prog::Inst::InitFail() { |
- DCHECK_EQ(out_opcode_, 0); |
- set_opcode(kInstFail); |
-} |
- |
-string Prog::Inst::Dump() { |
- switch (opcode()) { |
- default: |
- return StringPrintf("opcode %d", static_cast<int>(opcode())); |
- |
- case kInstAlt: |
- return StringPrintf("alt -> %d | %d", out(), out1_); |
- |
- case kInstAltMatch: |
- return StringPrintf("altmatch -> %d | %d", out(), out1_); |
- |
- case kInstByteRange: |
- return StringPrintf("byte%s [%02x-%02x] -> %d", |
- foldcase_ ? "/i" : "", |
- lo_, hi_, out()); |
- |
- case kInstCapture: |
- return StringPrintf("capture %d -> %d", cap_, out()); |
- |
- case kInstEmptyWidth: |
- return StringPrintf("emptywidth %#x -> %d", |
- static_cast<int>(empty_), out()); |
- |
- case kInstMatch: |
- return StringPrintf("match! %d", match_id()); |
- |
- case kInstNop: |
- return StringPrintf("nop -> %d", out()); |
- |
- case kInstFail: |
- return StringPrintf("fail"); |
- } |
-} |
- |
-Prog::Prog() |
- : anchor_start_(false), |
- anchor_end_(false), |
- reversed_(false), |
- did_onepass_(false), |
- start_(0), |
- start_unanchored_(0), |
- size_(0), |
- byte_inst_count_(0), |
- bytemap_range_(0), |
- flags_(0), |
- onepass_statesize_(0), |
- inst_(NULL), |
- dfa_first_(NULL), |
- dfa_longest_(NULL), |
- dfa_mem_(0), |
- delete_dfa_(NULL), |
- unbytemap_(NULL), |
- onepass_nodes_(NULL), |
- onepass_start_(NULL) { |
-} |
- |
-Prog::~Prog() { |
- if (delete_dfa_) { |
- if (dfa_first_) |
- delete_dfa_(dfa_first_); |
- if (dfa_longest_) |
- delete_dfa_(dfa_longest_); |
- } |
- delete[] onepass_nodes_; |
- delete[] inst_; |
- delete[] unbytemap_; |
-} |
- |
-typedef SparseSet Workq; |
- |
-static inline void AddToQueue(Workq* q, int id) { |
- if (id != 0) |
- q->insert(id); |
-} |
- |
-static string ProgToString(Prog* prog, Workq* q) { |
- string s; |
- |
- for (Workq::iterator i = q->begin(); i != q->end(); ++i) { |
- int id = *i; |
- Prog::Inst* ip = prog->inst(id); |
- StringAppendF(&s, "%d. %s\n", id, ip->Dump().c_str()); |
- AddToQueue(q, ip->out()); |
- if (ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch) |
- AddToQueue(q, ip->out1()); |
- } |
- return s; |
-} |
- |
-string Prog::Dump() { |
- string map; |
- if (false) { // Debugging |
- int lo = 0; |
- StringAppendF(&map, "byte map:\n"); |
- for (int i = 0; i < bytemap_range_; i++) { |
- StringAppendF(&map, "\t%d. [%02x-%02x]\n", i, lo, unbytemap_[i]); |
- lo = unbytemap_[i] + 1; |
- } |
- StringAppendF(&map, "\n"); |
- } |
- |
- Workq q(size_); |
- AddToQueue(&q, start_); |
- return map + ProgToString(this, &q); |
-} |
- |
-string Prog::DumpUnanchored() { |
- Workq q(size_); |
- AddToQueue(&q, start_unanchored_); |
- return ProgToString(this, &q); |
-} |
- |
-static bool IsMatch(Prog*, Prog::Inst*); |
- |
-// Peep-hole optimizer. |
-void Prog::Optimize() { |
- Workq q(size_); |
- |
- // Eliminate nops. Most are taken out during compilation |
- // but a few are hard to avoid. |
- q.clear(); |
- AddToQueue(&q, start_); |
- for (Workq::iterator i = q.begin(); i != q.end(); ++i) { |
- int id = *i; |
- |
- Inst* ip = inst(id); |
- int j = ip->out(); |
- Inst* jp; |
- while (j != 0 && (jp=inst(j))->opcode() == kInstNop) { |
- j = jp->out(); |
- } |
- ip->set_out(j); |
- AddToQueue(&q, ip->out()); |
- |
- if (ip->opcode() == kInstAlt) { |
- j = ip->out1(); |
- while (j != 0 && (jp=inst(j))->opcode() == kInstNop) { |
- j = jp->out(); |
- } |
- ip->out1_ = j; |
- AddToQueue(&q, ip->out1()); |
- } |
- } |
- |
- // Insert kInstAltMatch instructions |
- // Look for |
- // ip: Alt -> j | k |
- // j: ByteRange [00-FF] -> ip |
- // k: Match |
- // or the reverse (the above is the greedy one). |
- // Rewrite Alt to AltMatch. |
- q.clear(); |
- AddToQueue(&q, start_); |
- for (Workq::iterator i = q.begin(); i != q.end(); ++i) { |
- int id = *i; |
- Inst* ip = inst(id); |
- AddToQueue(&q, ip->out()); |
- if (ip->opcode() == kInstAlt) |
- AddToQueue(&q, ip->out1()); |
- |
- if (ip->opcode() == kInstAlt) { |
- Inst* j = inst(ip->out()); |
- Inst* k = inst(ip->out1()); |
- if (j->opcode() == kInstByteRange && j->out() == id && |
- j->lo() == 0x00 && j->hi() == 0xFF && |
- IsMatch(this, k)) { |
- ip->set_opcode(kInstAltMatch); |
- continue; |
- } |
- if (IsMatch(this, j) && |
- k->opcode() == kInstByteRange && k->out() == id && |
- k->lo() == 0x00 && k->hi() == 0xFF) { |
- ip->set_opcode(kInstAltMatch); |
- } |
- } |
- } |
-} |
- |
-// Is ip a guaranteed match at end of text, perhaps after some capturing? |
-static bool IsMatch(Prog* prog, Prog::Inst* ip) { |
- for (;;) { |
- switch (ip->opcode()) { |
- default: |
- LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode(); |
- return false; |
- |
- case kInstAlt: |
- case kInstAltMatch: |
- case kInstByteRange: |
- case kInstFail: |
- case kInstEmptyWidth: |
- return false; |
- |
- case kInstCapture: |
- case kInstNop: |
- ip = prog->inst(ip->out()); |
- break; |
- |
- case kInstMatch: |
- return true; |
- } |
- } |
-} |
- |
-uint32 Prog::EmptyFlags(const StringPiece& text, const char* p) { |
- int flags = 0; |
- |
- // ^ and \A |
- if (p == text.begin()) |
- flags |= kEmptyBeginText | kEmptyBeginLine; |
- else if (p[-1] == '\n') |
- flags |= kEmptyBeginLine; |
- |
- // $ and \z |
- if (p == text.end()) |
- flags |= kEmptyEndText | kEmptyEndLine; |
- else if (p < text.end() && p[0] == '\n') |
- flags |= kEmptyEndLine; |
- |
- // \b and \B |
- if (p == text.begin() && p == text.end()) { |
- // no word boundary here |
- } else if (p == text.begin()) { |
- if (IsWordChar(p[0])) |
- flags |= kEmptyWordBoundary; |
- } else if (p == text.end()) { |
- if (IsWordChar(p[-1])) |
- flags |= kEmptyWordBoundary; |
- } else { |
- if (IsWordChar(p[-1]) != IsWordChar(p[0])) |
- flags |= kEmptyWordBoundary; |
- } |
- if (!(flags & kEmptyWordBoundary)) |
- flags |= kEmptyNonWordBoundary; |
- |
- return flags; |
-} |
- |
-void Prog::MarkByteRange(int lo, int hi) { |
- DCHECK_GE(lo, 0); |
- DCHECK_GE(hi, 0); |
- DCHECK_LE(lo, 255); |
- DCHECK_LE(hi, 255); |
- DCHECK_LE(lo, hi); |
- if (0 < lo && lo <= 255) |
- byterange_.Set(lo - 1); |
- if (0 <= hi && hi <= 255) |
- byterange_.Set(hi); |
-} |
- |
-void Prog::ComputeByteMap() { |
- // Fill in bytemap with byte classes for prog_. |
- // Ranges of bytes that are treated as indistinguishable |
- // by the regexp program are mapped to a single byte class. |
- // The vector prog_->byterange() marks the end of each |
- // such range. |
- const Bitmap<256>& v = byterange(); |
- |
- COMPILE_ASSERT(8*sizeof(v.Word(0)) == 32, wordsize); |
- uint8 n = 0; |
- uint32 bits = 0; |
- for (int i = 0; i < 256; i++) { |
- if ((i&31) == 0) |
- bits = v.Word(i >> 5); |
- bytemap_[i] = n; |
- n += bits & 1; |
- bits >>= 1; |
- } |
- bytemap_range_ = bytemap_[255] + 1; |
- unbytemap_ = new uint8[bytemap_range_]; |
- for (int i = 0; i < 256; i++) |
- unbytemap_[bytemap_[i]] = static_cast<uint8>(i); |
- |
- if (0) { // For debugging: use trivial byte map. |
- for (int i = 0; i < 256; i++) { |
- bytemap_[i] = static_cast<uint8>(i); |
- unbytemap_[i] = static_cast<uint8>(i); |
- } |
- bytemap_range_ = 256; |
- LOG(INFO) << "Using trivial bytemap."; |
- } |
-} |
- |
-} // namespace re2 |
- |