Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(684)

Side by Side Diff: third_party/re2/re2/prog.cc

Issue 1544433002: Replace RE2 import with a dependency (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Re-Added LICENSE and OWNERS file Created 4 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/re2/re2/prog.h ('k') | third_party/re2/re2/re2.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2007 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Compiled regular expression representation.
6 // Tested by compile_test.cc
7
8 #include "util/util.h"
9 #include "util/sparse_set.h"
10 #include "re2/prog.h"
11 #include "re2/stringpiece.h"
12
13 namespace re2 {
14
15 // Constructors per Inst opcode
16
17 void Prog::Inst::InitAlt(uint32 out, uint32 out1) {
18 DCHECK_EQ(out_opcode_, 0);
19 set_out_opcode(out, kInstAlt);
20 out1_ = out1;
21 }
22
23 void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32 out) {
24 DCHECK_EQ(out_opcode_, 0);
25 set_out_opcode(out, kInstByteRange);
26 lo_ = lo & 0xFF;
27 hi_ = hi & 0xFF;
28 foldcase_ = foldcase & 0xFF;
29 }
30
31 void Prog::Inst::InitCapture(int cap, uint32 out) {
32 DCHECK_EQ(out_opcode_, 0);
33 set_out_opcode(out, kInstCapture);
34 cap_ = cap;
35 }
36
37 void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32 out) {
38 DCHECK_EQ(out_opcode_, 0);
39 set_out_opcode(out, kInstEmptyWidth);
40 empty_ = empty;
41 }
42
43 void Prog::Inst::InitMatch(int32 id) {
44 DCHECK_EQ(out_opcode_, 0);
45 set_opcode(kInstMatch);
46 match_id_ = id;
47 }
48
49 void Prog::Inst::InitNop(uint32 out) {
50 DCHECK_EQ(out_opcode_, 0);
51 set_opcode(kInstNop);
52 }
53
54 void Prog::Inst::InitFail() {
55 DCHECK_EQ(out_opcode_, 0);
56 set_opcode(kInstFail);
57 }
58
59 string Prog::Inst::Dump() {
60 switch (opcode()) {
61 default:
62 return StringPrintf("opcode %d", static_cast<int>(opcode()));
63
64 case kInstAlt:
65 return StringPrintf("alt -> %d | %d", out(), out1_);
66
67 case kInstAltMatch:
68 return StringPrintf("altmatch -> %d | %d", out(), out1_);
69
70 case kInstByteRange:
71 return StringPrintf("byte%s [%02x-%02x] -> %d",
72 foldcase_ ? "/i" : "",
73 lo_, hi_, out());
74
75 case kInstCapture:
76 return StringPrintf("capture %d -> %d", cap_, out());
77
78 case kInstEmptyWidth:
79 return StringPrintf("emptywidth %#x -> %d",
80 static_cast<int>(empty_), out());
81
82 case kInstMatch:
83 return StringPrintf("match! %d", match_id());
84
85 case kInstNop:
86 return StringPrintf("nop -> %d", out());
87
88 case kInstFail:
89 return StringPrintf("fail");
90 }
91 }
92
93 Prog::Prog()
94 : anchor_start_(false),
95 anchor_end_(false),
96 reversed_(false),
97 did_onepass_(false),
98 start_(0),
99 start_unanchored_(0),
100 size_(0),
101 byte_inst_count_(0),
102 bytemap_range_(0),
103 flags_(0),
104 onepass_statesize_(0),
105 inst_(NULL),
106 dfa_first_(NULL),
107 dfa_longest_(NULL),
108 dfa_mem_(0),
109 delete_dfa_(NULL),
110 unbytemap_(NULL),
111 onepass_nodes_(NULL),
112 onepass_start_(NULL) {
113 }
114
115 Prog::~Prog() {
116 if (delete_dfa_) {
117 if (dfa_first_)
118 delete_dfa_(dfa_first_);
119 if (dfa_longest_)
120 delete_dfa_(dfa_longest_);
121 }
122 delete[] onepass_nodes_;
123 delete[] inst_;
124 delete[] unbytemap_;
125 }
126
127 typedef SparseSet Workq;
128
129 static inline void AddToQueue(Workq* q, int id) {
130 if (id != 0)
131 q->insert(id);
132 }
133
134 static string ProgToString(Prog* prog, Workq* q) {
135 string s;
136
137 for (Workq::iterator i = q->begin(); i != q->end(); ++i) {
138 int id = *i;
139 Prog::Inst* ip = prog->inst(id);
140 StringAppendF(&s, "%d. %s\n", id, ip->Dump().c_str());
141 AddToQueue(q, ip->out());
142 if (ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch)
143 AddToQueue(q, ip->out1());
144 }
145 return s;
146 }
147
148 string Prog::Dump() {
149 string map;
150 if (false) { // Debugging
151 int lo = 0;
152 StringAppendF(&map, "byte map:\n");
153 for (int i = 0; i < bytemap_range_; i++) {
154 StringAppendF(&map, "\t%d. [%02x-%02x]\n", i, lo, unbytemap_[i]);
155 lo = unbytemap_[i] + 1;
156 }
157 StringAppendF(&map, "\n");
158 }
159
160 Workq q(size_);
161 AddToQueue(&q, start_);
162 return map + ProgToString(this, &q);
163 }
164
165 string Prog::DumpUnanchored() {
166 Workq q(size_);
167 AddToQueue(&q, start_unanchored_);
168 return ProgToString(this, &q);
169 }
170
171 static bool IsMatch(Prog*, Prog::Inst*);
172
173 // Peep-hole optimizer.
174 void Prog::Optimize() {
175 Workq q(size_);
176
177 // Eliminate nops. Most are taken out during compilation
178 // but a few are hard to avoid.
179 q.clear();
180 AddToQueue(&q, start_);
181 for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
182 int id = *i;
183
184 Inst* ip = inst(id);
185 int j = ip->out();
186 Inst* jp;
187 while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
188 j = jp->out();
189 }
190 ip->set_out(j);
191 AddToQueue(&q, ip->out());
192
193 if (ip->opcode() == kInstAlt) {
194 j = ip->out1();
195 while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
196 j = jp->out();
197 }
198 ip->out1_ = j;
199 AddToQueue(&q, ip->out1());
200 }
201 }
202
203 // Insert kInstAltMatch instructions
204 // Look for
205 // ip: Alt -> j | k
206 // j: ByteRange [00-FF] -> ip
207 // k: Match
208 // or the reverse (the above is the greedy one).
209 // Rewrite Alt to AltMatch.
210 q.clear();
211 AddToQueue(&q, start_);
212 for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
213 int id = *i;
214 Inst* ip = inst(id);
215 AddToQueue(&q, ip->out());
216 if (ip->opcode() == kInstAlt)
217 AddToQueue(&q, ip->out1());
218
219 if (ip->opcode() == kInstAlt) {
220 Inst* j = inst(ip->out());
221 Inst* k = inst(ip->out1());
222 if (j->opcode() == kInstByteRange && j->out() == id &&
223 j->lo() == 0x00 && j->hi() == 0xFF &&
224 IsMatch(this, k)) {
225 ip->set_opcode(kInstAltMatch);
226 continue;
227 }
228 if (IsMatch(this, j) &&
229 k->opcode() == kInstByteRange && k->out() == id &&
230 k->lo() == 0x00 && k->hi() == 0xFF) {
231 ip->set_opcode(kInstAltMatch);
232 }
233 }
234 }
235 }
236
237 // Is ip a guaranteed match at end of text, perhaps after some capturing?
238 static bool IsMatch(Prog* prog, Prog::Inst* ip) {
239 for (;;) {
240 switch (ip->opcode()) {
241 default:
242 LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
243 return false;
244
245 case kInstAlt:
246 case kInstAltMatch:
247 case kInstByteRange:
248 case kInstFail:
249 case kInstEmptyWidth:
250 return false;
251
252 case kInstCapture:
253 case kInstNop:
254 ip = prog->inst(ip->out());
255 break;
256
257 case kInstMatch:
258 return true;
259 }
260 }
261 }
262
263 uint32 Prog::EmptyFlags(const StringPiece& text, const char* p) {
264 int flags = 0;
265
266 // ^ and \A
267 if (p == text.begin())
268 flags |= kEmptyBeginText | kEmptyBeginLine;
269 else if (p[-1] == '\n')
270 flags |= kEmptyBeginLine;
271
272 // $ and \z
273 if (p == text.end())
274 flags |= kEmptyEndText | kEmptyEndLine;
275 else if (p < text.end() && p[0] == '\n')
276 flags |= kEmptyEndLine;
277
278 // \b and \B
279 if (p == text.begin() && p == text.end()) {
280 // no word boundary here
281 } else if (p == text.begin()) {
282 if (IsWordChar(p[0]))
283 flags |= kEmptyWordBoundary;
284 } else if (p == text.end()) {
285 if (IsWordChar(p[-1]))
286 flags |= kEmptyWordBoundary;
287 } else {
288 if (IsWordChar(p[-1]) != IsWordChar(p[0]))
289 flags |= kEmptyWordBoundary;
290 }
291 if (!(flags & kEmptyWordBoundary))
292 flags |= kEmptyNonWordBoundary;
293
294 return flags;
295 }
296
297 void Prog::MarkByteRange(int lo, int hi) {
298 DCHECK_GE(lo, 0);
299 DCHECK_GE(hi, 0);
300 DCHECK_LE(lo, 255);
301 DCHECK_LE(hi, 255);
302 DCHECK_LE(lo, hi);
303 if (0 < lo && lo <= 255)
304 byterange_.Set(lo - 1);
305 if (0 <= hi && hi <= 255)
306 byterange_.Set(hi);
307 }
308
309 void Prog::ComputeByteMap() {
310 // Fill in bytemap with byte classes for prog_.
311 // Ranges of bytes that are treated as indistinguishable
312 // by the regexp program are mapped to a single byte class.
313 // The vector prog_->byterange() marks the end of each
314 // such range.
315 const Bitmap<256>& v = byterange();
316
317 COMPILE_ASSERT(8*sizeof(v.Word(0)) == 32, wordsize);
318 uint8 n = 0;
319 uint32 bits = 0;
320 for (int i = 0; i < 256; i++) {
321 if ((i&31) == 0)
322 bits = v.Word(i >> 5);
323 bytemap_[i] = n;
324 n += bits & 1;
325 bits >>= 1;
326 }
327 bytemap_range_ = bytemap_[255] + 1;
328 unbytemap_ = new uint8[bytemap_range_];
329 for (int i = 0; i < 256; i++)
330 unbytemap_[bytemap_[i]] = static_cast<uint8>(i);
331
332 if (0) { // For debugging: use trivial byte map.
333 for (int i = 0; i < 256; i++) {
334 bytemap_[i] = static_cast<uint8>(i);
335 unbytemap_[i] = static_cast<uint8>(i);
336 }
337 bytemap_range_ = 256;
338 LOG(INFO) << "Using trivial bytemap.";
339 }
340 }
341
342 } // namespace re2
343
OLDNEW
« no previous file with comments | « third_party/re2/re2/prog.h ('k') | third_party/re2/re2/re2.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698