OLD | NEW |
| (Empty) |
1 // Copyright 2008 The RE2 Authors. All Rights Reserved. | |
2 // Use of this source code is governed by a BSD-style | |
3 // license that can be found in the LICENSE file. | |
4 | |
5 // Tested by search_test.cc. | |
6 // | |
7 // Prog::SearchOnePass is an efficient implementation of | |
8 // regular expression search with submatch tracking for | |
9 // what I call "one-pass regular expressions". (An alternate | |
10 // name might be "backtracking-free regular expressions".) | |
11 // | |
12 // One-pass regular expressions have the property that | |
13 // at each input byte during an anchored match, there may be | |
14 // multiple alternatives but only one can proceed for any | |
15 // given input byte. | |
16 // | |
17 // For example, the regexp /x*yx*/ is one-pass: you read | |
18 // x's until a y, then you read the y, then you keep reading x's. | |
19 // At no point do you have to guess what to do or back up | |
20 // and try a different guess. | |
21 // | |
22 // On the other hand, /x*x/ is not one-pass: when you're | |
23 // looking at an input "x", it's not clear whether you should | |
24 // use it to extend the x* or as the final x. | |
25 // | |
26 // More examples: /([^ ]*) (.*)/ is one-pass; /(.*) (.*)/ is not. | |
27 // /(\d+)-(\d+)/ is one-pass; /(\d+).(\d+)/ is not. | |
28 // | |
29 // A simple intuition for identifying one-pass regular expressions | |
30 // is that it's always immediately obvious when a repetition ends. | |
31 // It must also be immediately obvious which branch of an | to take: | |
32 // | |
33 // /x(y|z)/ is one-pass, but /(xy|xz)/ is not. | |
34 // | |
35 // The NFA-based search in nfa.cc does some bookkeeping to | |
36 // avoid the need for backtracking and its associated exponential blowup. | |
37 // But if we have a one-pass regular expression, there is no | |
38 // possibility of backtracking, so there is no need for the | |
39 // extra bookkeeping. Hence, this code. | |
40 // | |
41 // On a one-pass regular expression, the NFA code in nfa.cc | |
42 // runs at about 1/20 of the backtracking-based PCRE speed. | |
43 // In contrast, the code in this file runs at about the same | |
44 // speed as PCRE. | |
45 // | |
46 // One-pass regular expressions get used a lot when RE is | |
47 // used for parsing simple strings, so it pays off to | |
48 // notice them and handle them efficiently. | |
49 // | |
50 // See also Anne Brüggemann-Klein and Derick Wood, | |
51 // "One-unambiguous regular languages", Information and Computation 142(2). | |
52 | |
53 #include <string.h> | |
54 #include <map> | |
55 #include "util/util.h" | |
56 #include "util/sparse_set.h" | |
57 #include "re2/prog.h" | |
58 #include "re2/stringpiece.h" | |
59 | |
60 namespace re2 { | |
61 | |
62 static const int Debug = 0; | |
63 | |
64 // The key insight behind this implementation is that the | |
65 // non-determinism in an NFA for a one-pass regular expression | |
66 // is contained. To explain what that means, first a | |
67 // refresher about what regular expression programs look like | |
68 // and how the usual NFA execution runs. | |
69 // | |
70 // In a regular expression program, only the kInstByteRange | |
71 // instruction processes an input byte c and moves on to the | |
72 // next byte in the string (it does so if c is in the given range). | |
73 // The kInstByteRange instructions correspond to literal characters | |
74 // and character classes in the regular expression. | |
75 // | |
76 // The kInstAlt instructions are used as wiring to connect the | |
77 // kInstByteRange instructions together in interesting ways when | |
78 // implementing | + and *. | |
79 // The kInstAlt instruction forks execution, like a goto that | |
80 // jumps to ip->out() and ip->out1() in parallel. Each of the | |
81 // resulting computation paths is called a thread. | |
82 // | |
83 // The other instructions -- kInstEmptyWidth, kInstMatch, kInstCapture -- | |
84 // are interesting in their own right but like kInstAlt they don't | |
85 // advance the input pointer. Only kInstByteRange does. | |
86 // | |
87 // The automaton execution in nfa.cc runs all the possible | |
88 // threads of execution in lock-step over the input. To process | |
89 // a particular byte, each thread gets run until it either dies | |
90 // or finds a kInstByteRange instruction matching the byte. | |
91 // If the latter happens, the thread stops just past the | |
92 // kInstByteRange instruction (at ip->out()) and waits for | |
93 // the other threads to finish processing the input byte. | |
94 // Then, once all the threads have processed that input byte, | |
95 // the whole process repeats. The kInstAlt state instruction | |
96 // might create new threads during input processing, but no | |
97 // matter what, all the threads stop after a kInstByteRange | |
98 // and wait for the other threads to "catch up". | |
99 // Running in lock step like this ensures that the NFA reads | |
100 // the input string only once. | |
101 // | |
102 // Each thread maintains its own set of capture registers | |
103 // (the string positions at which it executed the kInstCapture | |
104 // instructions corresponding to capturing parentheses in the | |
105 // regular expression). Repeated copying of the capture registers | |
106 // is the main performance bottleneck in the NFA implementation. | |
107 // | |
108 // A regular expression program is "one-pass" if, no matter what | |
109 // the input string, there is only one thread that makes it | |
110 // past a kInstByteRange instruction at each input byte. This means | |
111 // that there is in some sense only one active thread throughout | |
112 // the execution. Other threads might be created during the | |
113 // processing of an input byte, but they are ephemeral: only one | |
114 // thread is left to start processing the next input byte. | |
115 // This is what I meant above when I said the non-determinism | |
116 // was "contained". | |
117 // | |
118 // To execute a one-pass regular expression program, we can build | |
119 // a DFA (no non-determinism) that has at most as many states as | |
120 // the NFA (compare this to the possibly exponential number of states | |
121 // in the general case). Each state records, for each possible | |
122 // input byte, the next state along with the conditions required | |
123 // before entering that state -- empty-width flags that must be true | |
124 // and capture operations that must be performed. It also records | |
125 // whether a set of conditions required to finish a match at that | |
126 // point in the input rather than process the next byte. | |
127 | |
128 // A state in the one-pass NFA - just an array of actions indexed | |
129 // by the bytemap_[] of the next input byte. (The bytemap | |
130 // maps next input bytes into equivalence classes, to reduce | |
131 // the memory footprint.) | |
132 struct OneState { | |
133 uint32 matchcond; // conditions to match right now. | |
134 uint32 action[1]; | |
135 }; | |
136 | |
137 // The uint32 conditions in the action are a combination of | |
138 // condition and capture bits and the next state. The bottom 16 bits | |
139 // are the condition and capture bits, and the top 16 are the index of | |
140 // the next state. | |
141 // | |
142 // Bits 0-5 are the empty-width flags from prog.h. | |
143 // Bit 6 is kMatchWins, which means the match takes | |
144 // priority over moving to next in a first-match search. | |
145 // The remaining bits mark capture registers that should | |
146 // be set to the current input position. The capture bits | |
147 // start at index 2, since the search loop can take care of | |
148 // cap[0], cap[1] (the overall match position). | |
149 // That means we can handle up to 5 capturing parens: $1 through $4, plus $0. | |
150 // No input position can satisfy both kEmptyWordBoundary | |
151 // and kEmptyNonWordBoundary, so we can use that as a sentinel | |
152 // instead of needing an extra bit. | |
153 | |
154 static const int kIndexShift = 16; // number of bits below index | |
155 static const int kEmptyShift = 6; // number of empty flags in prog.h | |
156 static const int kRealCapShift = kEmptyShift + 1; | |
157 static const int kRealMaxCap = (kIndexShift - kRealCapShift) / 2 * 2; | |
158 | |
159 // Parameters used to skip over cap[0], cap[1]. | |
160 static const int kCapShift = kRealCapShift - 2; | |
161 static const int kMaxCap = kRealMaxCap + 2; | |
162 | |
163 static const uint32 kMatchWins = 1 << kEmptyShift; | |
164 static const uint32 kCapMask = ((1 << kRealMaxCap) - 1) << kRealCapShift; | |
165 | |
166 static const uint32 kImpossible = kEmptyWordBoundary | kEmptyNonWordBoundary; | |
167 | |
168 // Check, at compile time, that prog.h agrees with math above. | |
169 // This function is never called. | |
170 void OnePass_Checks() { | |
171 COMPILE_ASSERT((1<<kEmptyShift)-1 == kEmptyAllFlags, | |
172 kEmptyShift_disagrees_with_kEmptyAllFlags); | |
173 // kMaxCap counts pointers, kMaxOnePassCapture counts pairs. | |
174 COMPILE_ASSERT(kMaxCap == Prog::kMaxOnePassCapture*2, | |
175 kMaxCap_disagrees_with_kMaxOnePassCapture); | |
176 } | |
177 | |
178 static bool Satisfy(uint32 cond, const StringPiece& context, const char* p) { | |
179 uint32 satisfied = Prog::EmptyFlags(context, p); | |
180 if (cond & kEmptyAllFlags & ~satisfied) | |
181 return false; | |
182 return true; | |
183 } | |
184 | |
185 // Apply the capture bits in cond, saving p to the appropriate | |
186 // locations in cap[]. | |
187 static void ApplyCaptures(uint32 cond, const char* p, | |
188 const char** cap, int ncap) { | |
189 for (int i = 2; i < ncap; i++) | |
190 if (cond & (1 << kCapShift << i)) | |
191 cap[i] = p; | |
192 } | |
193 | |
194 // Compute a node pointer. | |
195 // Basically (OneState*)(nodes + statesize*nodeindex) | |
196 // but the version with the C++ casts overflows 80 characters (and is ugly). | |
197 static inline OneState* IndexToNode(volatile uint8* nodes, int statesize, | |
198 int nodeindex) { | |
199 return reinterpret_cast<OneState*>( | |
200 const_cast<uint8*>(nodes + statesize*nodeindex)); | |
201 } | |
202 | |
203 bool Prog::SearchOnePass(const StringPiece& text, | |
204 const StringPiece& const_context, | |
205 Anchor anchor, MatchKind kind, | |
206 StringPiece* match, int nmatch) { | |
207 if (anchor != kAnchored && kind != kFullMatch) { | |
208 LOG(DFATAL) << "Cannot use SearchOnePass for unanchored matches."; | |
209 return false; | |
210 } | |
211 | |
212 // Make sure we have at least cap[1], | |
213 // because we use it to tell if we matched. | |
214 int ncap = 2*nmatch; | |
215 if (ncap < 2) | |
216 ncap = 2; | |
217 | |
218 const char* cap[kMaxCap]; | |
219 for (int i = 0; i < ncap; i++) | |
220 cap[i] = NULL; | |
221 | |
222 const char* matchcap[kMaxCap]; | |
223 for (int i = 0; i < ncap; i++) | |
224 matchcap[i] = NULL; | |
225 | |
226 StringPiece context = const_context; | |
227 if (context.begin() == NULL) | |
228 context = text; | |
229 if (anchor_start() && context.begin() != text.begin()) | |
230 return false; | |
231 if (anchor_end() && context.end() != text.end()) | |
232 return false; | |
233 if (anchor_end()) | |
234 kind = kFullMatch; | |
235 | |
236 // State and act are marked volatile to | |
237 // keep the compiler from re-ordering the | |
238 // memory accesses walking over the NFA. | |
239 // This is worth about 5%. | |
240 volatile OneState* state = onepass_start_; | |
241 volatile uint8* nodes = onepass_nodes_; | |
242 volatile uint32 statesize = onepass_statesize_; | |
243 uint8* bytemap = bytemap_; | |
244 const char* bp = text.begin(); | |
245 const char* ep = text.end(); | |
246 const char* p; | |
247 bool matched = false; | |
248 matchcap[0] = bp; | |
249 cap[0] = bp; | |
250 uint32 nextmatchcond = state->matchcond; | |
251 for (p = bp; p < ep; p++) { | |
252 int c = bytemap[*p & 0xFF]; | |
253 uint32 matchcond = nextmatchcond; | |
254 uint32 cond = state->action[c]; | |
255 | |
256 // Determine whether we can reach act->next. | |
257 // If so, advance state and nextmatchcond. | |
258 if ((cond & kEmptyAllFlags) == 0 || Satisfy(cond, context, p)) { | |
259 uint32 nextindex = cond >> kIndexShift; | |
260 state = IndexToNode(nodes, statesize, nextindex); | |
261 nextmatchcond = state->matchcond; | |
262 } else { | |
263 state = NULL; | |
264 nextmatchcond = kImpossible; | |
265 } | |
266 | |
267 // This code section is carefully tuned. | |
268 // The goto sequence is about 10% faster than the | |
269 // obvious rewrite as a large if statement in the | |
270 // ASCIIMatchRE2 and DotMatchRE2 benchmarks. | |
271 | |
272 // Saving the match capture registers is expensive. | |
273 // Is this intermediate match worth thinking about? | |
274 | |
275 // Not if we want a full match. | |
276 if (kind == kFullMatch) | |
277 goto skipmatch; | |
278 | |
279 // Not if it's impossible. | |
280 if (matchcond == kImpossible) | |
281 goto skipmatch; | |
282 | |
283 // Not if the possible match is beaten by the certain | |
284 // match at the next byte. When this test is useless | |
285 // (e.g., HTTPPartialMatchRE2) it slows the loop by | |
286 // about 10%, but when it avoids work (e.g., DotMatchRE2), | |
287 // it cuts the loop execution by about 45%. | |
288 if ((cond & kMatchWins) == 0 && (nextmatchcond & kEmptyAllFlags) == 0) | |
289 goto skipmatch; | |
290 | |
291 // Finally, the match conditions must be satisfied. | |
292 if ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p)) { | |
293 for (int i = 2; i < 2*nmatch; i++) | |
294 matchcap[i] = cap[i]; | |
295 if (nmatch > 1 && (matchcond & kCapMask)) | |
296 ApplyCaptures(matchcond, p, matchcap, ncap); | |
297 matchcap[1] = p; | |
298 matched = true; | |
299 | |
300 // If we're in longest match mode, we have to keep | |
301 // going and see if we find a longer match. | |
302 // In first match mode, we can stop if the match | |
303 // takes priority over the next state for this input byte. | |
304 // That bit is per-input byte and thus in cond, not matchcond. | |
305 if (kind == kFirstMatch && (cond & kMatchWins)) | |
306 goto done; | |
307 } | |
308 | |
309 skipmatch: | |
310 if (state == NULL) | |
311 goto done; | |
312 if ((cond & kCapMask) && nmatch > 1) | |
313 ApplyCaptures(cond, p, cap, ncap); | |
314 } | |
315 | |
316 // Look for match at end of input. | |
317 { | |
318 uint32 matchcond = state->matchcond; | |
319 if (matchcond != kImpossible && | |
320 ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p))) { | |
321 if (nmatch > 1 && (matchcond & kCapMask)) | |
322 ApplyCaptures(matchcond, p, cap, ncap); | |
323 for (int i = 2; i < ncap; i++) | |
324 matchcap[i] = cap[i]; | |
325 matchcap[1] = p; | |
326 matched = true; | |
327 } | |
328 } | |
329 | |
330 done: | |
331 if (!matched) | |
332 return false; | |
333 for (int i = 0; i < nmatch; i++) | |
334 match[i].set(matchcap[2*i], | |
335 static_cast<int>(matchcap[2*i+1] - matchcap[2*i])); | |
336 return true; | |
337 } | |
338 | |
339 | |
340 // Analysis to determine whether a given regexp program is one-pass. | |
341 | |
342 // If ip is not on workq, adds ip to work queue and returns true. | |
343 // If ip is already on work queue, does nothing and returns false. | |
344 // If ip is NULL, does nothing and returns true (pretends to add it). | |
345 typedef SparseSet Instq; | |
346 static bool AddQ(Instq *q, int id) { | |
347 if (id == 0) | |
348 return true; | |
349 if (q->contains(id)) | |
350 return false; | |
351 q->insert(id); | |
352 return true; | |
353 } | |
354 | |
355 struct InstCond { | |
356 int id; | |
357 uint32 cond; | |
358 }; | |
359 | |
360 // Returns whether this is a one-pass program; that is, | |
361 // returns whether it is safe to use SearchOnePass on this program. | |
362 // These conditions must be true for any instruction ip: | |
363 // | |
364 // (1) for any other Inst nip, there is at most one input-free | |
365 // path from ip to nip. | |
366 // (2) there is at most one kInstByte instruction reachable from | |
367 // ip that matches any particular byte c. | |
368 // (3) there is at most one input-free path from ip to a kInstMatch | |
369 // instruction. | |
370 // | |
371 // This is actually just a conservative approximation: it might | |
372 // return false when the answer is true, when kInstEmptyWidth | |
373 // instructions are involved. | |
374 // Constructs and saves corresponding one-pass NFA on success. | |
375 bool Prog::IsOnePass() { | |
376 if (did_onepass_) | |
377 return onepass_start_ != NULL; | |
378 did_onepass_ = true; | |
379 | |
380 if (start() == 0) // no match | |
381 return false; | |
382 | |
383 // Steal memory for the one-pass NFA from the overall DFA budget. | |
384 // Willing to use at most 1/4 of the DFA budget (heuristic). | |
385 // Limit max node count to 65000 as a conservative estimate to | |
386 // avoid overflowing 16-bit node index in encoding. | |
387 int maxnodes = 2 + byte_inst_count_; | |
388 int statesize = sizeof(OneState) + (bytemap_range_-1)*sizeof(uint32); | |
389 if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes) | |
390 return false; | |
391 | |
392 // Flood the graph starting at the start state, and check | |
393 // that in each reachable state, each possible byte leads | |
394 // to a unique next state. | |
395 int size = this->size(); | |
396 InstCond *stack = new InstCond[size]; | |
397 | |
398 int* nodebyid = new int[size]; // indexed by ip | |
399 memset(nodebyid, 0xFF, size*sizeof nodebyid[0]); | |
400 | |
401 uint8* nodes = new uint8[maxnodes*statesize]; | |
402 uint8* nodep = nodes; | |
403 | |
404 Instq tovisit(size), workq(size); | |
405 AddQ(&tovisit, start()); | |
406 nodebyid[start()] = 0; | |
407 nodep += statesize; | |
408 int nalloc = 1; | |
409 for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) { | |
410 int id = *it; | |
411 int nodeindex = nodebyid[id]; | |
412 OneState* node = IndexToNode(nodes, statesize, nodeindex); | |
413 | |
414 // Flood graph using manual stack, filling in actions as found. | |
415 // Default is none. | |
416 for (int b = 0; b < bytemap_range_; b++) | |
417 node->action[b] = kImpossible; | |
418 node->matchcond = kImpossible; | |
419 | |
420 workq.clear(); | |
421 bool matched = false; | |
422 int nstack = 0; | |
423 stack[nstack].id = id; | |
424 stack[nstack++].cond = 0; | |
425 while (nstack > 0) { | |
426 int id = stack[--nstack].id; | |
427 Prog::Inst* ip = inst(id); | |
428 uint32 cond = stack[nstack].cond; | |
429 switch (ip->opcode()) { | |
430 case kInstAltMatch: | |
431 // TODO(rsc): Ignoring kInstAltMatch optimization. | |
432 // Should implement it in this engine, but it's subtle. | |
433 // Fall through. | |
434 case kInstAlt: | |
435 // If already on work queue, (1) is violated: bail out. | |
436 if (!AddQ(&workq, ip->out()) || !AddQ(&workq, ip->out1())) | |
437 goto fail; | |
438 stack[nstack].id = ip->out1(); | |
439 stack[nstack++].cond = cond; | |
440 stack[nstack].id = ip->out(); | |
441 stack[nstack++].cond = cond; | |
442 break; | |
443 | |
444 case kInstByteRange: { | |
445 int nextindex = nodebyid[ip->out()]; | |
446 if (nextindex == -1) { | |
447 if (nalloc >= maxnodes) { | |
448 if (Debug) | |
449 LOG(ERROR) | |
450 << StringPrintf("Not OnePass: hit node limit %d > %d", | |
451 nalloc, maxnodes); | |
452 goto fail; | |
453 } | |
454 nextindex = nalloc; | |
455 nodep += statesize; | |
456 nodebyid[ip->out()] = nextindex; | |
457 nalloc++; | |
458 AddQ(&tovisit, ip->out()); | |
459 } | |
460 if (matched) | |
461 cond |= kMatchWins; | |
462 for (int c = ip->lo(); c <= ip->hi(); c++) { | |
463 int b = bytemap_[c]; | |
464 c = unbytemap_[b]; // last c in byte class | |
465 uint32 act = node->action[b]; | |
466 uint32 newact = (nextindex << kIndexShift) | cond; | |
467 if ((act & kImpossible) == kImpossible) { | |
468 node->action[b] = newact; | |
469 } else if (act != newact) { | |
470 if (Debug) { | |
471 LOG(ERROR) | |
472 << StringPrintf("Not OnePass: conflict on byte " | |
473 "%#x at state %d", | |
474 c, *it); | |
475 } | |
476 goto fail; | |
477 } | |
478 } | |
479 if (ip->foldcase()) { | |
480 Rune lo = max<Rune>(ip->lo(), 'a') + 'A' - 'a'; | |
481 Rune hi = min<Rune>(ip->hi(), 'z') + 'A' - 'a'; | |
482 for (int c = lo; c <= hi; c++) { | |
483 int b = bytemap_[c]; | |
484 c = unbytemap_[b]; // last c in class | |
485 uint32 act = node->action[b]; | |
486 uint32 newact = (nextindex << kIndexShift) | cond; | |
487 if ((act & kImpossible) == kImpossible) { | |
488 node->action[b] = newact; | |
489 } else if (act != newact) { | |
490 if (Debug) { | |
491 LOG(ERROR) | |
492 << StringPrintf("Not OnePass: conflict on byte " | |
493 "%#x at state %d", | |
494 c, *it); | |
495 } | |
496 goto fail; | |
497 } | |
498 } | |
499 } | |
500 break; | |
501 } | |
502 | |
503 case kInstCapture: | |
504 if (ip->cap() < kMaxCap) | |
505 cond |= (1 << kCapShift) << ip->cap(); | |
506 goto QueueEmpty; | |
507 | |
508 case kInstEmptyWidth: | |
509 cond |= ip->empty(); | |
510 goto QueueEmpty; | |
511 | |
512 case kInstNop: | |
513 QueueEmpty: | |
514 // kInstCapture and kInstNop always proceed to ip->out(). | |
515 // kInstEmptyWidth only sometimes proceeds to ip->out(), | |
516 // but as a conservative approximation we assume it always does. | |
517 // We could be a little more precise by looking at what c | |
518 // is, but that seems like overkill. | |
519 | |
520 // If already on work queue, (1) is violated: bail out. | |
521 if (!AddQ(&workq, ip->out())) { | |
522 if (Debug) { | |
523 LOG(ERROR) << StringPrintf("Not OnePass: multiple paths" | |
524 " %d -> %d\n", | |
525 *it, ip->out()); | |
526 } | |
527 goto fail; | |
528 } | |
529 stack[nstack].id = ip->out(); | |
530 stack[nstack++].cond = cond; | |
531 break; | |
532 | |
533 case kInstMatch: | |
534 if (matched) { | |
535 // (3) is violated | |
536 if (Debug) { | |
537 LOG(ERROR) << StringPrintf("Not OnePass: multiple matches" | |
538 " from %d\n", *it); | |
539 } | |
540 goto fail; | |
541 } | |
542 matched = true; | |
543 node->matchcond = cond; | |
544 break; | |
545 | |
546 case kInstFail: | |
547 break; | |
548 } | |
549 } | |
550 } | |
551 | |
552 if (Debug) { // For debugging, dump one-pass NFA to LOG(ERROR). | |
553 string dump = "prog dump:\n" + Dump() + "node dump\n"; | |
554 map<int, int> idmap; | |
555 for (int i = 0; i < size; i++) | |
556 if (nodebyid[i] != -1) | |
557 idmap[nodebyid[i]] = i; | |
558 | |
559 StringAppendF(&dump, "byte ranges:\n"); | |
560 int i = 0; | |
561 for (int b = 0; b < bytemap_range_; b++) { | |
562 int lo = i; | |
563 while (bytemap_[i] == b) | |
564 i++; | |
565 StringAppendF(&dump, "\t%d: %#x-%#x\n", b, lo, i - 1); | |
566 } | |
567 | |
568 for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) { | |
569 int id = *it; | |
570 int nodeindex = nodebyid[id]; | |
571 if (nodeindex == -1) | |
572 continue; | |
573 OneState* node = IndexToNode(nodes, statesize, nodeindex); | |
574 string s; | |
575 StringAppendF(&dump, "node %d id=%d: matchcond=%#x\n", | |
576 nodeindex, id, node->matchcond); | |
577 for (int i = 0; i < bytemap_range_; i++) { | |
578 if ((node->action[i] & kImpossible) == kImpossible) | |
579 continue; | |
580 StringAppendF(&dump, " %d cond %#x -> %d id=%d\n", | |
581 i, node->action[i] & 0xFFFF, | |
582 node->action[i] >> kIndexShift, | |
583 idmap[node->action[i] >> kIndexShift]); | |
584 } | |
585 } | |
586 LOG(ERROR) << dump; | |
587 } | |
588 | |
589 // Overallocated earlier; cut down to actual size. | |
590 nodep = new uint8[nalloc*statesize]; | |
591 memmove(nodep, nodes, nalloc*statesize); | |
592 delete[] nodes; | |
593 nodes = nodep; | |
594 | |
595 onepass_start_ = IndexToNode(nodes, statesize, nodebyid[start()]); | |
596 onepass_nodes_ = nodes; | |
597 onepass_statesize_ = statesize; | |
598 dfa_mem_ -= nalloc*statesize; | |
599 | |
600 delete[] stack; | |
601 delete[] nodebyid; | |
602 return true; | |
603 | |
604 fail: | |
605 delete[] stack; | |
606 delete[] nodebyid; | |
607 delete[] nodes; | |
608 return false; | |
609 } | |
610 | |
611 } // namespace re2 | |
OLD | NEW |