Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(452)

Side by Side Diff: third_party/re2/re2/onepass.cc

Issue 1516543002: Update re2 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: updated update instructions Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/re2/re2/nfa.cc ('k') | third_party/re2/re2/parse.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2008 The RE2 Authors. All Rights Reserved. 1 // Copyright 2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 // Tested by search_test.cc. 5 // Tested by search_test.cc.
6 // 6 //
7 // Prog::SearchOnePass is an efficient implementation of 7 // Prog::SearchOnePass is an efficient implementation of
8 // regular expression search with submatch tracking for 8 // regular expression search with submatch tracking for
9 // what I call "one-pass regular expressions". (An alternate 9 // what I call "one-pass regular expressions". (An alternate
10 // name might be "backtracking-free regular expressions".) 10 // name might be "backtracking-free regular expressions".)
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
46 // One-pass regular expressions get used a lot when RE is 46 // One-pass regular expressions get used a lot when RE is
47 // used for parsing simple strings, so it pays off to 47 // used for parsing simple strings, so it pays off to
48 // notice them and handle them efficiently. 48 // notice them and handle them efficiently.
49 // 49 //
50 // See also Anne Brüggemann-Klein and Derick Wood, 50 // See also Anne Brüggemann-Klein and Derick Wood,
51 // "One-unambiguous regular languages", Information and Computation 142(2). 51 // "One-unambiguous regular languages", Information and Computation 142(2).
52 52
53 #include <string.h> 53 #include <string.h>
54 #include <map> 54 #include <map>
55 #include "util/util.h" 55 #include "util/util.h"
56 #include "util/arena.h"
57 #include "util/sparse_set.h" 56 #include "util/sparse_set.h"
58 #include "re2/prog.h" 57 #include "re2/prog.h"
59 #include "re2/stringpiece.h" 58 #include "re2/stringpiece.h"
60 59
61 namespace re2 { 60 namespace re2 {
62 61
63 static const int Debug = 0; 62 static const int Debug = 0;
64 63
65 // The key insight behind this implementation is that the 64 // The key insight behind this implementation is that the
66 // non-determinism in an NFA for a one-pass regular expression 65 // non-determinism in an NFA for a one-pass regular expression
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
119 // To execute a one-pass regular expression program, we can build 118 // To execute a one-pass regular expression program, we can build
120 // a DFA (no non-determinism) that has at most as many states as 119 // a DFA (no non-determinism) that has at most as many states as
121 // the NFA (compare this to the possibly exponential number of states 120 // the NFA (compare this to the possibly exponential number of states
122 // in the general case). Each state records, for each possible 121 // in the general case). Each state records, for each possible
123 // input byte, the next state along with the conditions required 122 // input byte, the next state along with the conditions required
124 // before entering that state -- empty-width flags that must be true 123 // before entering that state -- empty-width flags that must be true
125 // and capture operations that must be performed. It also records 124 // and capture operations that must be performed. It also records
126 // whether a set of conditions required to finish a match at that 125 // whether a set of conditions required to finish a match at that
127 // point in the input rather than process the next byte. 126 // point in the input rather than process the next byte.
128 127
129 // A state in the one-pass NFA (aka DFA) - just an array of actions.
130 struct OneState;
131
132 // A state in the one-pass NFA - just an array of actions indexed 128 // A state in the one-pass NFA - just an array of actions indexed
133 // by the bytemap_[] of the next input byte. (The bytemap 129 // by the bytemap_[] of the next input byte. (The bytemap
134 // maps next input bytes into equivalence classes, to reduce 130 // maps next input bytes into equivalence classes, to reduce
135 // the memory footprint.) 131 // the memory footprint.)
136 struct OneState { 132 struct OneState {
137 uint32 matchcond; // conditions to match right now. 133 uint32 matchcond; // conditions to match right now.
138 uint32 action[1]; 134 uint32 action[1];
139 }; 135 };
140 136
141 // The uint32 conditions in the action are a combination of 137 // The uint32 conditions in the action are a combination of
(...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after
328 matchcap[i] = cap[i]; 324 matchcap[i] = cap[i];
329 matchcap[1] = p; 325 matchcap[1] = p;
330 matched = true; 326 matched = true;
331 } 327 }
332 } 328 }
333 329
334 done: 330 done:
335 if (!matched) 331 if (!matched)
336 return false; 332 return false;
337 for (int i = 0; i < nmatch; i++) 333 for (int i = 0; i < nmatch; i++)
338 match[i].set(matchcap[2*i], matchcap[2*i+1] - matchcap[2*i]); 334 match[i].set(matchcap[2*i],
335 static_cast<int>(matchcap[2*i+1] - matchcap[2*i]));
339 return true; 336 return true;
340 } 337 }
341 338
342 339
343 // Analysis to determine whether a given regexp program is one-pass. 340 // Analysis to determine whether a given regexp program is one-pass.
344 341
345 // If ip is not on workq, adds ip to work queue and returns true. 342 // If ip is not on workq, adds ip to work queue and returns true.
346 // If ip is already on work queue, does nothing and returns false. 343 // If ip is already on work queue, does nothing and returns false.
347 // If ip is NULL, does nothing and returns true (pretends to add it). 344 // If ip is NULL, does nothing and returns true (pretends to add it).
348 typedef SparseSet Instq; 345 typedef SparseSet Instq;
(...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after
605 return true; 602 return true;
606 603
607 fail: 604 fail:
608 delete[] stack; 605 delete[] stack;
609 delete[] nodebyid; 606 delete[] nodebyid;
610 delete[] nodes; 607 delete[] nodes;
611 return false; 608 return false;
612 } 609 }
613 610
614 } // namespace re2 611 } // namespace re2
OLDNEW
« no previous file with comments | « third_party/re2/re2/nfa.cc ('k') | third_party/re2/re2/parse.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698