Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(355)

Side by Side Diff: src/jsregexp.h

Issue 12427: Merge regexp2000 back into bleeding_edge (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 12 matching lines...) Expand all
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 #ifndef V8_JSREGEXP_H_ 28 #ifndef V8_JSREGEXP_H_
29 #define V8_JSREGEXP_H_ 29 #define V8_JSREGEXP_H_
30 30
31 namespace v8 { namespace internal { 31 namespace v8 { namespace internal {
32 32
33
34 class RegExpMacroAssembler;
35
36
33 class RegExpImpl { 37 class RegExpImpl {
34 public: 38 public:
35 // Creates a regular expression literal in the old space. 39 // Creates a regular expression literal in the old space.
36 // This function calls the garbage collector if necessary. 40 // This function calls the garbage collector if necessary.
37 static Handle<Object> CreateRegExpLiteral(Handle<JSFunction> constructor, 41 static Handle<Object> CreateRegExpLiteral(Handle<JSFunction> constructor,
38 Handle<String> pattern, 42 Handle<String> pattern,
39 Handle<String> flags, 43 Handle<String> flags,
40 bool* has_pending_exception); 44 bool* has_pending_exception);
41 45
42 // Returns a string representation of a regular expression. 46 // Returns a string representation of a regular expression.
(...skipping 11 matching lines...) Expand all
54 static Handle<Object> Exec(Handle<JSRegExp> regexp, 58 static Handle<Object> Exec(Handle<JSRegExp> regexp,
55 Handle<String> subject, 59 Handle<String> subject,
56 Handle<Object> index); 60 Handle<Object> index);
57 61
58 // Call RegExp.prototyp.exec(string) in a loop. 62 // Call RegExp.prototyp.exec(string) in a loop.
59 // Used by String.prototype.match and String.prototype.replace. 63 // Used by String.prototype.match and String.prototype.replace.
60 // This function calls the garbage collector if necessary. 64 // This function calls the garbage collector if necessary.
61 static Handle<Object> ExecGlobal(Handle<JSRegExp> regexp, 65 static Handle<Object> ExecGlobal(Handle<JSRegExp> regexp,
62 Handle<String> subject); 66 Handle<String> subject);
63 67
68 // Stores an uncompiled RegExp pattern in the JSRegExp object.
69 // It will be compiled by JSCRE when first executed.
70 static Handle<Object> JscrePrepare(Handle<JSRegExp> re,
71 Handle<String> pattern,
72 JSRegExp::Flags flags);
73
74 // Stores a compiled RegExp pattern in the JSRegExp object.
75 // The pattern is compiled by Irregexp.
76 static Handle<Object> IrregexpPrepare(Handle<JSRegExp> re,
77 Handle<String> pattern,
78 JSRegExp::Flags flags,
79 Handle<FixedArray> irregexp_data);
80
81
82 // Compile the pattern using JSCRE and store the result in the
83 // JSRegExp object.
84 static Handle<Object> JscreCompile(Handle<JSRegExp> re);
85
64 static Handle<Object> AtomCompile(Handle<JSRegExp> re, 86 static Handle<Object> AtomCompile(Handle<JSRegExp> re,
65 Handle<String> pattern, 87 Handle<String> pattern,
66 JSRegExp::Flags flags); 88 JSRegExp::Flags flags,
67 89 Handle<String> match_pattern);
68 static Handle<Object> AtomExec(Handle<JSRegExp> regexp, 90 static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
69 Handle<String> subject, 91 Handle<String> subject,
70 Handle<Object> index); 92 Handle<Object> index);
71 93
72 static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp, 94 static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp,
73 Handle<String> subject); 95 Handle<String> subject);
74 96
75 static Handle<Object> JsreCompile(Handle<JSRegExp> re, 97 static Handle<Object> JscreCompile(Handle<JSRegExp> re,
76 Handle<String> pattern, 98 Handle<String> pattern,
77 JSRegExp::Flags flags); 99 JSRegExp::Flags flags);
78 100
79 static Handle<Object> JsreExec(Handle<JSRegExp> regexp, 101 // Execute a compiled JSCRE pattern.
80 Handle<String> subject, 102 static Handle<Object> JscreExec(Handle<JSRegExp> regexp,
81 Handle<Object> index); 103 Handle<String> subject,
104 Handle<Object> index);
82 105
83 static Handle<Object> JsreExecGlobal(Handle<JSRegExp> regexp, 106 // Execute an Irregexp bytecode pattern.
84 Handle<String> subject); 107 static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
108 Handle<String> subject,
109 Handle<Object> index);
110
111 static Handle<Object> JscreExecGlobal(Handle<JSRegExp> regexp,
112 Handle<String> subject);
113
114 static Handle<Object> IrregexpExecGlobal(Handle<JSRegExp> regexp,
115 Handle<String> subject);
85 116
86 static void NewSpaceCollectionPrologue(); 117 static void NewSpaceCollectionPrologue();
87 static void OldSpaceCollectionPrologue(); 118 static void OldSpaceCollectionPrologue();
88 119
89 private:
90 // Converts a source string to a 16 bit flat string. The string 120 // Converts a source string to a 16 bit flat string. The string
91 // will be either sequential or it will be a SlicedString backed 121 // will be either sequential or it will be a SlicedString backed
92 // by a flat string. 122 // by a flat string.
93 static Handle<String> StringToTwoByte(Handle<String> pattern); 123 static Handle<String> StringToTwoByte(Handle<String> pattern);
94 static Handle<String> CachedStringToTwoByte(Handle<String> pattern); 124 static Handle<String> CachedStringToTwoByte(Handle<String> pattern);
95 125
126 static const int kIrregexpImplementationIndex = 0;
127 static const int kIrregexpNumberOfCapturesIndex = 1;
128 static const int kIrregexpNumberOfRegistersIndex = 2;
129 static const int kIrregexpCodeIndex = 3;
130 static const int kIrregexpDataLength = 4;
131
132 static const int kJscreNumberOfCapturesIndex = 0;
133 static const int kJscreInternalIndex = 1;
134 static const int kJscreDataLength = 2;
135
136 private:
96 static String* last_ascii_string_; 137 static String* last_ascii_string_;
97 static String* two_byte_cached_string_; 138 static String* two_byte_cached_string_;
98 139
99 // Returns the caputure from the re. 140 static int JscreNumberOfCaptures(Handle<JSRegExp> re);
100 static int JsreCapture(Handle<JSRegExp> re); 141 static ByteArray* JscreInternal(Handle<JSRegExp> re);
101 static ByteArray* JsreInternal(Handle<JSRegExp> re); 142
143 static int IrregexpNumberOfCaptures(Handle<JSRegExp> re);
144 static int IrregexpNumberOfRegisters(Handle<JSRegExp> re);
145 static Handle<ByteArray> IrregexpCode(Handle<JSRegExp> re);
102 146
103 // Call jsRegExpExecute once 147 // Call jsRegExpExecute once
104 static Handle<Object> JsreExecOnce(Handle<JSRegExp> regexp, 148 static Handle<Object> JscreExecOnce(Handle<JSRegExp> regexp,
105 int num_captures, 149 int num_captures,
106 Handle<String> subject, 150 Handle<String> subject,
107 int previous_index, 151 int previous_index,
108 const uc16* utf8_subject, 152 const uc16* utf8_subject,
109 int* ovector, 153 int* ovector,
110 int ovector_length); 154 int ovector_length);
155
156 static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> regexp,
157 int num_captures,
158 Handle<String> subject16,
159 int previous_index,
160 int* ovector,
161 int ovector_length);
111 162
112 // Set the subject cache. The previous string buffer is not deleted, so the 163 // Set the subject cache. The previous string buffer is not deleted, so the
113 // caller should ensure that it doesn't leak. 164 // caller should ensure that it doesn't leak.
114 static void SetSubjectCache(String* subject, char* utf8_subject, 165 static void SetSubjectCache(String* subject,
115 int uft8_length, int character_position, 166 char* utf8_subject,
167 int uft8_length,
168 int character_position,
116 int utf8_position); 169 int utf8_position);
117 170
118 // A one element cache of the last utf8_subject string and its length. The 171 // A one element cache of the last utf8_subject string and its length. The
119 // subject JS String object is cached in the heap. We also cache a 172 // subject JS String object is cached in the heap. We also cache a
120 // translation between position and utf8 position. 173 // translation between position and utf8 position.
121 static char* utf8_subject_cache_; 174 static char* utf8_subject_cache_;
122 static int utf8_length_cache_; 175 static int utf8_length_cache_;
123 static int utf8_position_; 176 static int utf8_position_;
124 static int character_position_; 177 static int character_position_;
125 }; 178 };
126 179
127 180
181 class CharacterRange {
iposva 2008/11/26 06:56:22 Please add comments about this class.
182 public:
183 CharacterRange() : from_(0), to_(0) { }
184 // For compatibility with the CHECK_OK macro
185 CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT
186 CharacterRange(uc16 from, uc16 to)
187 : from_(from),
Mads Ager (chromium) 2008/11/25 21:09:41 Four space indent. This migth all fit on one line
Christian Plesner Hansen 2008/11/26 06:49:56 Fixed.
188 to_(to) {
189 }
190 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges);
191 static inline CharacterRange Singleton(uc16 value) {
192 return CharacterRange(value, value);
193 }
194 static inline CharacterRange Range(uc16 from, uc16 to) {
195 ASSERT(from <= to);
196 return CharacterRange(from, to);
197 }
198 static inline CharacterRange Everything() {
199 return CharacterRange(0, 0xFFFF);
200 }
201 bool Contains(uc16 i) { return from_ <= i && i <= to_; }
202 uc16 from() const { return from_; }
203 void set_from(uc16 value) { from_ = value; }
204 uc16 to() const { return to_; }
205 void set_to(uc16 value) { to_ = value; }
206 bool is_valid() { return from_ <= to_; }
207 bool IsSingleton() { return (from_ == to_); }
208 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges);
209 static const int kRangeCanonicalizeMax = 0x200;
iposva 2008/11/26 06:56:22 Declaration order: constants before constructors.
Christian Plesner Hansen 2008/11/26 07:49:51 We've never followed that rule. In fact, as far a
210 static const int kStartMarker = (1 << 24);
211 static const int kPayloadMask = (1 << 24) - 1;
212 private:
iposva 2008/11/26 06:56:22 Please leave a blank line before "private:" in acc
Christian Plesner Hansen 2008/11/26 07:49:51 Again, that's not a rule we've been consistent abo
213 uc16 from_;
214 uc16 to_;
215 };
216
217
218 template <typename Node, class Callback>
219 static void DoForEach(Node* node, Callback* callback);
220
221
222 // A zone splay tree. The config type parameter encapsulates the
223 // different configurations of a concrete splay tree:
224 //
225 // typedef Key: the key type
226 // typedef Value: the value type
227 // static const kNoKey: the dummy key used when no key is set
228 // static const kNoValue: the dummy value used to initialize nodes
229 // int (Compare)(Key& a, Key& b) -> {-1, 0, 1}: comparison function
230 //
231 template <typename Config>
232 class ZoneSplayTree : public ZoneObject {
233 public:
234 typedef typename Config::Key Key;
235 typedef typename Config::Value Value;
236
237 class Locator;
238
239 ZoneSplayTree() : root_(NULL) { }
240
241 // Inserts the given key in this tree with the given value. Returns
242 // true if a node was inserted, otherwise false. If found the locator
243 // is enabled and provides access to the mapping for the key.
244 bool Insert(const Key& key, Locator* locator);
245
246 // Looks up the key in this tree and returns true if it was found,
247 // otherwise false. If the node is found the locator is enabled and
248 // provides access to the mapping for the key.
249 bool Find(const Key& key, Locator* locator);
250
251 // Finds the mapping with the greatest key less than or equal to the
252 // given key.
253 bool FindGreatestLessThan(const Key& key, Locator* locator);
254
255 // Find the mapping with the greatest key in this tree.
256 bool FindGreatest(Locator* locator);
257
258 // Finds the mapping with the least key greater than or equal to the
259 // given key.
260 bool FindLeastGreaterThan(const Key& key, Locator* locator);
261
262 // Find the mapping with the least key in this tree.
263 bool FindLeast(Locator* locator);
264
265 // Remove the node with the given key from the tree.
266 bool Remove(const Key& key);
267
268 bool is_empty() { return root_ == NULL; }
269
270 // Perform the splay operation for the given key. Moves the node with
271 // the given key to the top of the tree. If no node has the given
272 // key, the last node on the search path is moved to the top of the
273 // tree.
274 void Splay(const Key& key);
275
276 class Node : public ZoneObject {
277 public:
278 Node(const Key& key, const Value& value)
279 : key_(key),
Mads Ager (chromium) 2008/11/25 21:09:41 Four space indent. Many more occurrences below.
Christian Plesner Hansen 2008/11/26 06:49:56 Fixed.
280 value_(value),
281 left_(NULL),
282 right_(NULL) { }
283 Key key() { return key_; }
Mads Ager (chromium) 2008/11/25 21:09:41 Indentation is off for all the accessors.
Christian Plesner Hansen 2008/11/26 06:49:56 Fixed. I wonder why lint didn't catch this?
284 Value value() { return value_; }
285 Node* left() { return left_; }
286 Node* right() { return right_; }
287 private:
288 friend class ZoneSplayTree;
289 friend class Locator;
290 Key key_;
291 Value value_;
292 Node* left_;
293 Node* right_;
294 };
295
296 // A locator provides access to a node in the tree without actually
297 // exposing the node.
298 class Locator {
299 public:
300 explicit Locator(Node* node) : node_(node) { }
301 Locator() : node_(NULL) { }
302 const Key& key() { return node_->key_; }
303 Value& value() { return node_->value_; }
304 void set_value(const Value& value) { node_->value_ = value; }
305 inline void bind(Node* node) { node_ = node; }
306 private:
307 Node* node_;
308 };
309
310 template <class Callback>
311 void ForEach(Callback* c) {
312 DoForEach<typename ZoneSplayTree<Config>::Node, Callback>(root_, c);
313 }
314
315 private:
316 Node* root_;
317 };
318
319
320 // A set of unsigned integers that behaves especially well on small
321 // integers (< 32). May do zone-allocation.
322 class OutSet: public ZoneObject {
323 public:
324 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { }
325 OutSet* Extend(unsigned value);
326 bool Get(unsigned value);
327 static const unsigned kFirstLimit = 32;
328 private:
Mads Ager (chromium) 2008/11/25 21:09:41 Have the empty line before private instead of afte
Christian Plesner Hansen 2008/11/26 06:49:56 Fixed.
329
330 // Destructively set a value in this set. In most cases you want
331 // to use Extend instead to ensure that only one instance exists
332 // that contains the same values.
333 void Set(unsigned value);
334
335 // The successors are a list of sets that contain the same values
336 // as this set and the one more value that is not present in this
337 // set.
338 ZoneList<OutSet*>* successors() { return successors_; }
339
340 OutSet(uint32_t first, ZoneList<unsigned>* remaining)
341 : first_(first), remaining_(remaining), successors_(NULL) { }
342 uint32_t first_;
343 ZoneList<unsigned>* remaining_;
344 ZoneList<OutSet*>* successors_;
345 };
346
347
348 // A mapping from integers, specified as ranges, to a set of integers.
349 // Used for mapping character ranges to choices.
350 class DispatchTable {
351 public:
352 class Entry {
353 public:
354 Entry()
355 : from_(0), to_(0), out_set_(NULL) { }
Mads Ager (chromium) 2008/11/25 21:09:41 Put this on one line.
Christian Plesner Hansen 2008/11/26 06:49:56 Fixed.
356 Entry(uc16 from, uc16 to, OutSet* out_set)
357 : from_(from), to_(to), out_set_(out_set) { }
358 uc16 from() { return from_; }
359 uc16 to() { return to_; }
360 void set_to(uc16 value) { to_ = value; }
361 void AddValue(int value) { out_set_ = out_set_->Extend(value); }
362 OutSet* out_set() { return out_set_; }
363 private:
364 uc16 from_;
365 uc16 to_;
366 OutSet* out_set_;
367 };
368
369 class Config {
370 public:
371 typedef uc16 Key;
372 typedef Entry Value;
373 static const uc16 kNoKey;
374 static const Entry kNoValue;
375 static inline int Compare(uc16 a, uc16 b) {
376 if (a == b)
Mads Ager (chromium) 2008/11/25 21:09:41 How about: if (a == b) return 0; if (a < b) retur
Christian Plesner Hansen 2008/11/26 06:49:56 That's a religious question: should there be an 'e
377 return 0;
378 else if (a < b)
379 return -1;
380 else
381 return 1;
382 }
383 };
384
385 void AddRange(CharacterRange range, int value);
386 OutSet* Get(uc16 value);
387 void Dump();
388
389 template <typename Callback>
390 void ForEach(Callback* callback) { return tree()->ForEach(callback); }
391 private:
392 // There can't be a static empty set since it allocates its
393 // successors in a zone and caches them.
394 OutSet* empty() { return &empty_; }
395 OutSet empty_;
396 ZoneSplayTree<Config>* tree() { return &tree_; }
397 ZoneSplayTree<Config> tree_;
398 };
399
400
401 #define FOR_EACH_NODE_TYPE(VISIT) \
402 VISIT(End) \
403 VISIT(Action) \
404 VISIT(Choice) \
405 VISIT(BackReference) \
406 VISIT(Text)
407
408
409 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
410 VISIT(Disjunction) \
411 VISIT(Alternative) \
412 VISIT(Assertion) \
413 VISIT(CharacterClass) \
414 VISIT(Atom) \
415 VISIT(Quantifier) \
416 VISIT(Capture) \
417 VISIT(Lookahead) \
418 VISIT(BackReference) \
419 VISIT(Empty) \
420 VISIT(Text)
421
422
423 #define FORWARD_DECLARE(Name) class RegExp##Name;
424 FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
425 #undef FORWARD_DECLARE
426
427
428 class TextElement {
429 public:
430 enum Type {UNINITIALIZED, ATOM, CHAR_CLASS};
431 TextElement() : type(UNINITIALIZED) { }
432 explicit TextElement(Type t) : type(t) { }
433 static TextElement Atom(RegExpAtom* atom);
434 static TextElement CharClass(RegExpCharacterClass* char_class);
435 Type type;
436 union {
437 RegExpAtom* u_atom;
438 RegExpCharacterClass* u_char_class;
439 } data;
440 };
441
442
443 struct NodeInfo {
444 NodeInfo()
445 : being_analyzed(false),
446 been_analyzed(false),
447 determine_word(false),
448 determine_newline(false),
449 determine_start(false),
450 follows_word_interest(false),
451 follows_newline_interest(false),
452 follows_start_interest(false) { }
453 bool SameInterests(NodeInfo* that) {
454 return (follows_word_interest == that->follows_word_interest)
455 && (follows_newline_interest == that->follows_newline_interest)
456 && (follows_start_interest == that->follows_start_interest);
457 }
458 void AdoptInterests(NodeInfo* that) {
459 follows_word_interest = that->follows_word_interest;
460 follows_newline_interest = that->follows_newline_interest;
461 follows_start_interest = that->follows_start_interest;
462 }
463 bool prev_determine_word() {
464 return determine_word || follows_word_interest;
465 }
466 bool prev_determine_newline() {
467 return determine_newline || follows_newline_interest;
468 }
469 bool prev_determine_start() {
470 return determine_start || follows_start_interest;
471 }
472 bool being_analyzed: 1;
473 bool been_analyzed: 1;
474 bool determine_word: 1;
475 bool determine_newline: 1;
476 bool determine_start: 1;
477 bool follows_word_interest: 1;
478 bool follows_newline_interest: 1;
479 bool follows_start_interest: 1;
480 };
481
482
483 STATIC_CHECK(sizeof(NodeInfo) <= sizeof(int)); // NOLINT
484
485
486 class SiblingList {
487 public:
488 SiblingList() : list_(NULL) { }
489 int length() {
490 return list_ == NULL ? 0 : list_->length();
491 }
492 void Ensure(RegExpNode* parent) {
493 if (list_ == NULL) {
494 list_ = new ZoneList<RegExpNode*>(2);
495 list_->Add(parent);
496 }
497 }
498 void Add(RegExpNode* node) { list_->Add(node); }
499 RegExpNode* Get(int index) { return list_->at(index); }
500 private:
501 ZoneList<RegExpNode*>* list_;
502 };
503
504
505 class RegExpNode: public ZoneObject {
506 public:
507 virtual ~RegExpNode() { }
508 virtual void Accept(NodeVisitor* visitor) = 0;
509 // Generates a goto to this node or actually generates the code at this point.
510 // Until the implementation is complete we will return true for success and
511 // false for failure.
512 virtual bool GoTo(RegExpCompiler* compiler);
513 Label* label();
514
515 // Until the implementation is complete we will return true for success and
516 // false for failure.
517 virtual bool Emit(RegExpCompiler* compiler) = 0;
518 virtual RegExpNode* PropagateInterest(NodeInfo* info) = 0;
519 NodeInfo* info() { return &info_; }
520 virtual bool IsBacktrack() { return false; }
521 RegExpNode* GetSibling(NodeInfo* info);
522 void EnsureSiblings() { siblings_.Ensure(this); }
523 void AddSibling(RegExpNode* node) { siblings_.Add(node); }
524 protected:
525 inline void Bind(RegExpMacroAssembler* macro);
526 private:
527 Label label_;
528 NodeInfo info_;
529 SiblingList siblings_;
530 };
531
532
533 class SeqRegExpNode: public RegExpNode {
534 public:
535 explicit SeqRegExpNode(RegExpNode* on_success)
536 : on_success_(on_success) { }
537 RegExpNode* on_success() { return on_success_; }
538 void set_on_success(RegExpNode* node) { on_success_ = node; }
539 virtual bool Emit(RegExpCompiler* compiler) { return false; }
540 private:
541 RegExpNode* on_success_;
542 };
543
544
545 class ActionNode: public SeqRegExpNode {
546 public:
547 enum Type {
548 STORE_REGISTER,
549 INCREMENT_REGISTER,
550 STORE_POSITION,
551 SAVE_POSITION,
552 RESTORE_POSITION,
553 BEGIN_SUBMATCH,
554 ESCAPE_SUBMATCH
555 };
556 static ActionNode* StoreRegister(int reg, int val, RegExpNode* on_success);
557 static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
558 static ActionNode* StorePosition(int reg, RegExpNode* on_success);
559 static ActionNode* SavePosition(int reg, RegExpNode* on_success);
560 static ActionNode* RestorePosition(int reg, RegExpNode* on_success);
561 static ActionNode* BeginSubmatch(int reg, RegExpNode* on_success);
562 static ActionNode* EscapeSubmatch(int reg, RegExpNode* on_success);
563 virtual void Accept(NodeVisitor* visitor);
564 virtual bool Emit(RegExpCompiler* compiler);
565 virtual RegExpNode* PropagateInterest(NodeInfo* info);
566 private:
567 union {
568 struct {
569 int reg;
570 int value;
571 } u_store_register;
572 struct {
573 int reg;
574 } u_increment_register;
575 struct {
576 int reg;
577 } u_position_register;
578 struct {
579 int reg;
580 } u_submatch_stack_pointer_register;
581 } data_;
582 ActionNode(Type type, RegExpNode* on_success)
583 : SeqRegExpNode(on_success),
584 type_(type) { }
585 Type type_;
586 friend class DotPrinter;
587 };
588
589
590 class TextNode: public SeqRegExpNode {
591 public:
592 TextNode(ZoneList<TextElement>* elms,
593 RegExpNode* on_success,
594 RegExpNode* on_failure)
595 : SeqRegExpNode(on_success),
596 on_failure_(on_failure),
597 elms_(elms) { }
598 virtual void Accept(NodeVisitor* visitor);
599 virtual RegExpNode* PropagateInterest(NodeInfo* info);
600 RegExpNode* on_failure() { return on_failure_; }
601 virtual bool Emit(RegExpCompiler* compiler);
602 ZoneList<TextElement>* elements() { return elms_; }
603 private:
604 RegExpNode* on_failure_;
605 ZoneList<TextElement>* elms_;
606 };
607
608
609 class BackReferenceNode: public SeqRegExpNode {
610 public:
611 BackReferenceNode(int start_reg,
612 int end_reg,
613 RegExpNode* on_success,
614 RegExpNode* on_failure)
615 : SeqRegExpNode(on_success),
616 on_failure_(on_failure),
617 start_reg_(start_reg),
618 end_reg_(end_reg) { }
619 virtual void Accept(NodeVisitor* visitor);
620 RegExpNode* on_failure() { return on_failure_; }
621 int start_register() { return start_reg_; }
622 int end_register() { return end_reg_; }
623 virtual bool Emit(RegExpCompiler* compiler);
624 virtual RegExpNode* PropagateInterest(NodeInfo* info);
625 private:
626 RegExpNode* on_failure_;
627 int start_reg_;
628 int end_reg_;
629 };
630
631
632 class EndNode: public RegExpNode {
633 public:
634 enum Action { ACCEPT, BACKTRACK };
635 explicit EndNode(Action action) : action_(action) { }
636 virtual void Accept(NodeVisitor* visitor);
637 virtual bool Emit(RegExpCompiler* compiler);
638 virtual RegExpNode* PropagateInterest(NodeInfo* info);
639 virtual bool IsBacktrack() { return action_ == BACKTRACK; }
640 virtual bool GoTo(RegExpCompiler* compiler);
641 private:
642 Action action_;
643 };
644
645
646 class Guard: public ZoneObject {
647 public:
648 enum Relation { LT, GEQ };
649 Guard(int reg, Relation op, int value)
650 : reg_(reg),
651 op_(op),
652 value_(value) { }
653 int reg() { return reg_; }
654 Relation op() { return op_; }
655 int value() { return value_; }
656 private:
657 int reg_;
658 Relation op_;
659 int value_;
660 };
661
662
663 class GuardedAlternative {
664 public:
665 explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { }
666 void AddGuard(Guard* guard);
667 RegExpNode* node() { return node_; }
668 void set_node(RegExpNode* node) { node_ = node; }
669 ZoneList<Guard*>* guards() { return guards_; }
670 private:
671 RegExpNode* node_;
672 ZoneList<Guard*>* guards_;
673 };
674
675
676 class ChoiceNode: public RegExpNode {
677 public:
678 explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
679 : on_failure_(on_failure),
680 alternatives_(new ZoneList<GuardedAlternative>(expected_size)),
681 table_calculated_(false),
682 being_calculated_(false) { }
683 virtual void Accept(NodeVisitor* visitor);
684 void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); }
685 ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
686 DispatchTable* table() { return &table_; }
687 RegExpNode* on_failure() { return on_failure_; }
688 virtual bool Emit(RegExpCompiler* compiler);
689 virtual RegExpNode* PropagateInterest(NodeInfo* info);
690 bool table_calculated() { return table_calculated_; }
691 void set_table_calculated(bool b) { table_calculated_ = b; }
692 bool being_calculated() { return being_calculated_; }
693 void set_being_calculated(bool b) { being_calculated_ = b; }
694 private:
695 void GenerateGuard(RegExpMacroAssembler* macro_assembler,
696 Guard *guard,
Erik Corry 2008/11/25 11:03:52 Misplaced asterisk.
Mads Ager (chromium) 2008/11/25 21:09:41 This one is still there.
697 Label* on_failure);
698 RegExpNode* on_failure_;
699 ZoneList<GuardedAlternative>* alternatives_;
700 DispatchTable table_;
701 bool table_calculated_;
702 bool being_calculated_;
703 };
704
705
706 class NodeVisitor {
707 public:
708 virtual ~NodeVisitor() { }
709 #define DECLARE_VISIT(Type) \
710 virtual void Visit##Type(Type##Node* that) = 0;
711 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
712 #undef DECLARE_VISIT
713 };
714
715
716 // Node visitor used to add the start set of the alternatives to the
717 // dispatch table of a choice node.
718 class DispatchTableConstructor: public NodeVisitor {
719 public:
720 explicit DispatchTableConstructor(DispatchTable* table)
721 : table_(table),
722 choice_index_(-1) { }
723
724 void BuildTable(ChoiceNode* node);
725
726 void AddRange(CharacterRange range) {
727 table()->AddRange(range, choice_index_);
728 }
729
730 void AddInverse(ZoneList<CharacterRange>* ranges);
731
732 #define DECLARE_VISIT(Type) \
733 virtual void Visit##Type(Type##Node* that);
734 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
735 #undef DECLARE_VISIT
736
737 DispatchTable* table() { return table_; }
738 void set_choice_index(int value) { choice_index_ = value; }
739
740 protected:
741 DispatchTable *table_;
Erik Corry 2008/11/25 11:03:52 Misplaced asterisk.
Mads Ager (chromium) 2008/11/25 21:09:41 This is still there.
742 int choice_index_;
743 };
744
745
746 class Analysis: public NodeVisitor {
747 public:
748 void EnsureAnalyzed(RegExpNode* node);
749
750 #define DECLARE_VISIT(Type) \
751 virtual void Visit##Type(Type##Node* that);
752 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
753 #undef DECLARE_VISIT
754 };
755
756
757 struct RegExpParseResult {
758 RegExpTree* tree;
759 bool has_character_escapes;
760 Handle<String> error;
761 int capture_count;
762 };
763
764
765 class RegExpEngine: public AllStatic {
766 public:
767 static Handle<FixedArray> Compile(RegExpParseResult* input,
768 RegExpNode** node_return,
769 bool ignore_case);
770 static void DotPrint(const char* label, RegExpNode* node);
771 };
772
773
128 } } // namespace v8::internal 774 } } // namespace v8::internal
129 775
130 #endif // V8_JSREGEXP_H_ 776 #endif // V8_JSREGEXP_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698