OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_REGEXP_JSREGEXP_H_ | 5 #ifndef V8_REGEXP_JSREGEXP_H_ |
6 #define V8_REGEXP_JSREGEXP_H_ | 6 #define V8_REGEXP_JSREGEXP_H_ |
7 | 7 |
8 #include "src/allocation.h" | 8 #include "src/allocation.h" |
9 #include "src/assembler.h" | 9 #include "src/assembler.h" |
10 #include "src/regexp/regexp-ast.h" | 10 #include "src/regexp/regexp-ast.h" |
(...skipping 247 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
258 | 258 |
259 // A mapping from integers, specified as ranges, to a set of integers. | 259 // A mapping from integers, specified as ranges, to a set of integers. |
260 // Used for mapping character ranges to choices. | 260 // Used for mapping character ranges to choices. |
261 class DispatchTable : public ZoneObject { | 261 class DispatchTable : public ZoneObject { |
262 public: | 262 public: |
263 explicit DispatchTable(Zone* zone) : tree_(zone) { } | 263 explicit DispatchTable(Zone* zone) : tree_(zone) { } |
264 | 264 |
265 class Entry { | 265 class Entry { |
266 public: | 266 public: |
267 Entry() : from_(0), to_(0), out_set_(NULL) { } | 267 Entry() : from_(0), to_(0), out_set_(NULL) { } |
268 Entry(uc32 from, uc32 to, OutSet* out_set) | 268 Entry(uc16 from, uc16 to, OutSet* out_set) |
269 : from_(from), to_(to), out_set_(out_set) {} | 269 : from_(from), to_(to), out_set_(out_set) { } |
270 uc32 from() { return from_; } | 270 uc16 from() { return from_; } |
271 uc32 to() { return to_; } | 271 uc16 to() { return to_; } |
272 void set_to(uc32 value) { to_ = value; } | 272 void set_to(uc16 value) { to_ = value; } |
273 void AddValue(int value, Zone* zone) { | 273 void AddValue(int value, Zone* zone) { |
274 out_set_ = out_set_->Extend(value, zone); | 274 out_set_ = out_set_->Extend(value, zone); |
275 } | 275 } |
276 OutSet* out_set() { return out_set_; } | 276 OutSet* out_set() { return out_set_; } |
277 private: | 277 private: |
278 uc32 from_; | 278 uc16 from_; |
279 uc32 to_; | 279 uc16 to_; |
280 OutSet* out_set_; | 280 OutSet* out_set_; |
281 }; | 281 }; |
282 | 282 |
283 class Config { | 283 class Config { |
284 public: | 284 public: |
285 typedef uc32 Key; | 285 typedef uc16 Key; |
286 typedef Entry Value; | 286 typedef Entry Value; |
287 static const uc32 kNoKey; | 287 static const uc16 kNoKey; |
288 static const Entry NoValue() { return Value(); } | 288 static const Entry NoValue() { return Value(); } |
289 static inline int Compare(uc32 a, uc32 b) { | 289 static inline int Compare(uc16 a, uc16 b) { |
290 if (a == b) | 290 if (a == b) |
291 return 0; | 291 return 0; |
292 else if (a < b) | 292 else if (a < b) |
293 return -1; | 293 return -1; |
294 else | 294 else |
295 return 1; | 295 return 1; |
296 } | 296 } |
297 }; | 297 }; |
298 | 298 |
299 void AddRange(CharacterRange range, int value, Zone* zone); | 299 void AddRange(CharacterRange range, int value, Zone* zone); |
300 OutSet* Get(uc32 value); | 300 OutSet* Get(uc16 value); |
301 void Dump(); | 301 void Dump(); |
302 | 302 |
303 template <typename Callback> | 303 template <typename Callback> |
304 void ForEach(Callback* callback) { | 304 void ForEach(Callback* callback) { |
305 return tree()->ForEach(callback); | 305 return tree()->ForEach(callback); |
306 } | 306 } |
307 | 307 |
308 private: | 308 private: |
309 // There can't be a static empty set since it allocates its | 309 // There can't be a static empty set since it allocates its |
310 // successors in a zone and caches them. | 310 // successors in a zone and caches them. |
311 OutSet* empty() { return &empty_; } | 311 OutSet* empty() { return &empty_; } |
312 OutSet empty_; | 312 OutSet empty_; |
313 ZoneSplayTree<Config>* tree() { return &tree_; } | 313 ZoneSplayTree<Config>* tree() { return &tree_; } |
314 ZoneSplayTree<Config> tree_; | 314 ZoneSplayTree<Config> tree_; |
315 }; | 315 }; |
316 | 316 |
317 | 317 |
318 // Categorizes character ranges into BMP, non-BMP, lead, and trail surrogates. | |
319 class UnicodeRangeSplitter { | |
320 public: | |
321 UnicodeRangeSplitter(Zone* zone, ZoneList<CharacterRange>* base); | |
322 void Call(uc32 from, DispatchTable::Entry entry); | |
323 | |
324 ZoneList<CharacterRange>* bmp() { return bmp_; } | |
325 ZoneList<CharacterRange>* lead_surrogates() { return lead_surrogates_; } | |
326 ZoneList<CharacterRange>* trail_surrogates() { return trail_surrogates_; } | |
327 ZoneList<CharacterRange>* non_bmp() const { return non_bmp_; } | |
328 | |
329 private: | |
330 static const int kBase = 0; | |
331 // Separate ranges into | |
332 static const int kBmpCodePoints = 1; | |
333 static const int kLeadSurrogates = 2; | |
334 static const int kTrailSurrogates = 3; | |
335 static const int kNonBmpCodePoints = 4; | |
336 | |
337 Zone* zone_; | |
338 DispatchTable table_; | |
339 ZoneList<CharacterRange>* bmp_; | |
340 ZoneList<CharacterRange>* lead_surrogates_; | |
341 ZoneList<CharacterRange>* trail_surrogates_; | |
342 ZoneList<CharacterRange>* non_bmp_; | |
343 }; | |
344 | |
345 | |
346 #define FOR_EACH_NODE_TYPE(VISIT) \ | 318 #define FOR_EACH_NODE_TYPE(VISIT) \ |
347 VISIT(End) \ | 319 VISIT(End) \ |
348 VISIT(Action) \ | 320 VISIT(Action) \ |
349 VISIT(Choice) \ | 321 VISIT(Choice) \ |
350 VISIT(BackReference) \ | 322 VISIT(BackReference) \ |
351 VISIT(Assertion) \ | 323 VISIT(Assertion) \ |
352 VISIT(Text) | 324 VISIT(Text) |
353 | 325 |
354 | 326 |
355 class Trace; | 327 class Trace; |
(...skipping 355 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
711 TextNode(ZoneList<TextElement>* elms, bool read_backward, | 683 TextNode(ZoneList<TextElement>* elms, bool read_backward, |
712 RegExpNode* on_success) | 684 RegExpNode* on_success) |
713 : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {} | 685 : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {} |
714 TextNode(RegExpCharacterClass* that, bool read_backward, | 686 TextNode(RegExpCharacterClass* that, bool read_backward, |
715 RegExpNode* on_success) | 687 RegExpNode* on_success) |
716 : SeqRegExpNode(on_success), | 688 : SeqRegExpNode(on_success), |
717 elms_(new (zone()) ZoneList<TextElement>(1, zone())), | 689 elms_(new (zone()) ZoneList<TextElement>(1, zone())), |
718 read_backward_(read_backward) { | 690 read_backward_(read_backward) { |
719 elms_->Add(TextElement::CharClass(that), zone()); | 691 elms_->Add(TextElement::CharClass(that), zone()); |
720 } | 692 } |
721 // Create TextNode for a single character class for the given ranges. | |
722 static TextNode* CreateForCharacterRanges(Zone* zone, | |
723 ZoneList<CharacterRange>* ranges, | |
724 bool read_backward, | |
725 RegExpNode* on_success); | |
726 // Create TextNode for a surrogate pair with a range given for the | |
727 // lead and the trail surrogate each. | |
728 static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead, | |
729 CharacterRange trail, | |
730 bool read_backward, | |
731 RegExpNode* on_success); | |
732 virtual void Accept(NodeVisitor* visitor); | 693 virtual void Accept(NodeVisitor* visitor); |
733 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 694 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
734 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); | 695 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); |
735 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 696 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
736 RegExpCompiler* compiler, | 697 RegExpCompiler* compiler, |
737 int characters_filled_in, | 698 int characters_filled_in, |
738 bool not_at_start); | 699 bool not_at_start); |
739 ZoneList<TextElement>* elements() { return elms_; } | 700 ZoneList<TextElement>* elements() { return elms_; } |
740 bool read_backward() { return read_backward_; } | 701 bool read_backward() { return read_backward_; } |
741 void MakeCaseIndependent(Isolate* isolate, bool is_one_byte); | 702 void MakeCaseIndependent(Isolate* isolate, bool is_one_byte); |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
845 private: | 806 private: |
846 int start_reg_; | 807 int start_reg_; |
847 int end_reg_; | 808 int end_reg_; |
848 bool read_backward_; | 809 bool read_backward_; |
849 }; | 810 }; |
850 | 811 |
851 | 812 |
852 class EndNode: public RegExpNode { | 813 class EndNode: public RegExpNode { |
853 public: | 814 public: |
854 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 815 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
855 EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {} | 816 explicit EndNode(Action action, Zone* zone) |
| 817 : RegExpNode(zone), action_(action) { } |
856 virtual void Accept(NodeVisitor* visitor); | 818 virtual void Accept(NodeVisitor* visitor); |
857 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 819 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
858 virtual int EatsAtLeast(int still_to_find, | 820 virtual int EatsAtLeast(int still_to_find, |
859 int recursion_depth, | 821 int recursion_depth, |
860 bool not_at_start) { return 0; } | 822 bool not_at_start) { return 0; } |
861 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 823 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
862 RegExpCompiler* compiler, | 824 RegExpCompiler* compiler, |
863 int characters_filled_in, | 825 int characters_filled_in, |
864 bool not_at_start) { | 826 bool not_at_start) { |
865 // Returning 0 from EatsAtLeast should ensure we never get here. | 827 // Returning 0 from EatsAtLeast should ensure we never get here. |
(...skipping 670 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1536 code(isolate->heap()->the_hole_value()), | 1498 code(isolate->heap()->the_hole_value()), |
1537 num_registers(0) {} | 1499 num_registers(0) {} |
1538 CompilationResult(Object* code, int registers) | 1500 CompilationResult(Object* code, int registers) |
1539 : error_message(NULL), code(code), num_registers(registers) {} | 1501 : error_message(NULL), code(code), num_registers(registers) {} |
1540 const char* error_message; | 1502 const char* error_message; |
1541 Object* code; | 1503 Object* code; |
1542 int num_registers; | 1504 int num_registers; |
1543 }; | 1505 }; |
1544 | 1506 |
1545 static CompilationResult Compile(Isolate* isolate, Zone* zone, | 1507 static CompilationResult Compile(Isolate* isolate, Zone* zone, |
1546 RegExpCompileData* input, | 1508 RegExpCompileData* input, bool ignore_case, |
1547 JSRegExp::Flags flags, | 1509 bool global, bool multiline, bool sticky, |
1548 Handle<String> pattern, | 1510 Handle<String> pattern, |
1549 Handle<String> sample_subject, | 1511 Handle<String> sample_subject, |
1550 bool is_one_byte); | 1512 bool is_one_byte); |
1551 | 1513 |
1552 static bool TooMuchRegExpCode(Handle<String> pattern); | 1514 static bool TooMuchRegExpCode(Handle<String> pattern); |
1553 | 1515 |
1554 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1516 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
1555 }; | 1517 }; |
1556 | 1518 |
1557 | 1519 |
(...skipping 18 matching lines...) Expand all Loading... |
1576 static const int kStringOffset = 0; | 1538 static const int kStringOffset = 0; |
1577 static const int kPatternOffset = 1; | 1539 static const int kPatternOffset = 1; |
1578 static const int kArrayOffset = 2; | 1540 static const int kArrayOffset = 2; |
1579 static const int kLastMatchOffset = 3; | 1541 static const int kLastMatchOffset = 3; |
1580 }; | 1542 }; |
1581 | 1543 |
1582 } // namespace internal | 1544 } // namespace internal |
1583 } // namespace v8 | 1545 } // namespace v8 |
1584 | 1546 |
1585 #endif // V8_REGEXP_JSREGEXP_H_ | 1547 #endif // V8_REGEXP_JSREGEXP_H_ |
OLD | NEW |