OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_REGEXP_JSREGEXP_H_ | 5 #ifndef V8_REGEXP_JSREGEXP_H_ |
6 #define V8_REGEXP_JSREGEXP_H_ | 6 #define V8_REGEXP_JSREGEXP_H_ |
7 | 7 |
8 #include "src/allocation.h" | 8 #include "src/allocation.h" |
9 #include "src/assembler.h" | 9 #include "src/assembler.h" |
10 #include "src/regexp/regexp-ast.h" | 10 #include "src/regexp/regexp-ast.h" |
(...skipping 247 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
258 | 258 |
259 // A mapping from integers, specified as ranges, to a set of integers. | 259 // A mapping from integers, specified as ranges, to a set of integers. |
260 // Used for mapping character ranges to choices. | 260 // Used for mapping character ranges to choices. |
261 class DispatchTable : public ZoneObject { | 261 class DispatchTable : public ZoneObject { |
262 public: | 262 public: |
263 explicit DispatchTable(Zone* zone) : tree_(zone) { } | 263 explicit DispatchTable(Zone* zone) : tree_(zone) { } |
264 | 264 |
265 class Entry { | 265 class Entry { |
266 public: | 266 public: |
267 Entry() : from_(0), to_(0), out_set_(NULL) { } | 267 Entry() : from_(0), to_(0), out_set_(NULL) { } |
268 Entry(uc16 from, uc16 to, OutSet* out_set) | 268 Entry(uc32 from, uc32 to, OutSet* out_set) |
269 : from_(from), to_(to), out_set_(out_set) { } | 269 : from_(from), to_(to), out_set_(out_set) {} |
270 uc16 from() { return from_; } | 270 uc32 from() { return from_; } |
271 uc16 to() { return to_; } | 271 uc32 to() { return to_; } |
272 void set_to(uc16 value) { to_ = value; } | 272 void set_to(uc32 value) { to_ = value; } |
273 void AddValue(int value, Zone* zone) { | 273 void AddValue(int value, Zone* zone) { |
274 out_set_ = out_set_->Extend(value, zone); | 274 out_set_ = out_set_->Extend(value, zone); |
275 } | 275 } |
276 OutSet* out_set() { return out_set_; } | 276 OutSet* out_set() { return out_set_; } |
277 private: | 277 private: |
278 uc16 from_; | 278 uc32 from_; |
279 uc16 to_; | 279 uc32 to_; |
280 OutSet* out_set_; | 280 OutSet* out_set_; |
281 }; | 281 }; |
282 | 282 |
283 class Config { | 283 class Config { |
284 public: | 284 public: |
285 typedef uc16 Key; | 285 typedef uc32 Key; |
286 typedef Entry Value; | 286 typedef Entry Value; |
287 static const uc16 kNoKey; | 287 static const uc32 kNoKey; |
288 static const Entry NoValue() { return Value(); } | 288 static const Entry NoValue() { return Value(); } |
289 static inline int Compare(uc16 a, uc16 b) { | 289 static inline int Compare(uc32 a, uc32 b) { |
290 if (a == b) | 290 if (a == b) |
291 return 0; | 291 return 0; |
292 else if (a < b) | 292 else if (a < b) |
293 return -1; | 293 return -1; |
294 else | 294 else |
295 return 1; | 295 return 1; |
296 } | 296 } |
297 }; | 297 }; |
298 | 298 |
299 void AddRange(CharacterRange range, int value, Zone* zone); | 299 void AddRange(CharacterRange range, int value, Zone* zone); |
300 OutSet* Get(uc16 value); | 300 OutSet* Get(uc32 value); |
301 void Dump(); | 301 void Dump(); |
302 | 302 |
303 template <typename Callback> | 303 template <typename Callback> |
304 void ForEach(Callback* callback) { | 304 void ForEach(Callback* callback) { |
305 return tree()->ForEach(callback); | 305 return tree()->ForEach(callback); |
306 } | 306 } |
307 | 307 |
308 private: | 308 private: |
309 // There can't be a static empty set since it allocates its | 309 // There can't be a static empty set since it allocates its |
310 // successors in a zone and caches them. | 310 // successors in a zone and caches them. |
311 OutSet* empty() { return &empty_; } | 311 OutSet* empty() { return &empty_; } |
312 OutSet empty_; | 312 OutSet empty_; |
313 ZoneSplayTree<Config>* tree() { return &tree_; } | 313 ZoneSplayTree<Config>* tree() { return &tree_; } |
314 ZoneSplayTree<Config> tree_; | 314 ZoneSplayTree<Config> tree_; |
315 }; | 315 }; |
316 | 316 |
317 | 317 |
| 318 // Categorizes character ranges into BMP, non-BMP, lead, and trail surrogates. |
| 319 class UnicodeRangeSplitter { |
| 320 public: |
| 321 UnicodeRangeSplitter(Zone* zone, ZoneList<CharacterRange>* base); |
| 322 void Call(uc32 from, DispatchTable::Entry entry); |
| 323 |
| 324 ZoneList<CharacterRange>* bmp() { return bmp_; } |
| 325 ZoneList<CharacterRange>* lead_surrogates() { return lead_surrogates_; } |
| 326 ZoneList<CharacterRange>* trail_surrogates() { return trail_surrogates_; } |
| 327 ZoneList<CharacterRange>* non_bmp() const { return non_bmp_; } |
| 328 |
| 329 private: |
| 330 static const int kBase = 0; |
| 331 // Separate ranges into |
| 332 static const int kBmpCodePoints = 1; |
| 333 static const int kLeadSurrogates = 2; |
| 334 static const int kTrailSurrogates = 3; |
| 335 static const int kNonBmpCodePoints = 4; |
| 336 |
| 337 Zone* zone_; |
| 338 DispatchTable table_; |
| 339 ZoneList<CharacterRange>* bmp_; |
| 340 ZoneList<CharacterRange>* lead_surrogates_; |
| 341 ZoneList<CharacterRange>* trail_surrogates_; |
| 342 ZoneList<CharacterRange>* non_bmp_; |
| 343 }; |
| 344 |
| 345 |
318 #define FOR_EACH_NODE_TYPE(VISIT) \ | 346 #define FOR_EACH_NODE_TYPE(VISIT) \ |
319 VISIT(End) \ | 347 VISIT(End) \ |
320 VISIT(Action) \ | 348 VISIT(Action) \ |
321 VISIT(Choice) \ | 349 VISIT(Choice) \ |
322 VISIT(BackReference) \ | 350 VISIT(BackReference) \ |
323 VISIT(Assertion) \ | 351 VISIT(Assertion) \ |
324 VISIT(Text) | 352 VISIT(Text) |
325 | 353 |
326 | 354 |
327 class Trace; | 355 class Trace; |
(...skipping 355 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
683 TextNode(ZoneList<TextElement>* elms, bool read_backward, | 711 TextNode(ZoneList<TextElement>* elms, bool read_backward, |
684 RegExpNode* on_success) | 712 RegExpNode* on_success) |
685 : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {} | 713 : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {} |
686 TextNode(RegExpCharacterClass* that, bool read_backward, | 714 TextNode(RegExpCharacterClass* that, bool read_backward, |
687 RegExpNode* on_success) | 715 RegExpNode* on_success) |
688 : SeqRegExpNode(on_success), | 716 : SeqRegExpNode(on_success), |
689 elms_(new (zone()) ZoneList<TextElement>(1, zone())), | 717 elms_(new (zone()) ZoneList<TextElement>(1, zone())), |
690 read_backward_(read_backward) { | 718 read_backward_(read_backward) { |
691 elms_->Add(TextElement::CharClass(that), zone()); | 719 elms_->Add(TextElement::CharClass(that), zone()); |
692 } | 720 } |
| 721 // Create TextNode for a single character class for the given ranges. |
| 722 static TextNode* CreateForCharacterRanges(Zone* zone, |
| 723 ZoneList<CharacterRange>* ranges, |
| 724 bool read_backward, |
| 725 RegExpNode* on_success); |
| 726 // Create TextNode for a surrogate pair with a range given for the |
| 727 // lead and the trail surrogate each. |
| 728 static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead, |
| 729 CharacterRange trail, |
| 730 bool read_backward, |
| 731 RegExpNode* on_success); |
693 virtual void Accept(NodeVisitor* visitor); | 732 virtual void Accept(NodeVisitor* visitor); |
694 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 733 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
695 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); | 734 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); |
696 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 735 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
697 RegExpCompiler* compiler, | 736 RegExpCompiler* compiler, |
698 int characters_filled_in, | 737 int characters_filled_in, |
699 bool not_at_start); | 738 bool not_at_start); |
700 ZoneList<TextElement>* elements() { return elms_; } | 739 ZoneList<TextElement>* elements() { return elms_; } |
701 bool read_backward() { return read_backward_; } | 740 bool read_backward() { return read_backward_; } |
702 void MakeCaseIndependent(Isolate* isolate, bool is_one_byte); | 741 void MakeCaseIndependent(Isolate* isolate, bool is_one_byte); |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
806 private: | 845 private: |
807 int start_reg_; | 846 int start_reg_; |
808 int end_reg_; | 847 int end_reg_; |
809 bool read_backward_; | 848 bool read_backward_; |
810 }; | 849 }; |
811 | 850 |
812 | 851 |
813 class EndNode: public RegExpNode { | 852 class EndNode: public RegExpNode { |
814 public: | 853 public: |
815 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 854 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
816 explicit EndNode(Action action, Zone* zone) | 855 EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {} |
817 : RegExpNode(zone), action_(action) { } | |
818 virtual void Accept(NodeVisitor* visitor); | 856 virtual void Accept(NodeVisitor* visitor); |
819 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 857 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
820 virtual int EatsAtLeast(int still_to_find, | 858 virtual int EatsAtLeast(int still_to_find, |
821 int recursion_depth, | 859 int recursion_depth, |
822 bool not_at_start) { return 0; } | 860 bool not_at_start) { return 0; } |
823 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 861 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
824 RegExpCompiler* compiler, | 862 RegExpCompiler* compiler, |
825 int characters_filled_in, | 863 int characters_filled_in, |
826 bool not_at_start) { | 864 bool not_at_start) { |
827 // Returning 0 from EatsAtLeast should ensure we never get here. | 865 // Returning 0 from EatsAtLeast should ensure we never get here. |
(...skipping 670 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1498 code(isolate->heap()->the_hole_value()), | 1536 code(isolate->heap()->the_hole_value()), |
1499 num_registers(0) {} | 1537 num_registers(0) {} |
1500 CompilationResult(Object* code, int registers) | 1538 CompilationResult(Object* code, int registers) |
1501 : error_message(NULL), code(code), num_registers(registers) {} | 1539 : error_message(NULL), code(code), num_registers(registers) {} |
1502 const char* error_message; | 1540 const char* error_message; |
1503 Object* code; | 1541 Object* code; |
1504 int num_registers; | 1542 int num_registers; |
1505 }; | 1543 }; |
1506 | 1544 |
1507 static CompilationResult Compile(Isolate* isolate, Zone* zone, | 1545 static CompilationResult Compile(Isolate* isolate, Zone* zone, |
1508 RegExpCompileData* input, bool ignore_case, | 1546 RegExpCompileData* input, |
1509 bool global, bool multiline, bool sticky, | 1547 JSRegExp::Flags flags, |
1510 Handle<String> pattern, | 1548 Handle<String> pattern, |
1511 Handle<String> sample_subject, | 1549 Handle<String> sample_subject, |
1512 bool is_one_byte); | 1550 bool is_one_byte); |
1513 | 1551 |
1514 static bool TooMuchRegExpCode(Handle<String> pattern); | 1552 static bool TooMuchRegExpCode(Handle<String> pattern); |
1515 | 1553 |
1516 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1554 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
1517 }; | 1555 }; |
1518 | 1556 |
1519 | 1557 |
(...skipping 18 matching lines...) Expand all Loading... |
1538 static const int kStringOffset = 0; | 1576 static const int kStringOffset = 0; |
1539 static const int kPatternOffset = 1; | 1577 static const int kPatternOffset = 1; |
1540 static const int kArrayOffset = 2; | 1578 static const int kArrayOffset = 2; |
1541 static const int kLastMatchOffset = 3; | 1579 static const int kLastMatchOffset = 3; |
1542 }; | 1580 }; |
1543 | 1581 |
1544 } // namespace internal | 1582 } // namespace internal |
1545 } // namespace v8 | 1583 } // namespace v8 |
1546 | 1584 |
1547 #endif // V8_REGEXP_JSREGEXP_H_ | 1585 #endif // V8_REGEXP_JSREGEXP_H_ |
OLD | NEW |