Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_REGEXP_JSREGEXP_H_ | 5 #ifndef V8_REGEXP_JSREGEXP_H_ |
| 6 #define V8_REGEXP_JSREGEXP_H_ | 6 #define V8_REGEXP_JSREGEXP_H_ |
| 7 | 7 |
| 8 #include "src/allocation.h" | 8 #include "src/allocation.h" |
| 9 #include "src/assembler.h" | 9 #include "src/assembler.h" |
| 10 #include "src/regexp/regexp-ast.h" | 10 #include "src/regexp/regexp-ast.h" |
| (...skipping 247 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 258 | 258 |
| 259 // A mapping from integers, specified as ranges, to a set of integers. | 259 // A mapping from integers, specified as ranges, to a set of integers. |
| 260 // Used for mapping character ranges to choices. | 260 // Used for mapping character ranges to choices. |
| 261 class DispatchTable : public ZoneObject { | 261 class DispatchTable : public ZoneObject { |
| 262 public: | 262 public: |
| 263 explicit DispatchTable(Zone* zone) : tree_(zone) { } | 263 explicit DispatchTable(Zone* zone) : tree_(zone) { } |
| 264 | 264 |
| 265 class Entry { | 265 class Entry { |
| 266 public: | 266 public: |
| 267 Entry() : from_(0), to_(0), out_set_(NULL) { } | 267 Entry() : from_(0), to_(0), out_set_(NULL) { } |
| 268 Entry(uc16 from, uc16 to, OutSet* out_set) | 268 Entry(uc32 from, uc32 to, OutSet* out_set) |
| 269 : from_(from), to_(to), out_set_(out_set) { } | 269 : from_(from), to_(to), out_set_(out_set) {} |
| 270 uc16 from() { return from_; } | 270 uc32 from() { return from_; } |
| 271 uc16 to() { return to_; } | 271 uc32 to() { return to_; } |
| 272 void set_to(uc16 value) { to_ = value; } | 272 void set_to(uc32 value) { to_ = value; } |
| 273 void AddValue(int value, Zone* zone) { | 273 void AddValue(int value, Zone* zone) { |
| 274 out_set_ = out_set_->Extend(value, zone); | 274 out_set_ = out_set_->Extend(value, zone); |
| 275 } | 275 } |
| 276 OutSet* out_set() { return out_set_; } | 276 OutSet* out_set() { return out_set_; } |
| 277 private: | 277 private: |
| 278 uc16 from_; | 278 uc32 from_; |
| 279 uc16 to_; | 279 uc32 to_; |
| 280 OutSet* out_set_; | 280 OutSet* out_set_; |
| 281 }; | 281 }; |
| 282 | 282 |
| 283 class Config { | 283 class Config { |
| 284 public: | 284 public: |
| 285 typedef uc16 Key; | 285 typedef uc32 Key; |
| 286 typedef Entry Value; | 286 typedef Entry Value; |
| 287 static const uc16 kNoKey; | 287 static const uc32 kNoKey; |
| 288 static const Entry NoValue() { return Value(); } | 288 static const Entry NoValue() { return Value(); } |
| 289 static inline int Compare(uc16 a, uc16 b) { | 289 static inline int Compare(uc32 a, uc32 b) { |
| 290 if (a == b) | 290 if (a == b) |
| 291 return 0; | 291 return 0; |
| 292 else if (a < b) | 292 else if (a < b) |
| 293 return -1; | 293 return -1; |
| 294 else | 294 else |
| 295 return 1; | 295 return 1; |
| 296 } | 296 } |
| 297 }; | 297 }; |
| 298 | 298 |
| 299 void AddRange(CharacterRange range, int value, Zone* zone); | 299 void AddRange(CharacterRange range, int value, Zone* zone); |
| 300 OutSet* Get(uc16 value); | 300 OutSet* Get(uc32 value); |
| 301 void Dump(); | 301 void Dump(); |
| 302 | 302 |
| 303 template <typename Callback> | 303 template <typename Callback> |
| 304 void ForEach(Callback* callback) { | 304 void ForEach(Callback* callback) { |
| 305 return tree()->ForEach(callback); | 305 return tree()->ForEach(callback); |
| 306 } | 306 } |
| 307 | 307 |
| 308 private: | 308 private: |
| 309 // There can't be a static empty set since it allocates its | 309 // There can't be a static empty set since it allocates its |
| 310 // successors in a zone and caches them. | 310 // successors in a zone and caches them. |
| 311 OutSet* empty() { return &empty_; } | 311 OutSet* empty() { return &empty_; } |
| 312 OutSet empty_; | 312 OutSet empty_; |
| 313 ZoneSplayTree<Config>* tree() { return &tree_; } | 313 ZoneSplayTree<Config>* tree() { return &tree_; } |
| 314 ZoneSplayTree<Config> tree_; | 314 ZoneSplayTree<Config> tree_; |
| 315 }; | 315 }; |
| 316 | 316 |
| 317 | 317 |
| 318 // Categorizes character ranges into BMP, non-BMP, lead, and trail surrogates. | |
| 319 class UnicodeRangeSplitter { | |
| 320 public: | |
| 321 UnicodeRangeSplitter(Zone* zone, ZoneList<CharacterRange>* base); | |
| 322 void Call(uc32 from, DispatchTable::Entry entry); | |
| 323 | |
| 324 ZoneList<CharacterRange>* bmp() { return bmp_; } | |
| 325 ZoneList<CharacterRange>* lead_surrogates() { return lead_surrogates_; } | |
| 326 ZoneList<CharacterRange>* trail_surrogates() { return trail_surrogates_; } | |
| 327 ZoneList<CharacterRange>* non_bmp() const { return non_bmp_; } | |
| 328 | |
| 329 private: | |
| 330 static const int kBase = 0; | |
| 331 // Separate ranges into | |
| 332 static const int kBmpCodePoints = 1; | |
| 333 static const int kLeadSurrogates = 2; | |
| 334 static const int kTrailSurrogates = 3; | |
| 335 static const int kNonBmpCodePoints = 4; | |
| 336 | |
| 337 Zone* zone_; | |
| 338 DispatchTable table_; | |
| 339 ZoneList<CharacterRange>* bmp_; | |
| 340 ZoneList<CharacterRange>* lead_surrogates_; | |
| 341 ZoneList<CharacterRange>* trail_surrogates_; | |
| 342 ZoneList<CharacterRange>* non_bmp_; | |
| 343 }; | |
| 344 | |
| 345 | |
| 318 #define FOR_EACH_NODE_TYPE(VISIT) \ | 346 #define FOR_EACH_NODE_TYPE(VISIT) \ |
| 319 VISIT(End) \ | 347 VISIT(End) \ |
| 320 VISIT(Action) \ | 348 VISIT(Action) \ |
| 321 VISIT(Choice) \ | 349 VISIT(Choice) \ |
| 322 VISIT(BackReference) \ | 350 VISIT(BackReference) \ |
| 323 VISIT(Assertion) \ | 351 VISIT(Assertion) \ |
| 324 VISIT(Text) | 352 VISIT(Text) |
| 325 | 353 |
| 326 | 354 |
| 327 class Trace; | 355 class Trace; |
| (...skipping 355 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 683 TextNode(ZoneList<TextElement>* elms, bool read_backward, | 711 TextNode(ZoneList<TextElement>* elms, bool read_backward, |
| 684 RegExpNode* on_success) | 712 RegExpNode* on_success) |
| 685 : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {} | 713 : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {} |
| 686 TextNode(RegExpCharacterClass* that, bool read_backward, | 714 TextNode(RegExpCharacterClass* that, bool read_backward, |
| 687 RegExpNode* on_success) | 715 RegExpNode* on_success) |
| 688 : SeqRegExpNode(on_success), | 716 : SeqRegExpNode(on_success), |
| 689 elms_(new (zone()) ZoneList<TextElement>(1, zone())), | 717 elms_(new (zone()) ZoneList<TextElement>(1, zone())), |
| 690 read_backward_(read_backward) { | 718 read_backward_(read_backward) { |
| 691 elms_->Add(TextElement::CharClass(that), zone()); | 719 elms_->Add(TextElement::CharClass(that), zone()); |
| 692 } | 720 } |
| 721 static TextNode* CharacterRanges(Zone* zone, ZoneList<CharacterRange>* ranges, | |
|
erikcorry
2016/01/20 10:47:05
These two could use a 1-line comment explaining wh
Yang
2016/01/20 13:06:20
Done.
| |
| 722 bool read_backward, RegExpNode* on_success); | |
| 723 static TextNode* SurrogatePair(Zone* zone, CharacterRange lead, | |
| 724 CharacterRange trail, bool read_backward, | |
| 725 RegExpNode* on_success); | |
| 693 virtual void Accept(NodeVisitor* visitor); | 726 virtual void Accept(NodeVisitor* visitor); |
| 694 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 727 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 695 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); | 728 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); |
| 696 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 729 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 697 RegExpCompiler* compiler, | 730 RegExpCompiler* compiler, |
| 698 int characters_filled_in, | 731 int characters_filled_in, |
| 699 bool not_at_start); | 732 bool not_at_start); |
| 700 ZoneList<TextElement>* elements() { return elms_; } | 733 ZoneList<TextElement>* elements() { return elms_; } |
| 701 bool read_backward() { return read_backward_; } | 734 bool read_backward() { return read_backward_; } |
| 702 void MakeCaseIndependent(Isolate* isolate, bool is_one_byte); | 735 void MakeCaseIndependent(Isolate* isolate, bool is_one_byte); |
| (...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 806 private: | 839 private: |
| 807 int start_reg_; | 840 int start_reg_; |
| 808 int end_reg_; | 841 int end_reg_; |
| 809 bool read_backward_; | 842 bool read_backward_; |
| 810 }; | 843 }; |
| 811 | 844 |
| 812 | 845 |
| 813 class EndNode: public RegExpNode { | 846 class EndNode: public RegExpNode { |
| 814 public: | 847 public: |
| 815 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 848 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
| 816 explicit EndNode(Action action, Zone* zone) | 849 EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {} |
| 817 : RegExpNode(zone), action_(action) { } | |
| 818 virtual void Accept(NodeVisitor* visitor); | 850 virtual void Accept(NodeVisitor* visitor); |
| 819 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 851 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 820 virtual int EatsAtLeast(int still_to_find, | 852 virtual int EatsAtLeast(int still_to_find, |
| 821 int recursion_depth, | 853 int recursion_depth, |
| 822 bool not_at_start) { return 0; } | 854 bool not_at_start) { return 0; } |
| 823 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 855 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 824 RegExpCompiler* compiler, | 856 RegExpCompiler* compiler, |
| 825 int characters_filled_in, | 857 int characters_filled_in, |
| 826 bool not_at_start) { | 858 bool not_at_start) { |
| 827 // Returning 0 from EatsAtLeast should ensure we never get here. | 859 // Returning 0 from EatsAtLeast should ensure we never get here. |
| (...skipping 670 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1498 code(isolate->heap()->the_hole_value()), | 1530 code(isolate->heap()->the_hole_value()), |
| 1499 num_registers(0) {} | 1531 num_registers(0) {} |
| 1500 CompilationResult(Object* code, int registers) | 1532 CompilationResult(Object* code, int registers) |
| 1501 : error_message(NULL), code(code), num_registers(registers) {} | 1533 : error_message(NULL), code(code), num_registers(registers) {} |
| 1502 const char* error_message; | 1534 const char* error_message; |
| 1503 Object* code; | 1535 Object* code; |
| 1504 int num_registers; | 1536 int num_registers; |
| 1505 }; | 1537 }; |
| 1506 | 1538 |
| 1507 static CompilationResult Compile(Isolate* isolate, Zone* zone, | 1539 static CompilationResult Compile(Isolate* isolate, Zone* zone, |
| 1508 RegExpCompileData* input, bool ignore_case, | 1540 RegExpCompileData* input, |
| 1509 bool global, bool multiline, bool sticky, | 1541 JSRegExp::Flags flags, |
| 1510 Handle<String> pattern, | 1542 Handle<String> pattern, |
| 1511 Handle<String> sample_subject, | 1543 Handle<String> sample_subject, |
| 1512 bool is_one_byte); | 1544 bool is_one_byte); |
| 1513 | 1545 |
| 1514 static bool TooMuchRegExpCode(Handle<String> pattern); | 1546 static bool TooMuchRegExpCode(Handle<String> pattern); |
| 1515 | 1547 |
| 1516 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1548 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
| 1517 }; | 1549 }; |
| 1518 | 1550 |
| 1519 | 1551 |
| (...skipping 18 matching lines...) Expand all Loading... | |
| 1538 static const int kStringOffset = 0; | 1570 static const int kStringOffset = 0; |
| 1539 static const int kPatternOffset = 1; | 1571 static const int kPatternOffset = 1; |
| 1540 static const int kArrayOffset = 2; | 1572 static const int kArrayOffset = 2; |
| 1541 static const int kLastMatchOffset = 3; | 1573 static const int kLastMatchOffset = 3; |
| 1542 }; | 1574 }; |
| 1543 | 1575 |
| 1544 } // namespace internal | 1576 } // namespace internal |
| 1545 } // namespace v8 | 1577 } // namespace v8 |
| 1546 | 1578 |
| 1547 #endif // V8_REGEXP_JSREGEXP_H_ | 1579 #endif // V8_REGEXP_JSREGEXP_H_ |
| OLD | NEW |