OLD | NEW |
---|---|
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #ifndef VM_REGEXP_H_ | 5 #ifndef VM_REGEXP_H_ |
6 #define VM_REGEXP_H_ | 6 #define VM_REGEXP_H_ |
7 | 7 |
8 #include "vm/assembler.h" | 8 #include "vm/assembler.h" |
9 #include "vm/intermediate_language.h" | |
9 #include "vm/object.h" | 10 #include "vm/object.h" |
11 #include "vm/regexp_assembler.h" | |
10 | 12 |
11 namespace dart { | 13 namespace dart { |
12 | 14 |
13 // Forward declarations. | 15 // Forward declarations. |
14 class AlternativeGeneration; | 16 class AlternativeGeneration; |
15 class BoyerMooreLookahead; | 17 class BoyerMooreLookahead; |
16 class NodeVisitor; | 18 class NodeVisitor; |
17 class QuickCheckDetails; | 19 class QuickCheckDetails; |
18 class RegExpAtom; | 20 class RegExpAtom; |
19 class RegExpCharacterClass; | 21 class RegExpCharacterClass; |
(...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
232 // this case anchored branches will always fail and can be ignored when | 234 // this case anchored branches will always fail and can be ignored when |
233 // determining how many characters are consumed on success. | 235 // determining how many characters are consumed on success. |
234 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 236 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, |
235 bool not_at_start) = 0; | 237 bool not_at_start) = 0; |
236 // Emits some quick code that checks whether the preloaded characters match. | 238 // Emits some quick code that checks whether the preloaded characters match. |
237 // Falls through on certain failure, jumps to the label on possible success. | 239 // Falls through on certain failure, jumps to the label on possible success. |
238 // If the node cannot make a quick check it does nothing and returns false. | 240 // If the node cannot make a quick check it does nothing and returns false. |
239 bool EmitQuickCheck(RegExpCompiler* compiler, | 241 bool EmitQuickCheck(RegExpCompiler* compiler, |
240 Trace* trace, | 242 Trace* trace, |
241 bool preload_has_checked_bounds, | 243 bool preload_has_checked_bounds, |
242 Label* on_possible_success, | 244 BlockLabel* on_possible_success, |
243 QuickCheckDetails* details_return, | 245 QuickCheckDetails* details_return, |
244 bool fall_through_on_failure); | 246 bool fall_through_on_failure); |
245 // For a given number of characters this returns a mask and a value. The | 247 // For a given number of characters this returns a mask and a value. The |
246 // next n characters are anded with the mask and compared with the value. | 248 // next n characters are anded with the mask and compared with the value. |
247 // A comparison failure indicates the node cannot match the next n characters. | 249 // A comparison failure indicates the node cannot match the next n characters. |
248 // A comparison success indicates the node may match. | 250 // A comparison success indicates the node may match. |
249 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 251 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
250 RegExpCompiler* compiler, | 252 RegExpCompiler* compiler, |
251 intptr_t characters_filled_in, | 253 intptr_t characters_filled_in, |
252 bool not_at_start) = 0; | 254 bool not_at_start) = 0; |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
292 } | 294 } |
293 | 295 |
294 // We want to avoid recalculating the lookahead info, so we store it on the | 296 // We want to avoid recalculating the lookahead info, so we store it on the |
295 // node. Only info that is for this node is stored. We can tell that the | 297 // node. Only info that is for this node is stored. We can tell that the |
296 // info is for this node when offset == 0, so the information is calculated | 298 // info is for this node when offset == 0, so the information is calculated |
297 // relative to this node. | 299 // relative to this node. |
298 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, intptr_t offset) { | 300 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, intptr_t offset) { |
299 if (offset == 0) set_bm_info(not_at_start, bm); | 301 if (offset == 0) set_bm_info(not_at_start, bm); |
300 } | 302 } |
301 | 303 |
302 Label* label() { return &label_; } | 304 BlockLabel* label() { return &label_; } |
303 // If non-generic code is generated for a node (i.e. the node is not at the | 305 // If non-generic code is generated for a node (i.e. the node is not at the |
304 // start of the trace) then it cannot be reused. This variable sets a limit | 306 // start of the trace) then it cannot be reused. This variable sets a limit |
305 // on how often we allow that to happen before we insist on starting a new | 307 // on how often we allow that to happen before we insist on starting a new |
306 // trace and generating generic code for a node that can be reused by flushing | 308 // trace and generating generic code for a node that can be reused by flushing |
307 // the deferred actions in the current trace and generating a goto. | 309 // the deferred actions in the current trace and generating a goto. |
308 static const intptr_t kMaxCopiesCodeGenerated = 10; | 310 static const intptr_t kMaxCopiesCodeGenerated = 10; |
309 | 311 |
310 NodeInfo* info() { return &info_; } | 312 NodeInfo* info() { return &info_; } |
311 | 313 |
312 BoyerMooreLookahead* bm_info(bool not_at_start) { | 314 BoyerMooreLookahead* bm_info(bool not_at_start) { |
313 return bm_info_[not_at_start ? 1 : 0]; | 315 return bm_info_[not_at_start ? 1 : 0]; |
314 } | 316 } |
315 | 317 |
316 Isolate* isolate() const { return isolate_; } | 318 Isolate* isolate() const { return isolate_; } |
317 | 319 |
318 protected: | 320 protected: |
319 enum LimitResult { DONE, CONTINUE }; | 321 enum LimitResult { DONE, CONTINUE }; |
320 RegExpNode* replacement_; | 322 RegExpNode* replacement_; |
321 | 323 |
324 LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace); | |
325 | |
322 void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) { | 326 void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) { |
323 bm_info_[not_at_start ? 1 : 0] = bm; | 327 bm_info_[not_at_start ? 1 : 0] = bm; |
324 } | 328 } |
325 | 329 |
326 private: | 330 private: |
327 static const intptr_t kFirstCharBudget = 10; | 331 static const intptr_t kFirstCharBudget = 10; |
328 Label label_; | 332 BlockLabel label_; |
329 NodeInfo info_; | 333 NodeInfo info_; |
330 // This variable keeps track of how many times code has been generated for | 334 // This variable keeps track of how many times code has been generated for |
331 // this node (in different traces). We don't keep track of where the | 335 // this node (in different traces). We don't keep track of where the |
332 // generated code is located unless the code is generated at the start of | 336 // generated code is located unless the code is generated at the start of |
333 // a trace, in which case it is generic and can be reused by flushing the | 337 // a trace, in which case it is generic and can be reused by flushing the |
334 // deferred operations in the current trace and generating a goto. | 338 // deferred operations in the current trace and generating a goto. |
335 intptr_t trace_count_; | 339 intptr_t trace_count_; |
336 BoyerMooreLookahead* bm_info_[2]; | 340 BoyerMooreLookahead* bm_info_[2]; |
337 Isolate* isolate_; | 341 Isolate* isolate_; |
338 }; | 342 }; |
(...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
528 RegExpNode* node_; | 532 RegExpNode* node_; |
529 ZoneGrowableArray<Guard*>* guards_; | 533 ZoneGrowableArray<Guard*>* guards_; |
530 }; | 534 }; |
531 | 535 |
532 | 536 |
533 // A set of unsigned integers that behaves especially well on small | 537 // A set of unsigned integers that behaves especially well on small |
534 // integers (< 32). May do zone-allocation. | 538 // integers (< 32). May do zone-allocation. |
535 class OutSet: public ZoneAllocated { | 539 class OutSet: public ZoneAllocated { |
536 public: | 540 public: |
537 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } | 541 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } |
538 OutSet* Extend(unsigned value, Zone* zone); | 542 OutSet* Extend(unsigned value, Isolate* isolate); |
539 bool Get(unsigned value) const; | 543 bool Get(unsigned value) const; |
540 static const unsigned kFirstLimit = 32; | 544 static const unsigned kFirstLimit = 32; |
541 | 545 |
542 private: | 546 private: |
543 // Destructively set a value in this set. In most cases you want | 547 // Destructively set a value in this set. In most cases you want |
544 // to use Extend instead to ensure that only one instance exists | 548 // to use Extend instead to ensure that only one instance exists |
545 // that contains the same values. | 549 // that contains the same values. |
546 void Set(unsigned value, Zone* zone); | 550 void Set(unsigned value, Isolate* isolate); |
547 | 551 |
548 // The successors are a list of sets that contain the same values | 552 // The successors are a list of sets that contain the same values |
549 // as this set and the one more value that is not present in this | 553 // as this set and the one more value that is not present in this |
550 // set. | 554 // set. |
551 ZoneGrowableArray<OutSet*>* successors() { return successors_; } | 555 ZoneGrowableArray<OutSet*>* successors() { return successors_; } |
552 | 556 |
553 OutSet(uint32_t first, ZoneGrowableArray<unsigned>* remaining) | 557 OutSet(uint32_t first, ZoneGrowableArray<unsigned>* remaining) |
554 : first_(first), remaining_(remaining), successors_(NULL) { } | 558 : first_(first), remaining_(remaining), successors_(NULL) { } |
555 uint32_t first_; | 559 uint32_t first_; |
556 ZoneGrowableArray<unsigned>* remaining_; | 560 ZoneGrowableArray<unsigned>* remaining_; |
557 ZoneGrowableArray<OutSet*>* successors_; | 561 ZoneGrowableArray<OutSet*>* successors_; |
558 friend class Trace; | 562 friend class Trace; |
559 }; | 563 }; |
560 | 564 |
561 | 565 |
562 // A mapping from integers, specified as ranges, to a set of integers. | 566 // A mapping from integers, specified as ranges, to a set of integers. |
563 // Used for mapping character ranges to choices. | 567 // Used for mapping character ranges to choices. |
564 class DispatchTable : public ZoneAllocated { | 568 class DispatchTable : public ZoneAllocated { |
565 public: | 569 public: |
566 DispatchTable() { } | 570 DispatchTable() { } |
567 | 571 |
568 class Entry { | 572 class Entry { |
569 public: | 573 public: |
570 Entry() : from_(0), to_(0), out_set_(NULL) { } | 574 Entry() : from_(0), to_(0), out_set_(NULL) { } |
571 Entry(uint16_t from, uint16_t to, OutSet* out_set) | 575 Entry(uint16_t from, uint16_t to, OutSet* out_set) |
572 : from_(from), to_(to), out_set_(out_set) { } | 576 : from_(from), to_(to), out_set_(out_set) { } |
573 uint16_t from() { return from_; } | 577 uint16_t from() { return from_; } |
574 uint16_t to() { return to_; } | 578 uint16_t to() { return to_; } |
575 void set_to(uint16_t value) { to_ = value; } | 579 void set_to(uint16_t value) { to_ = value; } |
576 void AddValue(intptr_t value, Zone* zone) { | 580 void AddValue(intptr_t value, Isolate* isolate) { |
577 out_set_ = out_set_->Extend(value, zone); | 581 out_set_ = out_set_->Extend(value, isolate); |
578 } | 582 } |
579 OutSet* out_set() { return out_set_; } | 583 OutSet* out_set() { return out_set_; } |
580 private: | 584 private: |
581 uint16_t from_; | 585 uint16_t from_; |
582 uint16_t to_; | 586 uint16_t to_; |
583 OutSet* out_set_; | 587 OutSet* out_set_; |
584 }; | 588 }; |
585 | 589 |
586 class Config { | 590 class Config { |
587 public: | 591 public: |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
625 table_(NULL), | 629 table_(NULL), |
626 not_at_start_(false), | 630 not_at_start_(false), |
627 being_calculated_(false) { } | 631 being_calculated_(false) { } |
628 virtual void Accept(NodeVisitor* visitor); | 632 virtual void Accept(NodeVisitor* visitor); |
629 void AddAlternative(GuardedAlternative node) { | 633 void AddAlternative(GuardedAlternative node) { |
630 alternatives()->Add(node); | 634 alternatives()->Add(node); |
631 } | 635 } |
632 ZoneGrowableArray<GuardedAlternative>* alternatives() { | 636 ZoneGrowableArray<GuardedAlternative>* alternatives() { |
633 return alternatives_; | 637 return alternatives_; |
634 } | 638 } |
635 DispatchTable* GetTable(bool ignore_case); | |
636 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 639 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
637 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 640 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, |
638 bool not_at_start); | 641 bool not_at_start); |
639 intptr_t EatsAtLeastHelper(intptr_t still_to_find, | 642 intptr_t EatsAtLeastHelper(intptr_t still_to_find, |
640 intptr_t budget, | 643 intptr_t budget, |
641 RegExpNode* ignore_this_node, | 644 RegExpNode* ignore_this_node, |
642 bool not_at_start); | 645 bool not_at_start); |
643 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 646 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
644 RegExpCompiler* compiler, | 647 RegExpCompiler* compiler, |
645 intptr_t characters_filled_in, | 648 intptr_t characters_filled_in, |
(...skipping 333 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
979 Position positions_[4]; | 982 Position positions_[4]; |
980 // These values are the condensate of the above array after Rationalize(). | 983 // These values are the condensate of the above array after Rationalize(). |
981 uint32_t mask_; | 984 uint32_t mask_; |
982 uint32_t value_; | 985 uint32_t value_; |
983 // If set to true, there is no way this quick check can match at all. | 986 // If set to true, there is no way this quick check can match at all. |
984 // E.g., if it requires to be at the start of the input, and isn't. | 987 // E.g., if it requires to be at the start of the input, and isn't. |
985 bool cannot_match_; | 988 bool cannot_match_; |
986 }; | 989 }; |
987 | 990 |
988 | 991 |
992 // Improve the speed that we scan for an initial point where a non-anchored | |
993 // regexp can match by using a Boyer-Moore-like table. This is done by | |
994 // identifying non-greedy non-capturing loops in the nodes that eat any | |
995 // character one at a time. For example in the middle of the regexp | |
996 // /foo[\s\S]*?bar/ we find such a loop. There is also such a loop implicitly | |
997 // inserted at the start of any non-anchored regexp. | |
998 // | |
999 // When we have found such a loop we look ahead in the nodes to find the set of | |
1000 // characters that can come at given distances. For example for the regexp | |
1001 // /.?foo/ we know that there are at least 3 characters ahead of us, and the | |
1002 // sets of characters that can occur are [any, [f, o], [o]]. We find a range in | |
1003 // the lookahead info where the set of characters is reasonably constrained. In | |
1004 // our example this is from index 1 to 2 (0 is not constrained). We can now | |
1005 // look 3 characters ahead and if we don't find one of [f, o] (the union of | |
1006 // [f, o] and [o]) then we can skip forwards by the range size (in this case 2). | |
1007 // | |
1008 // For Unicode input strings we do the same, but modulo 128. | |
1009 // | |
1010 // We also look at the first string fed to the regexp and use that to get a hint | |
1011 // of the character frequencies in the inputs. This affects the assessment of | |
1012 // whether the set of characters is 'reasonably constrained'. | |
1013 // | |
1014 // We also have another lookahead mechanism (called quick check in the code), | |
1015 // which uses a wide load of multiple characters followed by a mask and compare | |
1016 // to determine whether a match is possible at this point. | |
1017 enum ContainedInLattice { | |
1018 kNotYet = 0, | |
1019 kLatticeIn = 1, | |
1020 kLatticeOut = 2, | |
1021 kLatticeUnknown = 3 // Can also mean both in and out. | |
1022 }; | |
1023 | |
1024 | |
1025 inline ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b) { | |
1026 return static_cast<ContainedInLattice>(a | b); | |
1027 } | |
1028 | |
1029 | |
1030 ContainedInLattice AddRange(ContainedInLattice a, | |
1031 const int* ranges, | |
1032 intptr_t ranges_size, | |
1033 Interval new_range); | |
1034 | |
1035 | |
1036 class BoyerMoorePositionInfo : public ZoneAllocated { | |
1037 public: | |
1038 explicit BoyerMoorePositionInfo(Isolate* isolate) | |
1039 : map_(new(isolate) ZoneGrowableArray<bool>(kMapSize)), | |
1040 map_count_(0), | |
1041 w_(kNotYet), | |
1042 s_(kNotYet), | |
1043 d_(kNotYet), | |
1044 surrogate_(kNotYet) { | |
1045 for (intptr_t i = 0; i < kMapSize; i++) { | |
1046 map_->Add(false); | |
1047 } | |
1048 } | |
1049 | |
1050 bool& at(intptr_t i) { return (*map_)[i]; } | |
1051 | |
1052 static const intptr_t kMapSize = 128; | |
1053 static const intptr_t kMask = kMapSize - 1; | |
1054 | |
1055 intptr_t map_count() const { return map_count_; } | |
1056 | |
1057 void Set(intptr_t character); | |
1058 void SetInterval(const Interval& interval); | |
1059 void SetAll(); | |
1060 bool is_non_word() { return w_ == kLatticeOut; } | |
1061 bool is_word() { return w_ == kLatticeIn; } | |
1062 | |
1063 private: | |
1064 ZoneGrowableArray<bool>* map_; | |
1065 intptr_t map_count_; // Number of set bits in the map. | |
1066 ContainedInLattice w_; // The \w character class. | |
1067 ContainedInLattice s_; // The \s character class. | |
1068 ContainedInLattice d_; // The \d character class. | |
1069 ContainedInLattice surrogate_; // Surrogate UTF-16 code units. | |
1070 }; | |
1071 | |
1072 | |
1073 class BoyerMooreLookahead : public ZoneAllocated{ | |
1074 public: | |
1075 BoyerMooreLookahead(intptr_t length, RegExpCompiler* compiler, | |
1076 Isolate* Isolate); | |
1077 | |
1078 intptr_t length() { return length_; } | |
1079 intptr_t max_char() { return max_char_; } | |
1080 RegExpCompiler* compiler() { return compiler_; } | |
1081 | |
1082 intptr_t Count(intptr_t map_number) { | |
1083 return bitmaps_->At(map_number)->map_count(); | |
1084 } | |
1085 | |
1086 BoyerMoorePositionInfo* at(intptr_t i) { return bitmaps_->At(i); } | |
1087 | |
1088 void Set(intptr_t map_number, intptr_t character) { | |
1089 if (character > max_char_) return; | |
1090 BoyerMoorePositionInfo* info = bitmaps_->At(map_number); | |
1091 info->Set(character); | |
1092 } | |
1093 | |
1094 void SetInterval(intptr_t map_number, const Interval& interval) { | |
1095 if (interval.from() > max_char_) return; | |
1096 BoyerMoorePositionInfo* info = bitmaps_->At(map_number); | |
1097 if (interval.to() > max_char_) { | |
1098 info->SetInterval(Interval(interval.from(), max_char_)); | |
1099 } else { | |
1100 info->SetInterval(interval); | |
1101 } | |
1102 } | |
1103 | |
1104 void SetAll(intptr_t map_number) { | |
1105 bitmaps_->At(map_number)->SetAll(); | |
1106 } | |
1107 | |
1108 void SetRest(intptr_t from_map) { | |
1109 for (intptr_t i = from_map; i < length_; i++) SetAll(i); | |
1110 } | |
1111 bool EmitSkipInstructions(RegExpMacroAssembler* masm); | |
1112 | |
1113 private: | |
1114 // This is the value obtained by EatsAtLeast. If we do not have at least this | |
1115 // many characters left in the sample string then the match is bound to fail. | |
1116 // Therefore it is OK to read a character this far ahead of the current match | |
1117 // point. | |
1118 intptr_t length_; | |
1119 RegExpCompiler* compiler_; | |
1120 // 0x7f for ASCII, 0xffff for UTF-16. | |
1121 intptr_t max_char_; | |
1122 ZoneGrowableArray<BoyerMoorePositionInfo*>* bitmaps_; | |
1123 | |
1124 intptr_t GetSkipTable(intptr_t min_lookahead, | |
1125 intptr_t max_lookahead, | |
1126 TypedData* boolean_skip_table); | |
1127 bool FindWorthwhileInterval(intptr_t* from, intptr_t* to); | |
1128 intptr_t FindBestInterval( | |
1129 intptr_t max_number_of_chars, | |
1130 intptr_t old_biggest_points, | |
1131 intptr_t* from, intptr_t* to); | |
1132 }; | |
1133 | |
1134 | |
989 // There are many ways to generate code for a node. This class encapsulates | 1135 // There are many ways to generate code for a node. This class encapsulates |
990 // the current way we should be generating. In other words it encapsulates | 1136 // the current way we should be generating. In other words it encapsulates |
991 // the current state of the code generator. The effect of this is that we | 1137 // the current state of the code generator. The effect of this is that we |
992 // generate code for paths that the matcher can take through the regular | 1138 // generate code for paths that the matcher can take through the regular |
993 // expression. A given node in the regexp can be code-generated several times | 1139 // expression. A given node in the regexp can be code-generated several times |
994 // as it can be part of several traces. For example for the regexp: | 1140 // as it can be part of several traces. For example for the regexp: |
995 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part | 1141 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part |
996 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code | 1142 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code |
997 // to match foo is generated only once (the traces have a common prefix). The | 1143 // to match foo is generated only once (the traces have a common prefix). The |
998 // code to store the capture is deferred and generated (twice) after the places | 1144 // code to store the capture is deferred and generated (twice) after the places |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1094 cp_offset_ == 0 && | 1240 cp_offset_ == 0 && |
1095 characters_preloaded_ == 0 && | 1241 characters_preloaded_ == 0 && |
1096 bound_checked_up_to_ == 0 && | 1242 bound_checked_up_to_ == 0 && |
1097 quick_check_performed_.characters() == 0 && | 1243 quick_check_performed_.characters() == 0 && |
1098 at_start_ == UNKNOWN; | 1244 at_start_ == UNKNOWN; |
1099 } | 1245 } |
1100 TriBool at_start() { return at_start_; } | 1246 TriBool at_start() { return at_start_; } |
1101 void set_at_start(bool at_start) { | 1247 void set_at_start(bool at_start) { |
1102 at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE; | 1248 at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE; |
1103 } | 1249 } |
1104 Label* backtrack() { return backtrack_; } | 1250 BlockLabel* backtrack() { return backtrack_; } |
1105 Label* loop_label() { return loop_label_; } | 1251 BlockLabel* loop_label() { return loop_label_; } |
1106 RegExpNode* stop_node() { return stop_node_; } | 1252 RegExpNode* stop_node() { return stop_node_; } |
1107 intptr_t characters_preloaded() { return characters_preloaded_; } | 1253 intptr_t characters_preloaded() { return characters_preloaded_; } |
1108 intptr_t bound_checked_up_to() { return bound_checked_up_to_; } | 1254 intptr_t bound_checked_up_to() { return bound_checked_up_to_; } |
1109 intptr_t flush_budget() { return flush_budget_; } | 1255 intptr_t flush_budget() { return flush_budget_; } |
1110 QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; } | 1256 QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; } |
1111 bool mentions_reg(intptr_t reg); | 1257 bool mentions_reg(intptr_t reg); |
1112 // Returns true if a deferred position store exists to the specified | 1258 // Returns true if a deferred position store exists to the specified |
1113 // register and stores the offset in the out-parameter. Otherwise | 1259 // register and stores the offset in the out-parameter. Otherwise |
1114 // returns false. | 1260 // returns false. |
1115 bool GetStoredPosition(intptr_t reg, intptr_t* cp_offset); | 1261 bool GetStoredPosition(intptr_t reg, intptr_t* cp_offset); |
1116 // These set methods and AdvanceCurrentPositionInTrace should be used only on | 1262 // These set methods and AdvanceCurrentPositionInTrace should be used only on |
1117 // new traces - the intention is that traces are immutable after creation. | 1263 // new traces - the intention is that traces are immutable after creation. |
1118 void add_action(DeferredAction* new_action) { | 1264 void add_action(DeferredAction* new_action) { |
1119 ASSERT(new_action->next_ == NULL); | 1265 ASSERT(new_action->next_ == NULL); |
1120 new_action->next_ = actions_; | 1266 new_action->next_ = actions_; |
1121 actions_ = new_action; | 1267 actions_ = new_action; |
1122 } | 1268 } |
1123 void set_backtrack(Label* backtrack) { backtrack_ = backtrack; } | 1269 void set_backtrack(BlockLabel* backtrack) { backtrack_ = backtrack; } |
1124 void set_stop_node(RegExpNode* node) { stop_node_ = node; } | 1270 void set_stop_node(RegExpNode* node) { stop_node_ = node; } |
1125 void set_loop_label(Label* label) { loop_label_ = label; } | 1271 void set_loop_label(BlockLabel* label) { loop_label_ = label; } |
1126 void set_characters_preloaded(intptr_t count) { | 1272 void set_characters_preloaded(intptr_t count) { |
1127 characters_preloaded_ = count; | 1273 characters_preloaded_ = count; |
1128 } | 1274 } |
1129 void set_bound_checked_up_to(intptr_t to) { bound_checked_up_to_ = to; } | 1275 void set_bound_checked_up_to(intptr_t to) { bound_checked_up_to_ = to; } |
1130 void set_flush_budget(intptr_t to) { flush_budget_ = to; } | 1276 void set_flush_budget(intptr_t to) { flush_budget_ = to; } |
1131 void set_quick_check_performed(QuickCheckDetails* d) { | 1277 void set_quick_check_performed(QuickCheckDetails* d) { |
1132 quick_check_performed_ = *d; | 1278 quick_check_performed_ = *d; |
1133 } | 1279 } |
1134 void InvalidateCurrentCharacter(); | 1280 void InvalidateCurrentCharacter(); |
1135 void AdvanceCurrentPositionInTrace(intptr_t by, RegExpCompiler* compiler); | 1281 void AdvanceCurrentPositionInTrace(intptr_t by, RegExpCompiler* compiler); |
1136 | 1282 |
1137 private: | 1283 private: |
1138 intptr_t FindAffectedRegisters(OutSet* affected_registers, Zone* zone); | 1284 intptr_t FindAffectedRegisters(OutSet* affected_registers, Isolate* isolate); |
1139 void PerformDeferredActions(RegExpMacroAssembler* macro, | 1285 void PerformDeferredActions(RegExpMacroAssembler* macro, |
1140 intptr_t max_register, | 1286 intptr_t max_register, |
1141 const OutSet& affected_registers, | 1287 const OutSet& affected_registers, |
1142 OutSet* registers_to_pop, | 1288 OutSet* registers_to_pop, |
1143 OutSet* registers_to_clear, | 1289 OutSet* registers_to_clear, |
1144 Zone* zone); | 1290 Isolate* isolate); |
1145 void RestoreAffectedRegisters(RegExpMacroAssembler* macro, | 1291 void RestoreAffectedRegisters(RegExpMacroAssembler* macro, |
1146 intptr_t max_register, | 1292 intptr_t max_register, |
1147 const OutSet& registers_to_pop, | 1293 const OutSet& registers_to_pop, |
1148 const OutSet& registers_to_clear); | 1294 const OutSet& registers_to_clear); |
1149 intptr_t cp_offset_; | 1295 intptr_t cp_offset_; |
1150 DeferredAction* actions_; | 1296 DeferredAction* actions_; |
1151 Label* backtrack_; | 1297 BlockLabel* backtrack_; |
1152 RegExpNode* stop_node_; | 1298 RegExpNode* stop_node_; |
1153 Label* loop_label_; | 1299 BlockLabel* loop_label_; |
1154 intptr_t characters_preloaded_; | 1300 intptr_t characters_preloaded_; |
1155 intptr_t bound_checked_up_to_; | 1301 intptr_t bound_checked_up_to_; |
1156 QuickCheckDetails quick_check_performed_; | 1302 QuickCheckDetails quick_check_performed_; |
1157 intptr_t flush_budget_; | 1303 intptr_t flush_budget_; |
1158 TriBool at_start_; | 1304 TriBool at_start_; |
1159 }; | 1305 }; |
1160 | 1306 |
1161 | 1307 |
1162 class NodeVisitor : public ValueObject { | 1308 class NodeVisitor : public ValueObject { |
1163 public: | 1309 public: |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1207 | 1353 |
1208 private: | 1354 private: |
1209 bool ignore_case_; | 1355 bool ignore_case_; |
1210 bool is_ascii_; | 1356 bool is_ascii_; |
1211 const char* error_message_; | 1357 const char* error_message_; |
1212 | 1358 |
1213 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); | 1359 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); |
1214 }; | 1360 }; |
1215 | 1361 |
1216 | 1362 |
1217 struct RegExpCompileData { | 1363 struct RegExpCompileData : public ZoneAllocated { |
1218 RegExpCompileData() | 1364 RegExpCompileData() |
1219 : tree(NULL), | 1365 : tree(NULL), |
1220 node(NULL), | 1366 node(NULL), |
1221 simple(true), | 1367 simple(true), |
1222 contains_anchor(false), | 1368 contains_anchor(false), |
1223 error(String::Handle(String::null())), | 1369 error(String::Handle(String::null())), |
1224 capture_count(0) { } | 1370 capture_count(0) { } |
1225 RegExpTree* tree; | 1371 RegExpTree* tree; |
1226 RegExpNode* node; | 1372 RegExpNode* node; |
1227 bool simple; | 1373 bool simple; |
1228 bool contains_anchor; | 1374 bool contains_anchor; |
1229 String& error; | 1375 String& error; |
1230 intptr_t capture_count; | 1376 intptr_t capture_count; |
1231 }; | 1377 }; |
1232 | 1378 |
1233 | 1379 |
1234 class RegExpEngine: public AllStatic { | 1380 class RegExpEngine: public AllStatic { |
1235 public: | 1381 public: |
1236 struct CompilationResult { | 1382 struct CompilationResult { |
1237 explicit CompilationResult(const char* error_message) | 1383 explicit CompilationResult(const char* error_message) |
1238 : error_message(error_message), | 1384 : macro_assembler(NULL), |
1239 num_registers(0) {} | 1385 graph_entry(NULL), |
1240 CompilationResult(Object* code, intptr_t registers) | 1386 num_blocks(-1), |
1241 : error_message(NULL), | 1387 num_stack_locals(-1), |
1242 num_registers(registers) {} | 1388 error_message(error_message) {} |
1389 explicit CompilationResult(IRRegExpMacroAssembler* macro_assembler, | |
Vyacheslav Egorov (Google)
2014/10/01 20:13:21
No need to mark multiple arguments constructor "ex
jgruber1
2014/10/03 18:59:51
Done.
| |
1390 GraphEntryInstr* graph_entry, | |
1391 intptr_t num_blocks, | |
1392 intptr_t num_stack_locals) | |
1393 : macro_assembler(macro_assembler), | |
1394 graph_entry(graph_entry), | |
1395 num_blocks(num_blocks), | |
1396 num_stack_locals(num_stack_locals), | |
1397 error_message(NULL) {} | |
1398 | |
1399 IRRegExpMacroAssembler* macro_assembler; | |
1400 GraphEntryInstr* graph_entry; | |
1401 const intptr_t num_blocks; | |
1402 const intptr_t num_stack_locals; | |
1403 | |
1243 const char* error_message; | 1404 const char* error_message; |
1244 intptr_t num_registers; | |
1245 }; | 1405 }; |
1246 | 1406 |
1247 static CompilationResult Compile(RegExpCompileData* input, | 1407 static CompilationResult Compile( |
1248 bool ignore_case, | 1408 RegExpCompileData* input, |
1249 bool global, | 1409 const ParsedFunction* parsed_function, |
1250 bool multiline, | 1410 ZoneGrowableArray<const ICData*>* ic_data_array); |
1251 const String& pattern, | 1411 |
1252 const String& sample_subject, | 1412 static RawJSRegExp* New(Isolate* isolate, |
1253 bool is_ascii); | 1413 const String& pattern, |
1414 bool multi_line, | |
1415 bool ignore_case); | |
1254 | 1416 |
1255 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1417 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
1256 }; | 1418 }; |
1257 | 1419 |
1258 } // namespace dart | 1420 } // namespace dart |
1259 | 1421 |
1260 #endif // VM_REGEXP_H_ | 1422 #endif // VM_REGEXP_H_ |
OLD | NEW |