OLD | NEW |
---|---|
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #ifndef VM_REGEXP_H_ | 5 #ifndef VM_REGEXP_H_ |
6 #define VM_REGEXP_H_ | 6 #define VM_REGEXP_H_ |
7 | 7 |
8 #include "vm/assembler.h" | 8 #include "vm/assembler.h" |
9 #include "vm/intermediate_language.h" | |
9 #include "vm/object.h" | 10 #include "vm/object.h" |
11 #include "vm/regexp_assembler.h" | |
10 | 12 |
11 namespace dart { | 13 namespace dart { |
12 | 14 |
13 // Forward declarations. | 15 // Forward declarations. |
14 class AlternativeGeneration; | 16 class AlternativeGeneration; |
15 class BoyerMooreLookahead; | 17 class BoyerMooreLookahead; |
16 class NodeVisitor; | 18 class NodeVisitor; |
17 class QuickCheckDetails; | 19 class QuickCheckDetails; |
18 class RegExpAtom; | 20 class RegExpAtom; |
19 class RegExpCharacterClass; | 21 class RegExpCharacterClass; |
(...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
232 // this case anchored branches will always fail and can be ignored when | 234 // this case anchored branches will always fail and can be ignored when |
233 // determining how many characters are consumed on success. | 235 // determining how many characters are consumed on success. |
234 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 236 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, |
235 bool not_at_start) = 0; | 237 bool not_at_start) = 0; |
236 // Emits some quick code that checks whether the preloaded characters match. | 238 // Emits some quick code that checks whether the preloaded characters match. |
237 // Falls through on certain failure, jumps to the label on possible success. | 239 // Falls through on certain failure, jumps to the label on possible success. |
238 // If the node cannot make a quick check it does nothing and returns false. | 240 // If the node cannot make a quick check it does nothing and returns false. |
239 bool EmitQuickCheck(RegExpCompiler* compiler, | 241 bool EmitQuickCheck(RegExpCompiler* compiler, |
240 Trace* trace, | 242 Trace* trace, |
241 bool preload_has_checked_bounds, | 243 bool preload_has_checked_bounds, |
242 Label* on_possible_success, | 244 BlockLabel* on_possible_success, |
243 QuickCheckDetails* details_return, | 245 QuickCheckDetails* details_return, |
244 bool fall_through_on_failure); | 246 bool fall_through_on_failure); |
245 // For a given number of characters this returns a mask and a value. The | 247 // For a given number of characters this returns a mask and a value. The |
246 // next n characters are anded with the mask and compared with the value. | 248 // next n characters are anded with the mask and compared with the value. |
247 // A comparison failure indicates the node cannot match the next n characters. | 249 // A comparison failure indicates the node cannot match the next n characters. |
248 // A comparison success indicates the node may match. | 250 // A comparison success indicates the node may match. |
249 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 251 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
250 RegExpCompiler* compiler, | 252 RegExpCompiler* compiler, |
251 intptr_t characters_filled_in, | 253 intptr_t characters_filled_in, |
252 bool not_at_start) = 0; | 254 bool not_at_start) = 0; |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
292 } | 294 } |
293 | 295 |
294 // We want to avoid recalculating the lookahead info, so we store it on the | 296 // We want to avoid recalculating the lookahead info, so we store it on the |
295 // node. Only info that is for this node is stored. We can tell that the | 297 // node. Only info that is for this node is stored. We can tell that the |
296 // info is for this node when offset == 0, so the information is calculated | 298 // info is for this node when offset == 0, so the information is calculated |
297 // relative to this node. | 299 // relative to this node. |
298 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, intptr_t offset) { | 300 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, intptr_t offset) { |
299 if (offset == 0) set_bm_info(not_at_start, bm); | 301 if (offset == 0) set_bm_info(not_at_start, bm); |
300 } | 302 } |
301 | 303 |
302 Label* label() { return &label_; } | 304 BlockLabel* label() { return &label_; } |
303 // If non-generic code is generated for a node (i.e. the node is not at the | 305 // If non-generic code is generated for a node (i.e. the node is not at the |
304 // start of the trace) then it cannot be reused. This variable sets a limit | 306 // start of the trace) then it cannot be reused. This variable sets a limit |
305 // on how often we allow that to happen before we insist on starting a new | 307 // on how often we allow that to happen before we insist on starting a new |
306 // trace and generating generic code for a node that can be reused by flushing | 308 // trace and generating generic code for a node that can be reused by flushing |
307 // the deferred actions in the current trace and generating a goto. | 309 // the deferred actions in the current trace and generating a goto. |
308 static const intptr_t kMaxCopiesCodeGenerated = 10; | 310 static const intptr_t kMaxCopiesCodeGenerated = 10; |
309 | 311 |
310 NodeInfo* info() { return &info_; } | 312 NodeInfo* info() { return &info_; } |
311 | 313 |
312 BoyerMooreLookahead* bm_info(bool not_at_start) { | 314 BoyerMooreLookahead* bm_info(bool not_at_start) { |
313 return bm_info_[not_at_start ? 1 : 0]; | 315 return bm_info_[not_at_start ? 1 : 0]; |
314 } | 316 } |
315 | 317 |
316 Isolate* isolate() const { return isolate_; } | 318 Isolate* isolate() const { return isolate_; } |
317 | 319 |
318 protected: | 320 protected: |
319 enum LimitResult { DONE, CONTINUE }; | 321 enum LimitResult { DONE, CONTINUE }; |
320 RegExpNode* replacement_; | 322 RegExpNode* replacement_; |
321 | 323 |
324 LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace); | |
325 | |
322 void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) { | 326 void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) { |
323 bm_info_[not_at_start ? 1 : 0] = bm; | 327 bm_info_[not_at_start ? 1 : 0] = bm; |
324 } | 328 } |
325 | 329 |
326 private: | 330 private: |
327 static const intptr_t kFirstCharBudget = 10; | 331 static const intptr_t kFirstCharBudget = 10; |
328 Label label_; | 332 BlockLabel label_; |
329 NodeInfo info_; | 333 NodeInfo info_; |
330 // This variable keeps track of how many times code has been generated for | 334 // This variable keeps track of how many times code has been generated for |
331 // this node (in different traces). We don't keep track of where the | 335 // this node (in different traces). We don't keep track of where the |
332 // generated code is located unless the code is generated at the start of | 336 // generated code is located unless the code is generated at the start of |
333 // a trace, in which case it is generic and can be reused by flushing the | 337 // a trace, in which case it is generic and can be reused by flushing the |
334 // deferred operations in the current trace and generating a goto. | 338 // deferred operations in the current trace and generating a goto. |
335 intptr_t trace_count_; | 339 intptr_t trace_count_; |
336 BoyerMooreLookahead* bm_info_[2]; | 340 BoyerMooreLookahead* bm_info_[2]; |
337 Isolate* isolate_; | 341 Isolate* isolate_; |
338 }; | 342 }; |
(...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
528 RegExpNode* node_; | 532 RegExpNode* node_; |
529 ZoneGrowableArray<Guard*>* guards_; | 533 ZoneGrowableArray<Guard*>* guards_; |
530 }; | 534 }; |
531 | 535 |
532 | 536 |
533 // A set of unsigned integers that behaves especially well on small | 537 // A set of unsigned integers that behaves especially well on small |
534 // integers (< 32). May do zone-allocation. | 538 // integers (< 32). May do zone-allocation. |
535 class OutSet: public ZoneAllocated { | 539 class OutSet: public ZoneAllocated { |
536 public: | 540 public: |
537 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } | 541 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } |
538 OutSet* Extend(unsigned value, Zone* zone); | 542 OutSet* Extend(unsigned value, Isolate* isolate); |
539 bool Get(unsigned value) const; | 543 bool Get(unsigned value) const; |
540 static const unsigned kFirstLimit = 32; | 544 static const unsigned kFirstLimit = 32; |
541 | 545 |
542 private: | 546 private: |
543 // Destructively set a value in this set. In most cases you want | 547 // Destructively set a value in this set. In most cases you want |
544 // to use Extend instead to ensure that only one instance exists | 548 // to use Extend instead to ensure that only one instance exists |
545 // that contains the same values. | 549 // that contains the same values. |
546 void Set(unsigned value, Zone* zone); | 550 void Set(unsigned value, Isolate* isolate); |
547 | 551 |
548 // The successors are a list of sets that contain the same values | 552 // The successors are a list of sets that contain the same values |
549 // as this set and the one more value that is not present in this | 553 // as this set and the one more value that is not present in this |
550 // set. | 554 // set. |
551 ZoneGrowableArray<OutSet*>* successors() { return successors_; } | 555 ZoneGrowableArray<OutSet*>* successors() { return successors_; } |
552 | 556 |
553 OutSet(uint32_t first, ZoneGrowableArray<unsigned>* remaining) | 557 OutSet(uint32_t first, ZoneGrowableArray<unsigned>* remaining) |
554 : first_(first), remaining_(remaining), successors_(NULL) { } | 558 : first_(first), remaining_(remaining), successors_(NULL) { } |
555 uint32_t first_; | 559 uint32_t first_; |
556 ZoneGrowableArray<unsigned>* remaining_; | 560 ZoneGrowableArray<unsigned>* remaining_; |
557 ZoneGrowableArray<OutSet*>* successors_; | 561 ZoneGrowableArray<OutSet*>* successors_; |
558 friend class Trace; | 562 friend class Trace; |
559 }; | 563 }; |
560 | 564 |
561 | 565 |
562 // A mapping from integers, specified as ranges, to a set of integers. | 566 // A mapping from integers, specified as ranges, to a set of integers. |
563 // Used for mapping character ranges to choices. | 567 // Used for mapping character ranges to choices. |
564 class DispatchTable : public ZoneAllocated { | 568 class DispatchTable : public ZoneAllocated { |
565 public: | 569 public: |
566 DispatchTable() { } | 570 DispatchTable() { } |
567 | 571 |
568 class Entry { | 572 class Entry { |
569 public: | 573 public: |
570 Entry() : from_(0), to_(0), out_set_(NULL) { } | 574 Entry() : from_(0), to_(0), out_set_(NULL) { } |
571 Entry(uint16_t from, uint16_t to, OutSet* out_set) | 575 Entry(uint16_t from, uint16_t to, OutSet* out_set) |
572 : from_(from), to_(to), out_set_(out_set) { } | 576 : from_(from), to_(to), out_set_(out_set) { } |
573 uint16_t from() { return from_; } | 577 uint16_t from() { return from_; } |
574 uint16_t to() { return to_; } | 578 uint16_t to() { return to_; } |
575 void set_to(uint16_t value) { to_ = value; } | 579 void set_to(uint16_t value) { to_ = value; } |
576 void AddValue(intptr_t value, Zone* zone) { | 580 void AddValue(intptr_t value, Isolate* isolate) { |
577 out_set_ = out_set_->Extend(value, zone); | 581 out_set_ = out_set_->Extend(value, isolate); |
578 } | 582 } |
579 OutSet* out_set() { return out_set_; } | 583 OutSet* out_set() { return out_set_; } |
580 private: | 584 private: |
581 uint16_t from_; | 585 uint16_t from_; |
582 uint16_t to_; | 586 uint16_t to_; |
583 OutSet* out_set_; | 587 OutSet* out_set_; |
584 }; | 588 }; |
585 | 589 |
586 class Config { | 590 class Config { |
587 public: | 591 public: |
(...skipping 391 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
979 Position positions_[4]; | 983 Position positions_[4]; |
980 // These values are the condensate of the above array after Rationalize(). | 984 // These values are the condensate of the above array after Rationalize(). |
981 uint32_t mask_; | 985 uint32_t mask_; |
982 uint32_t value_; | 986 uint32_t value_; |
983 // If set to true, there is no way this quick check can match at all. | 987 // If set to true, there is no way this quick check can match at all. |
984 // E.g., if it requires to be at the start of the input, and isn't. | 988 // E.g., if it requires to be at the start of the input, and isn't. |
985 bool cannot_match_; | 989 bool cannot_match_; |
986 }; | 990 }; |
987 | 991 |
988 | 992 |
993 // Improve the speed that we scan for an initial point where a non-anchored | |
994 // regexp can match by using a Boyer-Moore-like table. This is done by | |
995 // identifying non-greedy non-capturing loops in the nodes that eat any | |
996 // character one at a time. For example in the middle of the regexp | |
997 // /foo[\s\S]*?bar/ we find such a loop. There is also such a loop implicitly | |
998 // inserted at the start of any non-anchored regexp. | |
999 // | |
1000 // When we have found such a loop we look ahead in the nodes to find the set of | |
1001 // characters that can come at given distances. For example for the regexp | |
1002 // /.?foo/ we know that there are at least 3 characters ahead of us, and the | |
1003 // sets of characters that can occur are [any, [f, o], [o]]. We find a range in | |
1004 // the lookahead info where the set of characters is reasonably constrained. In | |
1005 // our example this is from index 1 to 2 (0 is not constrained). We can now | |
1006 // look 3 characters ahead and if we don't find one of [f, o] (the union of | |
1007 // [f, o] and [o]) then we can skip forwards by the range size (in this case 2). | |
1008 // | |
1009 // For Unicode input strings we do the same, but modulo 128. | |
1010 // | |
1011 // We also look at the first string fed to the regexp and use that to get a hint | |
1012 // of the character frequencies in the inputs. This affects the assessment of | |
1013 // whether the set of characters is 'reasonably constrained'. | |
1014 // | |
1015 // We also have another lookahead mechanism (called quick check in the code), | |
1016 // which uses a wide load of multiple characters followed by a mask and compare | |
1017 // to determine whether a match is possible at this point. | |
1018 enum ContainedInLattice { | |
1019 kNotYet = 0, | |
1020 kLatticeIn = 1, | |
1021 kLatticeOut = 2, | |
1022 kLatticeUnknown = 3 // Can also mean both in and out. | |
1023 }; | |
1024 | |
1025 | |
1026 inline ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b) { | |
1027 return static_cast<ContainedInLattice>(a | b); | |
1028 } | |
1029 | |
1030 | |
1031 ContainedInLattice AddRange(ContainedInLattice a, | |
1032 const int* ranges, | |
1033 intptr_t ranges_size, | |
1034 Interval new_range); | |
1035 | |
1036 | |
1037 class BoyerMoorePositionInfo : public ZoneAllocated { | |
1038 public: | |
1039 explicit BoyerMoorePositionInfo(Isolate* isolate) | |
1040 : map_(new(isolate) ZoneGrowableArray<bool>(kMapSize)), | |
1041 map_count_(0), | |
1042 w_(kNotYet), | |
1043 s_(kNotYet), | |
1044 d_(kNotYet), | |
1045 surrogate_(kNotYet) { | |
1046 for (intptr_t i = 0; i < kMapSize; i++) { | |
1047 map_->Add(false); | |
1048 } | |
1049 } | |
1050 | |
1051 bool& at(intptr_t i) { return map_->At(i); } | |
1052 | |
1053 static const intptr_t kMapSize = 128; | |
1054 static const intptr_t kMask = kMapSize - 1; | |
1055 | |
1056 intptr_t map_count() const { return map_count_; } | |
1057 | |
1058 void Set(intptr_t character); | |
1059 void SetInterval(const Interval& interval); | |
1060 void SetAll(); | |
1061 bool is_non_word() { return w_ == kLatticeOut; } | |
1062 bool is_word() { return w_ == kLatticeIn; } | |
1063 | |
1064 private: | |
1065 ZoneGrowableArray<bool>* map_; | |
1066 intptr_t map_count_; // Number of set bits in the map. | |
1067 ContainedInLattice w_; // The \w character class. | |
1068 ContainedInLattice s_; // The \s character class. | |
1069 ContainedInLattice d_; // The \d character class. | |
1070 ContainedInLattice surrogate_; // Surrogate UTF-16 code units. | |
1071 }; | |
1072 | |
1073 | |
1074 class BoyerMooreLookahead : public ZoneAllocated{ | |
1075 public: | |
1076 BoyerMooreLookahead(intptr_t length, RegExpCompiler* compiler, | |
1077 Isolate* Isolate); | |
1078 | |
1079 intptr_t length() { return length_; } | |
1080 intptr_t max_char() { return max_char_; } | |
1081 RegExpCompiler* compiler() { return compiler_; } | |
1082 | |
1083 intptr_t Count(intptr_t map_number) { | |
1084 return bitmaps_->At(map_number)->map_count(); | |
1085 } | |
1086 | |
1087 BoyerMoorePositionInfo* at(intptr_t i) { return bitmaps_->At(i); } | |
1088 | |
1089 void Set(intptr_t map_number, intptr_t character) { | |
1090 if (character > max_char_) return; | |
1091 BoyerMoorePositionInfo* info = bitmaps_->At(map_number); | |
1092 info->Set(character); | |
1093 } | |
1094 | |
1095 void SetInterval(intptr_t map_number, const Interval& interval) { | |
1096 if (interval.from() > max_char_) return; | |
1097 BoyerMoorePositionInfo* info = bitmaps_->At(map_number); | |
1098 if (interval.to() > max_char_) { | |
1099 info->SetInterval(Interval(interval.from(), max_char_)); | |
1100 } else { | |
1101 info->SetInterval(interval); | |
1102 } | |
1103 } | |
1104 | |
1105 void SetAll(intptr_t map_number) { | |
1106 bitmaps_->At(map_number)->SetAll(); | |
1107 } | |
1108 | |
1109 void SetRest(intptr_t from_map) { | |
1110 for (intptr_t i = from_map; i < length_; i++) SetAll(i); | |
1111 } | |
1112 bool EmitSkipInstructions(RegExpMacroAssembler* masm); | |
1113 | |
1114 private: | |
1115 // This is the value obtained by EatsAtLeast. If we do not have at least this | |
1116 // many characters left in the sample string then the match is bound to fail. | |
1117 // Therefore it is OK to read a character this far ahead of the current match | |
1118 // point. | |
1119 intptr_t length_; | |
1120 RegExpCompiler* compiler_; | |
1121 // 0x7f for ASCII, 0xffff for UTF-16. | |
1122 intptr_t max_char_; | |
1123 ZoneGrowableArray<BoyerMoorePositionInfo*>* bitmaps_; | |
1124 | |
1125 intptr_t GetSkipTable(intptr_t min_lookahead, | |
1126 intptr_t max_lookahead, | |
ricow1
2014/09/05 05:49:58
indentation
jgruber1
2014/09/05 06:17:25
Done.
| |
1127 TypedData* boolean_skip_table); | |
1128 bool FindWorthwhileInterval(int* from, int* to); | |
1129 intptr_t FindBestInterval( | |
1130 intptr_t max_number_of_chars, | |
1131 intptr_t old_biggest_points, | |
1132 int* from, int* to); | |
1133 }; | |
1134 | |
1135 | |
989 // There are many ways to generate code for a node. This class encapsulates | 1136 // There are many ways to generate code for a node. This class encapsulates |
990 // the current way we should be generating. In other words it encapsulates | 1137 // the current way we should be generating. In other words it encapsulates |
991 // the current state of the code generator. The effect of this is that we | 1138 // the current state of the code generator. The effect of this is that we |
992 // generate code for paths that the matcher can take through the regular | 1139 // generate code for paths that the matcher can take through the regular |
993 // expression. A given node in the regexp can be code-generated several times | 1140 // expression. A given node in the regexp can be code-generated several times |
994 // as it can be part of several traces. For example for the regexp: | 1141 // as it can be part of several traces. For example for the regexp: |
995 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part | 1142 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part |
996 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code | 1143 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code |
997 // to match foo is generated only once (the traces have a common prefix). The | 1144 // to match foo is generated only once (the traces have a common prefix). The |
998 // code to store the capture is deferred and generated (twice) after the places | 1145 // code to store the capture is deferred and generated (twice) after the places |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1094 cp_offset_ == 0 && | 1241 cp_offset_ == 0 && |
1095 characters_preloaded_ == 0 && | 1242 characters_preloaded_ == 0 && |
1096 bound_checked_up_to_ == 0 && | 1243 bound_checked_up_to_ == 0 && |
1097 quick_check_performed_.characters() == 0 && | 1244 quick_check_performed_.characters() == 0 && |
1098 at_start_ == UNKNOWN; | 1245 at_start_ == UNKNOWN; |
1099 } | 1246 } |
1100 TriBool at_start() { return at_start_; } | 1247 TriBool at_start() { return at_start_; } |
1101 void set_at_start(bool at_start) { | 1248 void set_at_start(bool at_start) { |
1102 at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE; | 1249 at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE; |
1103 } | 1250 } |
1104 Label* backtrack() { return backtrack_; } | 1251 BlockLabel* backtrack() { return backtrack_; } |
1105 Label* loop_label() { return loop_label_; } | 1252 BlockLabel* loop_label() { return loop_label_; } |
1106 RegExpNode* stop_node() { return stop_node_; } | 1253 RegExpNode* stop_node() { return stop_node_; } |
1107 intptr_t characters_preloaded() { return characters_preloaded_; } | 1254 intptr_t characters_preloaded() { return characters_preloaded_; } |
1108 intptr_t bound_checked_up_to() { return bound_checked_up_to_; } | 1255 intptr_t bound_checked_up_to() { return bound_checked_up_to_; } |
1109 intptr_t flush_budget() { return flush_budget_; } | 1256 intptr_t flush_budget() { return flush_budget_; } |
1110 QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; } | 1257 QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; } |
1111 bool mentions_reg(intptr_t reg); | 1258 bool mentions_reg(intptr_t reg); |
1112 // Returns true if a deferred position store exists to the specified | 1259 // Returns true if a deferred position store exists to the specified |
1113 // register and stores the offset in the out-parameter. Otherwise | 1260 // register and stores the offset in the out-parameter. Otherwise |
1114 // returns false. | 1261 // returns false. |
1115 bool GetStoredPosition(intptr_t reg, intptr_t* cp_offset); | 1262 bool GetStoredPosition(intptr_t reg, intptr_t* cp_offset); |
1116 // These set methods and AdvanceCurrentPositionInTrace should be used only on | 1263 // These set methods and AdvanceCurrentPositionInTrace should be used only on |
1117 // new traces - the intention is that traces are immutable after creation. | 1264 // new traces - the intention is that traces are immutable after creation. |
1118 void add_action(DeferredAction* new_action) { | 1265 void add_action(DeferredAction* new_action) { |
1119 ASSERT(new_action->next_ == NULL); | 1266 ASSERT(new_action->next_ == NULL); |
1120 new_action->next_ = actions_; | 1267 new_action->next_ = actions_; |
1121 actions_ = new_action; | 1268 actions_ = new_action; |
1122 } | 1269 } |
1123 void set_backtrack(Label* backtrack) { backtrack_ = backtrack; } | 1270 void set_backtrack(BlockLabel* backtrack) { backtrack_ = backtrack; } |
1124 void set_stop_node(RegExpNode* node) { stop_node_ = node; } | 1271 void set_stop_node(RegExpNode* node) { stop_node_ = node; } |
1125 void set_loop_label(Label* label) { loop_label_ = label; } | 1272 void set_loop_label(BlockLabel* label) { loop_label_ = label; } |
1126 void set_characters_preloaded(intptr_t count) { | 1273 void set_characters_preloaded(intptr_t count) { |
1127 characters_preloaded_ = count; | 1274 characters_preloaded_ = count; |
1128 } | 1275 } |
1129 void set_bound_checked_up_to(intptr_t to) { bound_checked_up_to_ = to; } | 1276 void set_bound_checked_up_to(intptr_t to) { bound_checked_up_to_ = to; } |
1130 void set_flush_budget(intptr_t to) { flush_budget_ = to; } | 1277 void set_flush_budget(intptr_t to) { flush_budget_ = to; } |
1131 void set_quick_check_performed(QuickCheckDetails* d) { | 1278 void set_quick_check_performed(QuickCheckDetails* d) { |
1132 quick_check_performed_ = *d; | 1279 quick_check_performed_ = *d; |
1133 } | 1280 } |
1134 void InvalidateCurrentCharacter(); | 1281 void InvalidateCurrentCharacter(); |
1135 void AdvanceCurrentPositionInTrace(intptr_t by, RegExpCompiler* compiler); | 1282 void AdvanceCurrentPositionInTrace(intptr_t by, RegExpCompiler* compiler); |
1136 | 1283 |
1137 private: | 1284 private: |
1138 intptr_t FindAffectedRegisters(OutSet* affected_registers, Zone* zone); | 1285 intptr_t FindAffectedRegisters(OutSet* affected_registers, Isolate* isolate); |
1139 void PerformDeferredActions(RegExpMacroAssembler* macro, | 1286 void PerformDeferredActions(RegExpMacroAssembler* macro, |
1140 intptr_t max_register, | 1287 intptr_t max_register, |
1141 const OutSet& affected_registers, | 1288 const OutSet& affected_registers, |
1142 OutSet* registers_to_pop, | 1289 OutSet* registers_to_pop, |
1143 OutSet* registers_to_clear, | 1290 OutSet* registers_to_clear, |
1144 Zone* zone); | 1291 Isolate* isolate); |
1145 void RestoreAffectedRegisters(RegExpMacroAssembler* macro, | 1292 void RestoreAffectedRegisters(RegExpMacroAssembler* macro, |
1146 intptr_t max_register, | 1293 intptr_t max_register, |
1147 const OutSet& registers_to_pop, | 1294 const OutSet& registers_to_pop, |
1148 const OutSet& registers_to_clear); | 1295 const OutSet& registers_to_clear); |
1149 intptr_t cp_offset_; | 1296 intptr_t cp_offset_; |
1150 DeferredAction* actions_; | 1297 DeferredAction* actions_; |
1151 Label* backtrack_; | 1298 BlockLabel* backtrack_; |
1152 RegExpNode* stop_node_; | 1299 RegExpNode* stop_node_; |
1153 Label* loop_label_; | 1300 BlockLabel* loop_label_; |
1154 intptr_t characters_preloaded_; | 1301 intptr_t characters_preloaded_; |
1155 intptr_t bound_checked_up_to_; | 1302 intptr_t bound_checked_up_to_; |
1156 QuickCheckDetails quick_check_performed_; | 1303 QuickCheckDetails quick_check_performed_; |
1157 intptr_t flush_budget_; | 1304 intptr_t flush_budget_; |
1158 TriBool at_start_; | 1305 TriBool at_start_; |
1159 }; | 1306 }; |
1160 | 1307 |
1161 | 1308 |
1162 class NodeVisitor : public ValueObject { | 1309 class NodeVisitor : public ValueObject { |
1163 public: | 1310 public: |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1228 bool contains_anchor; | 1375 bool contains_anchor; |
1229 String& error; | 1376 String& error; |
1230 intptr_t capture_count; | 1377 intptr_t capture_count; |
1231 }; | 1378 }; |
1232 | 1379 |
1233 | 1380 |
1234 class RegExpEngine: public AllStatic { | 1381 class RegExpEngine: public AllStatic { |
1235 public: | 1382 public: |
1236 struct CompilationResult { | 1383 struct CompilationResult { |
1237 explicit CompilationResult(const char* error_message) | 1384 explicit CompilationResult(const char* error_message) |
1238 : error_message(error_message), | 1385 : function(Function::Handle()), |
1239 num_registers(0) {} | 1386 error_message(error_message) {} |
1240 CompilationResult(Object* code, intptr_t registers) | 1387 explicit CompilationResult(const Function& function) |
1241 : error_message(NULL), | 1388 : function(function), |
1242 num_registers(registers) {} | 1389 error_message(NULL) {} |
1390 | |
1391 const Function& function; | |
1243 const char* error_message; | 1392 const char* error_message; |
1244 intptr_t num_registers; | |
1245 }; | 1393 }; |
1246 | 1394 |
1247 static CompilationResult Compile(RegExpCompileData* input, | 1395 static CompilationResult Compile(RegExpCompileData* input, |
1248 bool ignore_case, | 1396 bool ignore_case, |
1249 bool global, | 1397 bool global, |
1250 bool multiline, | 1398 bool multiline, |
1251 const String& pattern, | 1399 const String& pattern, |
1252 const String& sample_subject, | 1400 const String& sample_subject, |
1253 bool is_ascii); | 1401 bool is_ascii); |
1254 | 1402 |
1403 static RawJSRegExp* New(const String& pattern, | |
1404 bool multi_line, | |
1405 bool ignore_case); | |
1406 | |
1255 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1407 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
1256 }; | 1408 }; |
1257 | 1409 |
1258 } // namespace dart | 1410 } // namespace dart |
1259 | 1411 |
1260 #endif // VM_REGEXP_H_ | 1412 #endif // VM_REGEXP_H_ |
OLD | NEW |