Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef VM_REGEXP_H_ | 5 #ifndef VM_REGEXP_H_ |
| 6 #define VM_REGEXP_H_ | 6 #define VM_REGEXP_H_ |
| 7 | 7 |
| 8 #include "vm/assembler.h" | 8 #include "vm/assembler.h" |
| 9 #include "vm/intermediate_language.h" | |
| 9 #include "vm/object.h" | 10 #include "vm/object.h" |
| 11 #include "vm/regexp_assembler.h" | |
| 10 | 12 |
| 11 namespace dart { | 13 namespace dart { |
| 12 | 14 |
| 13 // Forward declarations. | 15 // Forward declarations. |
| 14 class AlternativeGeneration; | 16 class AlternativeGeneration; |
| 15 class BoyerMooreLookahead; | 17 class BoyerMooreLookahead; |
| 16 class NodeVisitor; | 18 class NodeVisitor; |
| 17 class QuickCheckDetails; | 19 class QuickCheckDetails; |
| 18 class RegExpAtom; | 20 class RegExpAtom; |
| 19 class RegExpCharacterClass; | 21 class RegExpCharacterClass; |
| (...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 232 // this case anchored branches will always fail and can be ignored when | 234 // this case anchored branches will always fail and can be ignored when |
| 233 // determining how many characters are consumed on success. | 235 // determining how many characters are consumed on success. |
| 234 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 236 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, |
| 235 bool not_at_start) = 0; | 237 bool not_at_start) = 0; |
| 236 // Emits some quick code that checks whether the preloaded characters match. | 238 // Emits some quick code that checks whether the preloaded characters match. |
| 237 // Falls through on certain failure, jumps to the label on possible success. | 239 // Falls through on certain failure, jumps to the label on possible success. |
| 238 // If the node cannot make a quick check it does nothing and returns false. | 240 // If the node cannot make a quick check it does nothing and returns false. |
| 239 bool EmitQuickCheck(RegExpCompiler* compiler, | 241 bool EmitQuickCheck(RegExpCompiler* compiler, |
| 240 Trace* trace, | 242 Trace* trace, |
| 241 bool preload_has_checked_bounds, | 243 bool preload_has_checked_bounds, |
| 242 Label* on_possible_success, | 244 BlockLabel* on_possible_success, |
| 243 QuickCheckDetails* details_return, | 245 QuickCheckDetails* details_return, |
| 244 bool fall_through_on_failure); | 246 bool fall_through_on_failure); |
| 245 // For a given number of characters this returns a mask and a value. The | 247 // For a given number of characters this returns a mask and a value. The |
| 246 // next n characters are anded with the mask and compared with the value. | 248 // next n characters are anded with the mask and compared with the value. |
| 247 // A comparison failure indicates the node cannot match the next n characters. | 249 // A comparison failure indicates the node cannot match the next n characters. |
| 248 // A comparison success indicates the node may match. | 250 // A comparison success indicates the node may match. |
| 249 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 251 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 250 RegExpCompiler* compiler, | 252 RegExpCompiler* compiler, |
| 251 intptr_t characters_filled_in, | 253 intptr_t characters_filled_in, |
| 252 bool not_at_start) = 0; | 254 bool not_at_start) = 0; |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 292 } | 294 } |
| 293 | 295 |
| 294 // We want to avoid recalculating the lookahead info, so we store it on the | 296 // We want to avoid recalculating the lookahead info, so we store it on the |
| 295 // node. Only info that is for this node is stored. We can tell that the | 297 // node. Only info that is for this node is stored. We can tell that the |
| 296 // info is for this node when offset == 0, so the information is calculated | 298 // info is for this node when offset == 0, so the information is calculated |
| 297 // relative to this node. | 299 // relative to this node. |
| 298 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, intptr_t offset) { | 300 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, intptr_t offset) { |
| 299 if (offset == 0) set_bm_info(not_at_start, bm); | 301 if (offset == 0) set_bm_info(not_at_start, bm); |
| 300 } | 302 } |
| 301 | 303 |
| 302 Label* label() { return &label_; } | 304 BlockLabel* label() { return &label_; } |
| 303 // If non-generic code is generated for a node (i.e. the node is not at the | 305 // If non-generic code is generated for a node (i.e. the node is not at the |
| 304 // start of the trace) then it cannot be reused. This variable sets a limit | 306 // start of the trace) then it cannot be reused. This variable sets a limit |
| 305 // on how often we allow that to happen before we insist on starting a new | 307 // on how often we allow that to happen before we insist on starting a new |
| 306 // trace and generating generic code for a node that can be reused by flushing | 308 // trace and generating generic code for a node that can be reused by flushing |
| 307 // the deferred actions in the current trace and generating a goto. | 309 // the deferred actions in the current trace and generating a goto. |
| 308 static const intptr_t kMaxCopiesCodeGenerated = 10; | 310 static const intptr_t kMaxCopiesCodeGenerated = 10; |
| 309 | 311 |
| 310 NodeInfo* info() { return &info_; } | 312 NodeInfo* info() { return &info_; } |
| 311 | 313 |
| 312 BoyerMooreLookahead* bm_info(bool not_at_start) { | 314 BoyerMooreLookahead* bm_info(bool not_at_start) { |
| 313 return bm_info_[not_at_start ? 1 : 0]; | 315 return bm_info_[not_at_start ? 1 : 0]; |
| 314 } | 316 } |
| 315 | 317 |
| 316 Isolate* isolate() const { return isolate_; } | 318 Isolate* isolate() const { return isolate_; } |
| 317 | 319 |
| 318 protected: | 320 protected: |
| 319 enum LimitResult { DONE, CONTINUE }; | 321 enum LimitResult { DONE, CONTINUE }; |
| 320 RegExpNode* replacement_; | 322 RegExpNode* replacement_; |
| 321 | 323 |
| 324 LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace); | |
| 325 | |
| 322 void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) { | 326 void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) { |
| 323 bm_info_[not_at_start ? 1 : 0] = bm; | 327 bm_info_[not_at_start ? 1 : 0] = bm; |
| 324 } | 328 } |
| 325 | 329 |
| 326 private: | 330 private: |
| 327 static const intptr_t kFirstCharBudget = 10; | 331 static const intptr_t kFirstCharBudget = 10; |
| 328 Label label_; | 332 BlockLabel label_; |
| 329 NodeInfo info_; | 333 NodeInfo info_; |
| 330 // This variable keeps track of how many times code has been generated for | 334 // This variable keeps track of how many times code has been generated for |
| 331 // this node (in different traces). We don't keep track of where the | 335 // this node (in different traces). We don't keep track of where the |
| 332 // generated code is located unless the code is generated at the start of | 336 // generated code is located unless the code is generated at the start of |
| 333 // a trace, in which case it is generic and can be reused by flushing the | 337 // a trace, in which case it is generic and can be reused by flushing the |
| 334 // deferred operations in the current trace and generating a goto. | 338 // deferred operations in the current trace and generating a goto. |
| 335 intptr_t trace_count_; | 339 intptr_t trace_count_; |
| 336 BoyerMooreLookahead* bm_info_[2]; | 340 BoyerMooreLookahead* bm_info_[2]; |
| 337 Isolate* isolate_; | 341 Isolate* isolate_; |
| 338 }; | 342 }; |
| (...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 528 RegExpNode* node_; | 532 RegExpNode* node_; |
| 529 ZoneGrowableArray<Guard*>* guards_; | 533 ZoneGrowableArray<Guard*>* guards_; |
| 530 }; | 534 }; |
| 531 | 535 |
| 532 | 536 |
| 533 // A set of unsigned integers that behaves especially well on small | 537 // A set of unsigned integers that behaves especially well on small |
| 534 // integers (< 32). May do zone-allocation. | 538 // integers (< 32). May do zone-allocation. |
| 535 class OutSet: public ZoneAllocated { | 539 class OutSet: public ZoneAllocated { |
| 536 public: | 540 public: |
| 537 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } | 541 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } |
| 538 OutSet* Extend(unsigned value, Zone* zone); | 542 OutSet* Extend(unsigned value, Isolate* isolate); |
| 539 bool Get(unsigned value) const; | 543 bool Get(unsigned value) const; |
| 540 static const unsigned kFirstLimit = 32; | 544 static const unsigned kFirstLimit = 32; |
| 541 | 545 |
| 542 private: | 546 private: |
| 543 // Destructively set a value in this set. In most cases you want | 547 // Destructively set a value in this set. In most cases you want |
| 544 // to use Extend instead to ensure that only one instance exists | 548 // to use Extend instead to ensure that only one instance exists |
| 545 // that contains the same values. | 549 // that contains the same values. |
| 546 void Set(unsigned value, Zone* zone); | 550 void Set(unsigned value, Isolate* isolate); |
| 547 | 551 |
| 548 // The successors are a list of sets that contain the same values | 552 // The successors are a list of sets that contain the same values |
| 549 // as this set and the one more value that is not present in this | 553 // as this set and the one more value that is not present in this |
| 550 // set. | 554 // set. |
| 551 ZoneGrowableArray<OutSet*>* successors() { return successors_; } | 555 ZoneGrowableArray<OutSet*>* successors() { return successors_; } |
| 552 | 556 |
| 553 OutSet(uint32_t first, ZoneGrowableArray<unsigned>* remaining) | 557 OutSet(uint32_t first, ZoneGrowableArray<unsigned>* remaining) |
| 554 : first_(first), remaining_(remaining), successors_(NULL) { } | 558 : first_(first), remaining_(remaining), successors_(NULL) { } |
| 555 uint32_t first_; | 559 uint32_t first_; |
| 556 ZoneGrowableArray<unsigned>* remaining_; | 560 ZoneGrowableArray<unsigned>* remaining_; |
| 557 ZoneGrowableArray<OutSet*>* successors_; | 561 ZoneGrowableArray<OutSet*>* successors_; |
| 558 friend class Trace; | 562 friend class Trace; |
| 559 }; | 563 }; |
| 560 | 564 |
| 561 | 565 |
| 562 // A mapping from integers, specified as ranges, to a set of integers. | 566 // A mapping from integers, specified as ranges, to a set of integers. |
| 563 // Used for mapping character ranges to choices. | 567 // Used for mapping character ranges to choices. |
| 564 class DispatchTable : public ZoneAllocated { | 568 class DispatchTable : public ZoneAllocated { |
| 565 public: | 569 public: |
| 566 DispatchTable() { } | 570 DispatchTable() { } |
| 567 | 571 |
| 568 class Entry { | 572 class Entry { |
| 569 public: | 573 public: |
| 570 Entry() : from_(0), to_(0), out_set_(NULL) { } | 574 Entry() : from_(0), to_(0), out_set_(NULL) { } |
| 571 Entry(uint16_t from, uint16_t to, OutSet* out_set) | 575 Entry(uint16_t from, uint16_t to, OutSet* out_set) |
| 572 : from_(from), to_(to), out_set_(out_set) { } | 576 : from_(from), to_(to), out_set_(out_set) { } |
| 573 uint16_t from() { return from_; } | 577 uint16_t from() { return from_; } |
| 574 uint16_t to() { return to_; } | 578 uint16_t to() { return to_; } |
| 575 void set_to(uint16_t value) { to_ = value; } | 579 void set_to(uint16_t value) { to_ = value; } |
| 576 void AddValue(intptr_t value, Zone* zone) { | 580 void AddValue(intptr_t value, Isolate* isolate) { |
| 577 out_set_ = out_set_->Extend(value, zone); | 581 out_set_ = out_set_->Extend(value, isolate); |
| 578 } | 582 } |
| 579 OutSet* out_set() { return out_set_; } | 583 OutSet* out_set() { return out_set_; } |
| 580 private: | 584 private: |
| 581 uint16_t from_; | 585 uint16_t from_; |
| 582 uint16_t to_; | 586 uint16_t to_; |
| 583 OutSet* out_set_; | 587 OutSet* out_set_; |
| 584 }; | 588 }; |
| 585 | 589 |
| 586 class Config { | 590 class Config { |
| 587 public: | 591 public: |
| (...skipping 391 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 979 Position positions_[4]; | 983 Position positions_[4]; |
| 980 // These values are the condensate of the above array after Rationalize(). | 984 // These values are the condensate of the above array after Rationalize(). |
| 981 uint32_t mask_; | 985 uint32_t mask_; |
| 982 uint32_t value_; | 986 uint32_t value_; |
| 983 // If set to true, there is no way this quick check can match at all. | 987 // If set to true, there is no way this quick check can match at all. |
| 984 // E.g., if it requires to be at the start of the input, and isn't. | 988 // E.g., if it requires to be at the start of the input, and isn't. |
| 985 bool cannot_match_; | 989 bool cannot_match_; |
| 986 }; | 990 }; |
| 987 | 991 |
| 988 | 992 |
| 993 // Improve the speed that we scan for an initial point where a non-anchored | |
| 994 // regexp can match by using a Boyer-Moore-like table. This is done by | |
| 995 // identifying non-greedy non-capturing loops in the nodes that eat any | |
| 996 // character one at a time. For example in the middle of the regexp | |
| 997 // /foo[\s\S]*?bar/ we find such a loop. There is also such a loop implicitly | |
| 998 // inserted at the start of any non-anchored regexp. | |
| 999 // | |
| 1000 // When we have found such a loop we look ahead in the nodes to find the set of | |
| 1001 // characters that can come at given distances. For example for the regexp | |
| 1002 // /.?foo/ we know that there are at least 3 characters ahead of us, and the | |
| 1003 // sets of characters that can occur are [any, [f, o], [o]]. We find a range in | |
| 1004 // the lookahead info where the set of characters is reasonably constrained. In | |
| 1005 // our example this is from index 1 to 2 (0 is not constrained). We can now | |
| 1006 // look 3 characters ahead and if we don't find one of [f, o] (the union of | |
| 1007 // [f, o] and [o]) then we can skip forwards by the range size (in this case 2). | |
| 1008 // | |
| 1009 // For Unicode input strings we do the same, but modulo 128. | |
| 1010 // | |
| 1011 // We also look at the first string fed to the regexp and use that to get a hint | |
| 1012 // of the character frequencies in the inputs. This affects the assessment of | |
| 1013 // whether the set of characters is 'reasonably constrained'. | |
| 1014 // | |
| 1015 // We also have another lookahead mechanism (called quick check in the code), | |
| 1016 // which uses a wide load of multiple characters followed by a mask and compare | |
| 1017 // to determine whether a match is possible at this point. | |
| 1018 enum ContainedInLattice { | |
| 1019 kNotYet = 0, | |
| 1020 kLatticeIn = 1, | |
| 1021 kLatticeOut = 2, | |
| 1022 kLatticeUnknown = 3 // Can also mean both in and out. | |
| 1023 }; | |
| 1024 | |
| 1025 | |
| 1026 inline ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b) { | |
| 1027 return static_cast<ContainedInLattice>(a | b); | |
| 1028 } | |
| 1029 | |
| 1030 | |
| 1031 ContainedInLattice AddRange(ContainedInLattice a, | |
| 1032 const int* ranges, | |
| 1033 intptr_t ranges_size, | |
| 1034 Interval new_range); | |
| 1035 | |
| 1036 | |
| 1037 class BoyerMoorePositionInfo : public ZoneAllocated { | |
| 1038 public: | |
| 1039 explicit BoyerMoorePositionInfo(Isolate* isolate) | |
| 1040 : map_(new(isolate) ZoneGrowableArray<bool>(kMapSize)), | |
| 1041 map_count_(0), | |
| 1042 w_(kNotYet), | |
| 1043 s_(kNotYet), | |
| 1044 d_(kNotYet), | |
| 1045 surrogate_(kNotYet) { | |
| 1046 for (intptr_t i = 0; i < kMapSize; i++) { | |
| 1047 map_->Add(false); | |
| 1048 } | |
| 1049 } | |
| 1050 | |
| 1051 bool& at(intptr_t i) { return map_->At(i); } | |
| 1052 | |
| 1053 static const intptr_t kMapSize = 128; | |
| 1054 static const intptr_t kMask = kMapSize - 1; | |
| 1055 | |
| 1056 intptr_t map_count() const { return map_count_; } | |
| 1057 | |
| 1058 void Set(intptr_t character); | |
| 1059 void SetInterval(const Interval& interval); | |
| 1060 void SetAll(); | |
| 1061 bool is_non_word() { return w_ == kLatticeOut; } | |
| 1062 bool is_word() { return w_ == kLatticeIn; } | |
| 1063 | |
| 1064 private: | |
| 1065 ZoneGrowableArray<bool>* map_; | |
| 1066 intptr_t map_count_; // Number of set bits in the map. | |
| 1067 ContainedInLattice w_; // The \w character class. | |
| 1068 ContainedInLattice s_; // The \s character class. | |
| 1069 ContainedInLattice d_; // The \d character class. | |
| 1070 ContainedInLattice surrogate_; // Surrogate UTF-16 code units. | |
| 1071 }; | |
| 1072 | |
| 1073 | |
| 1074 class BoyerMooreLookahead : public ZoneAllocated{ | |
| 1075 public: | |
| 1076 BoyerMooreLookahead(intptr_t length, RegExpCompiler* compiler, | |
| 1077 Isolate* Isolate); | |
| 1078 | |
| 1079 intptr_t length() { return length_; } | |
| 1080 intptr_t max_char() { return max_char_; } | |
| 1081 RegExpCompiler* compiler() { return compiler_; } | |
| 1082 | |
| 1083 intptr_t Count(intptr_t map_number) { | |
| 1084 return bitmaps_->At(map_number)->map_count(); | |
| 1085 } | |
| 1086 | |
| 1087 BoyerMoorePositionInfo* at(intptr_t i) { return bitmaps_->At(i); } | |
| 1088 | |
| 1089 void Set(intptr_t map_number, intptr_t character) { | |
| 1090 if (character > max_char_) return; | |
| 1091 BoyerMoorePositionInfo* info = bitmaps_->At(map_number); | |
| 1092 info->Set(character); | |
| 1093 } | |
| 1094 | |
| 1095 void SetInterval(intptr_t map_number, const Interval& interval) { | |
| 1096 if (interval.from() > max_char_) return; | |
| 1097 BoyerMoorePositionInfo* info = bitmaps_->At(map_number); | |
| 1098 if (interval.to() > max_char_) { | |
| 1099 info->SetInterval(Interval(interval.from(), max_char_)); | |
| 1100 } else { | |
| 1101 info->SetInterval(interval); | |
| 1102 } | |
| 1103 } | |
| 1104 | |
| 1105 void SetAll(intptr_t map_number) { | |
| 1106 bitmaps_->At(map_number)->SetAll(); | |
| 1107 } | |
| 1108 | |
| 1109 void SetRest(intptr_t from_map) { | |
| 1110 for (intptr_t i = from_map; i < length_; i++) SetAll(i); | |
| 1111 } | |
| 1112 bool EmitSkipInstructions(RegExpMacroAssembler* masm); | |
| 1113 | |
| 1114 private: | |
| 1115 // This is the value obtained by EatsAtLeast. If we do not have at least this | |
| 1116 // many characters left in the sample string then the match is bound to fail. | |
| 1117 // Therefore it is OK to read a character this far ahead of the current match | |
| 1118 // point. | |
| 1119 intptr_t length_; | |
| 1120 RegExpCompiler* compiler_; | |
| 1121 // 0x7f for ASCII, 0xffff for UTF-16. | |
| 1122 intptr_t max_char_; | |
| 1123 ZoneGrowableArray<BoyerMoorePositionInfo*>* bitmaps_; | |
| 1124 | |
| 1125 intptr_t GetSkipTable(intptr_t min_lookahead, | |
| 1126 intptr_t max_lookahead, | |
|
ricow1
2014/09/05 05:49:58
indentation
jgruber1
2014/09/05 06:17:25
Done.
| |
| 1127 TypedData* boolean_skip_table); | |
| 1128 bool FindWorthwhileInterval(int* from, int* to); | |
| 1129 intptr_t FindBestInterval( | |
| 1130 intptr_t max_number_of_chars, | |
| 1131 intptr_t old_biggest_points, | |
| 1132 int* from, int* to); | |
| 1133 }; | |
| 1134 | |
| 1135 | |
| 989 // There are many ways to generate code for a node. This class encapsulates | 1136 // There are many ways to generate code for a node. This class encapsulates |
| 990 // the current way we should be generating. In other words it encapsulates | 1137 // the current way we should be generating. In other words it encapsulates |
| 991 // the current state of the code generator. The effect of this is that we | 1138 // the current state of the code generator. The effect of this is that we |
| 992 // generate code for paths that the matcher can take through the regular | 1139 // generate code for paths that the matcher can take through the regular |
| 993 // expression. A given node in the regexp can be code-generated several times | 1140 // expression. A given node in the regexp can be code-generated several times |
| 994 // as it can be part of several traces. For example for the regexp: | 1141 // as it can be part of several traces. For example for the regexp: |
| 995 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part | 1142 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part |
| 996 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code | 1143 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code |
| 997 // to match foo is generated only once (the traces have a common prefix). The | 1144 // to match foo is generated only once (the traces have a common prefix). The |
| 998 // code to store the capture is deferred and generated (twice) after the places | 1145 // code to store the capture is deferred and generated (twice) after the places |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1094 cp_offset_ == 0 && | 1241 cp_offset_ == 0 && |
| 1095 characters_preloaded_ == 0 && | 1242 characters_preloaded_ == 0 && |
| 1096 bound_checked_up_to_ == 0 && | 1243 bound_checked_up_to_ == 0 && |
| 1097 quick_check_performed_.characters() == 0 && | 1244 quick_check_performed_.characters() == 0 && |
| 1098 at_start_ == UNKNOWN; | 1245 at_start_ == UNKNOWN; |
| 1099 } | 1246 } |
| 1100 TriBool at_start() { return at_start_; } | 1247 TriBool at_start() { return at_start_; } |
| 1101 void set_at_start(bool at_start) { | 1248 void set_at_start(bool at_start) { |
| 1102 at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE; | 1249 at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE; |
| 1103 } | 1250 } |
| 1104 Label* backtrack() { return backtrack_; } | 1251 BlockLabel* backtrack() { return backtrack_; } |
| 1105 Label* loop_label() { return loop_label_; } | 1252 BlockLabel* loop_label() { return loop_label_; } |
| 1106 RegExpNode* stop_node() { return stop_node_; } | 1253 RegExpNode* stop_node() { return stop_node_; } |
| 1107 intptr_t characters_preloaded() { return characters_preloaded_; } | 1254 intptr_t characters_preloaded() { return characters_preloaded_; } |
| 1108 intptr_t bound_checked_up_to() { return bound_checked_up_to_; } | 1255 intptr_t bound_checked_up_to() { return bound_checked_up_to_; } |
| 1109 intptr_t flush_budget() { return flush_budget_; } | 1256 intptr_t flush_budget() { return flush_budget_; } |
| 1110 QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; } | 1257 QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; } |
| 1111 bool mentions_reg(intptr_t reg); | 1258 bool mentions_reg(intptr_t reg); |
| 1112 // Returns true if a deferred position store exists to the specified | 1259 // Returns true if a deferred position store exists to the specified |
| 1113 // register and stores the offset in the out-parameter. Otherwise | 1260 // register and stores the offset in the out-parameter. Otherwise |
| 1114 // returns false. | 1261 // returns false. |
| 1115 bool GetStoredPosition(intptr_t reg, intptr_t* cp_offset); | 1262 bool GetStoredPosition(intptr_t reg, intptr_t* cp_offset); |
| 1116 // These set methods and AdvanceCurrentPositionInTrace should be used only on | 1263 // These set methods and AdvanceCurrentPositionInTrace should be used only on |
| 1117 // new traces - the intention is that traces are immutable after creation. | 1264 // new traces - the intention is that traces are immutable after creation. |
| 1118 void add_action(DeferredAction* new_action) { | 1265 void add_action(DeferredAction* new_action) { |
| 1119 ASSERT(new_action->next_ == NULL); | 1266 ASSERT(new_action->next_ == NULL); |
| 1120 new_action->next_ = actions_; | 1267 new_action->next_ = actions_; |
| 1121 actions_ = new_action; | 1268 actions_ = new_action; |
| 1122 } | 1269 } |
| 1123 void set_backtrack(Label* backtrack) { backtrack_ = backtrack; } | 1270 void set_backtrack(BlockLabel* backtrack) { backtrack_ = backtrack; } |
| 1124 void set_stop_node(RegExpNode* node) { stop_node_ = node; } | 1271 void set_stop_node(RegExpNode* node) { stop_node_ = node; } |
| 1125 void set_loop_label(Label* label) { loop_label_ = label; } | 1272 void set_loop_label(BlockLabel* label) { loop_label_ = label; } |
| 1126 void set_characters_preloaded(intptr_t count) { | 1273 void set_characters_preloaded(intptr_t count) { |
| 1127 characters_preloaded_ = count; | 1274 characters_preloaded_ = count; |
| 1128 } | 1275 } |
| 1129 void set_bound_checked_up_to(intptr_t to) { bound_checked_up_to_ = to; } | 1276 void set_bound_checked_up_to(intptr_t to) { bound_checked_up_to_ = to; } |
| 1130 void set_flush_budget(intptr_t to) { flush_budget_ = to; } | 1277 void set_flush_budget(intptr_t to) { flush_budget_ = to; } |
| 1131 void set_quick_check_performed(QuickCheckDetails* d) { | 1278 void set_quick_check_performed(QuickCheckDetails* d) { |
| 1132 quick_check_performed_ = *d; | 1279 quick_check_performed_ = *d; |
| 1133 } | 1280 } |
| 1134 void InvalidateCurrentCharacter(); | 1281 void InvalidateCurrentCharacter(); |
| 1135 void AdvanceCurrentPositionInTrace(intptr_t by, RegExpCompiler* compiler); | 1282 void AdvanceCurrentPositionInTrace(intptr_t by, RegExpCompiler* compiler); |
| 1136 | 1283 |
| 1137 private: | 1284 private: |
| 1138 intptr_t FindAffectedRegisters(OutSet* affected_registers, Zone* zone); | 1285 intptr_t FindAffectedRegisters(OutSet* affected_registers, Isolate* isolate); |
| 1139 void PerformDeferredActions(RegExpMacroAssembler* macro, | 1286 void PerformDeferredActions(RegExpMacroAssembler* macro, |
| 1140 intptr_t max_register, | 1287 intptr_t max_register, |
| 1141 const OutSet& affected_registers, | 1288 const OutSet& affected_registers, |
| 1142 OutSet* registers_to_pop, | 1289 OutSet* registers_to_pop, |
| 1143 OutSet* registers_to_clear, | 1290 OutSet* registers_to_clear, |
| 1144 Zone* zone); | 1291 Isolate* isolate); |
| 1145 void RestoreAffectedRegisters(RegExpMacroAssembler* macro, | 1292 void RestoreAffectedRegisters(RegExpMacroAssembler* macro, |
| 1146 intptr_t max_register, | 1293 intptr_t max_register, |
| 1147 const OutSet& registers_to_pop, | 1294 const OutSet& registers_to_pop, |
| 1148 const OutSet& registers_to_clear); | 1295 const OutSet& registers_to_clear); |
| 1149 intptr_t cp_offset_; | 1296 intptr_t cp_offset_; |
| 1150 DeferredAction* actions_; | 1297 DeferredAction* actions_; |
| 1151 Label* backtrack_; | 1298 BlockLabel* backtrack_; |
| 1152 RegExpNode* stop_node_; | 1299 RegExpNode* stop_node_; |
| 1153 Label* loop_label_; | 1300 BlockLabel* loop_label_; |
| 1154 intptr_t characters_preloaded_; | 1301 intptr_t characters_preloaded_; |
| 1155 intptr_t bound_checked_up_to_; | 1302 intptr_t bound_checked_up_to_; |
| 1156 QuickCheckDetails quick_check_performed_; | 1303 QuickCheckDetails quick_check_performed_; |
| 1157 intptr_t flush_budget_; | 1304 intptr_t flush_budget_; |
| 1158 TriBool at_start_; | 1305 TriBool at_start_; |
| 1159 }; | 1306 }; |
| 1160 | 1307 |
| 1161 | 1308 |
| 1162 class NodeVisitor : public ValueObject { | 1309 class NodeVisitor : public ValueObject { |
| 1163 public: | 1310 public: |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1228 bool contains_anchor; | 1375 bool contains_anchor; |
| 1229 String& error; | 1376 String& error; |
| 1230 intptr_t capture_count; | 1377 intptr_t capture_count; |
| 1231 }; | 1378 }; |
| 1232 | 1379 |
| 1233 | 1380 |
| 1234 class RegExpEngine: public AllStatic { | 1381 class RegExpEngine: public AllStatic { |
| 1235 public: | 1382 public: |
| 1236 struct CompilationResult { | 1383 struct CompilationResult { |
| 1237 explicit CompilationResult(const char* error_message) | 1384 explicit CompilationResult(const char* error_message) |
| 1238 : error_message(error_message), | 1385 : function(Function::Handle()), |
| 1239 num_registers(0) {} | 1386 error_message(error_message) {} |
| 1240 CompilationResult(Object* code, intptr_t registers) | 1387 explicit CompilationResult(const Function& function) |
| 1241 : error_message(NULL), | 1388 : function(function), |
| 1242 num_registers(registers) {} | 1389 error_message(NULL) {} |
| 1390 | |
| 1391 const Function& function; | |
| 1243 const char* error_message; | 1392 const char* error_message; |
| 1244 intptr_t num_registers; | |
| 1245 }; | 1393 }; |
| 1246 | 1394 |
| 1247 static CompilationResult Compile(RegExpCompileData* input, | 1395 static CompilationResult Compile(RegExpCompileData* input, |
| 1248 bool ignore_case, | 1396 bool ignore_case, |
| 1249 bool global, | 1397 bool global, |
| 1250 bool multiline, | 1398 bool multiline, |
| 1251 const String& pattern, | 1399 const String& pattern, |
| 1252 const String& sample_subject, | 1400 const String& sample_subject, |
| 1253 bool is_ascii); | 1401 bool is_ascii); |
| 1254 | 1402 |
| 1403 static RawJSRegExp* New(const String& pattern, | |
| 1404 bool multi_line, | |
| 1405 bool ignore_case); | |
| 1406 | |
| 1255 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1407 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
| 1256 }; | 1408 }; |
| 1257 | 1409 |
| 1258 } // namespace dart | 1410 } // namespace dart |
| 1259 | 1411 |
| 1260 #endif // VM_REGEXP_H_ | 1412 #endif // VM_REGEXP_H_ |
| OLD | NEW |