Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(692)

Side by Side Diff: runtime/vm/regexp.h

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Port remaining V8 regexp tests and fix exposed bugs. Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #ifndef VM_REGEXP_H_ 5 #ifndef VM_REGEXP_H_
6 #define VM_REGEXP_H_ 6 #define VM_REGEXP_H_
7 7
8 #include "vm/assembler.h" 8 #include "vm/assembler.h"
9 #include "vm/intermediate_language.h"
9 #include "vm/object.h" 10 #include "vm/object.h"
11 #include "vm/regexp_assembler.h"
10 12
11 namespace dart { 13 namespace dart {
12 14
13 // Forward declarations. 15 // Forward declarations.
14 class AlternativeGeneration; 16 class AlternativeGeneration;
15 class BoyerMooreLookahead; 17 class BoyerMooreLookahead;
16 class NodeVisitor; 18 class NodeVisitor;
17 class QuickCheckDetails; 19 class QuickCheckDetails;
18 class RegExpAtom; 20 class RegExpAtom;
19 class RegExpCharacterClass; 21 class RegExpCharacterClass;
(...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after
232 // this case anchored branches will always fail and can be ignored when 234 // this case anchored branches will always fail and can be ignored when
233 // determining how many characters are consumed on success. 235 // determining how many characters are consumed on success.
234 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, 236 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget,
235 bool not_at_start) = 0; 237 bool not_at_start) = 0;
236 // Emits some quick code that checks whether the preloaded characters match. 238 // Emits some quick code that checks whether the preloaded characters match.
237 // Falls through on certain failure, jumps to the label on possible success. 239 // Falls through on certain failure, jumps to the label on possible success.
238 // If the node cannot make a quick check it does nothing and returns false. 240 // If the node cannot make a quick check it does nothing and returns false.
239 bool EmitQuickCheck(RegExpCompiler* compiler, 241 bool EmitQuickCheck(RegExpCompiler* compiler,
240 Trace* trace, 242 Trace* trace,
241 bool preload_has_checked_bounds, 243 bool preload_has_checked_bounds,
242 Label* on_possible_success, 244 BlockLabel* on_possible_success,
243 QuickCheckDetails* details_return, 245 QuickCheckDetails* details_return,
244 bool fall_through_on_failure); 246 bool fall_through_on_failure);
245 // For a given number of characters this returns a mask and a value. The 247 // For a given number of characters this returns a mask and a value. The
246 // next n characters are anded with the mask and compared with the value. 248 // next n characters are anded with the mask and compared with the value.
247 // A comparison failure indicates the node cannot match the next n characters. 249 // A comparison failure indicates the node cannot match the next n characters.
248 // A comparison success indicates the node may match. 250 // A comparison success indicates the node may match.
249 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 251 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
250 RegExpCompiler* compiler, 252 RegExpCompiler* compiler,
251 intptr_t characters_filled_in, 253 intptr_t characters_filled_in,
252 bool not_at_start) = 0; 254 bool not_at_start) = 0;
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
292 } 294 }
293 295
294 // We want to avoid recalculating the lookahead info, so we store it on the 296 // We want to avoid recalculating the lookahead info, so we store it on the
295 // node. Only info that is for this node is stored. We can tell that the 297 // node. Only info that is for this node is stored. We can tell that the
296 // info is for this node when offset == 0, so the information is calculated 298 // info is for this node when offset == 0, so the information is calculated
297 // relative to this node. 299 // relative to this node.
298 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, intptr_t offset) { 300 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, intptr_t offset) {
299 if (offset == 0) set_bm_info(not_at_start, bm); 301 if (offset == 0) set_bm_info(not_at_start, bm);
300 } 302 }
301 303
302 Label* label() { return &label_; } 304 BlockLabel* label() { return &label_; }
303 // If non-generic code is generated for a node (i.e. the node is not at the 305 // If non-generic code is generated for a node (i.e. the node is not at the
304 // start of the trace) then it cannot be reused. This variable sets a limit 306 // start of the trace) then it cannot be reused. This variable sets a limit
305 // on how often we allow that to happen before we insist on starting a new 307 // on how often we allow that to happen before we insist on starting a new
306 // trace and generating generic code for a node that can be reused by flushing 308 // trace and generating generic code for a node that can be reused by flushing
307 // the deferred actions in the current trace and generating a goto. 309 // the deferred actions in the current trace and generating a goto.
308 static const intptr_t kMaxCopiesCodeGenerated = 10; 310 static const intptr_t kMaxCopiesCodeGenerated = 10;
309 311
310 NodeInfo* info() { return &info_; } 312 NodeInfo* info() { return &info_; }
311 313
312 BoyerMooreLookahead* bm_info(bool not_at_start) { 314 BoyerMooreLookahead* bm_info(bool not_at_start) {
313 return bm_info_[not_at_start ? 1 : 0]; 315 return bm_info_[not_at_start ? 1 : 0];
314 } 316 }
315 317
316 Isolate* isolate() const { return isolate_; } 318 Isolate* isolate() const { return isolate_; }
317 319
318 protected: 320 protected:
319 enum LimitResult { DONE, CONTINUE }; 321 enum LimitResult { DONE, CONTINUE };
320 RegExpNode* replacement_; 322 RegExpNode* replacement_;
321 323
324 LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace);
325
322 void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) { 326 void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) {
323 bm_info_[not_at_start ? 1 : 0] = bm; 327 bm_info_[not_at_start ? 1 : 0] = bm;
324 } 328 }
325 329
326 private: 330 private:
327 static const intptr_t kFirstCharBudget = 10; 331 static const intptr_t kFirstCharBudget = 10;
328 Label label_; 332 BlockLabel label_;
329 NodeInfo info_; 333 NodeInfo info_;
330 // This variable keeps track of how many times code has been generated for 334 // This variable keeps track of how many times code has been generated for
331 // this node (in different traces). We don't keep track of where the 335 // this node (in different traces). We don't keep track of where the
332 // generated code is located unless the code is generated at the start of 336 // generated code is located unless the code is generated at the start of
333 // a trace, in which case it is generic and can be reused by flushing the 337 // a trace, in which case it is generic and can be reused by flushing the
334 // deferred operations in the current trace and generating a goto. 338 // deferred operations in the current trace and generating a goto.
335 intptr_t trace_count_; 339 intptr_t trace_count_;
336 BoyerMooreLookahead* bm_info_[2]; 340 BoyerMooreLookahead* bm_info_[2];
337 Isolate* isolate_; 341 Isolate* isolate_;
338 }; 342 };
(...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after
528 RegExpNode* node_; 532 RegExpNode* node_;
529 ZoneGrowableArray<Guard*>* guards_; 533 ZoneGrowableArray<Guard*>* guards_;
530 }; 534 };
531 535
532 536
533 // A set of unsigned integers that behaves especially well on small 537 // A set of unsigned integers that behaves especially well on small
534 // integers (< 32). May do zone-allocation. 538 // integers (< 32). May do zone-allocation.
535 class OutSet: public ZoneAllocated { 539 class OutSet: public ZoneAllocated {
536 public: 540 public:
537 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } 541 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { }
538 OutSet* Extend(unsigned value, Zone* zone); 542 OutSet* Extend(unsigned value, Isolate* isolate);
539 bool Get(unsigned value) const; 543 bool Get(unsigned value) const;
540 static const unsigned kFirstLimit = 32; 544 static const unsigned kFirstLimit = 32;
541 545
542 private: 546 private:
543 // Destructively set a value in this set. In most cases you want 547 // Destructively set a value in this set. In most cases you want
544 // to use Extend instead to ensure that only one instance exists 548 // to use Extend instead to ensure that only one instance exists
545 // that contains the same values. 549 // that contains the same values.
546 void Set(unsigned value, Zone* zone); 550 void Set(unsigned value, Isolate* isolate);
547 551
548 // The successors are a list of sets that contain the same values 552 // The successors are a list of sets that contain the same values
549 // as this set and the one more value that is not present in this 553 // as this set and the one more value that is not present in this
550 // set. 554 // set.
551 ZoneGrowableArray<OutSet*>* successors() { return successors_; } 555 ZoneGrowableArray<OutSet*>* successors() { return successors_; }
552 556
553 OutSet(uint32_t first, ZoneGrowableArray<unsigned>* remaining) 557 OutSet(uint32_t first, ZoneGrowableArray<unsigned>* remaining)
554 : first_(first), remaining_(remaining), successors_(NULL) { } 558 : first_(first), remaining_(remaining), successors_(NULL) { }
555 uint32_t first_; 559 uint32_t first_;
556 ZoneGrowableArray<unsigned>* remaining_; 560 ZoneGrowableArray<unsigned>* remaining_;
557 ZoneGrowableArray<OutSet*>* successors_; 561 ZoneGrowableArray<OutSet*>* successors_;
558 friend class Trace; 562 friend class Trace;
559 }; 563 };
560 564
561 565
562 // A mapping from integers, specified as ranges, to a set of integers. 566 // A mapping from integers, specified as ranges, to a set of integers.
563 // Used for mapping character ranges to choices. 567 // Used for mapping character ranges to choices.
564 class DispatchTable : public ZoneAllocated { 568 class DispatchTable : public ZoneAllocated {
565 public: 569 public:
566 DispatchTable() { } 570 DispatchTable() { }
567 571
568 class Entry { 572 class Entry {
569 public: 573 public:
570 Entry() : from_(0), to_(0), out_set_(NULL) { } 574 Entry() : from_(0), to_(0), out_set_(NULL) { }
571 Entry(uint16_t from, uint16_t to, OutSet* out_set) 575 Entry(uint16_t from, uint16_t to, OutSet* out_set)
572 : from_(from), to_(to), out_set_(out_set) { } 576 : from_(from), to_(to), out_set_(out_set) { }
573 uint16_t from() { return from_; } 577 uint16_t from() { return from_; }
574 uint16_t to() { return to_; } 578 uint16_t to() { return to_; }
575 void set_to(uint16_t value) { to_ = value; } 579 void set_to(uint16_t value) { to_ = value; }
576 void AddValue(intptr_t value, Zone* zone) { 580 void AddValue(intptr_t value, Isolate* isolate) {
577 out_set_ = out_set_->Extend(value, zone); 581 out_set_ = out_set_->Extend(value, isolate);
578 } 582 }
579 OutSet* out_set() { return out_set_; } 583 OutSet* out_set() { return out_set_; }
580 private: 584 private:
581 uint16_t from_; 585 uint16_t from_;
582 uint16_t to_; 586 uint16_t to_;
583 OutSet* out_set_; 587 OutSet* out_set_;
584 }; 588 };
585 589
586 class Config { 590 class Config {
587 public: 591 public:
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
625 table_(NULL), 629 table_(NULL),
626 not_at_start_(false), 630 not_at_start_(false),
627 being_calculated_(false) { } 631 being_calculated_(false) { }
628 virtual void Accept(NodeVisitor* visitor); 632 virtual void Accept(NodeVisitor* visitor);
629 void AddAlternative(GuardedAlternative node) { 633 void AddAlternative(GuardedAlternative node) {
630 alternatives()->Add(node); 634 alternatives()->Add(node);
631 } 635 }
632 ZoneGrowableArray<GuardedAlternative>* alternatives() { 636 ZoneGrowableArray<GuardedAlternative>* alternatives() {
633 return alternatives_; 637 return alternatives_;
634 } 638 }
635 DispatchTable* GetTable(bool ignore_case);
636 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 639 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
637 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, 640 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget,
638 bool not_at_start); 641 bool not_at_start);
639 intptr_t EatsAtLeastHelper(intptr_t still_to_find, 642 intptr_t EatsAtLeastHelper(intptr_t still_to_find,
640 intptr_t budget, 643 intptr_t budget,
641 RegExpNode* ignore_this_node, 644 RegExpNode* ignore_this_node,
642 bool not_at_start); 645 bool not_at_start);
643 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 646 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
644 RegExpCompiler* compiler, 647 RegExpCompiler* compiler,
645 intptr_t characters_filled_in, 648 intptr_t characters_filled_in,
(...skipping 333 matching lines...) Expand 10 before | Expand all | Expand 10 after
979 Position positions_[4]; 982 Position positions_[4];
980 // These values are the condensate of the above array after Rationalize(). 983 // These values are the condensate of the above array after Rationalize().
981 uint32_t mask_; 984 uint32_t mask_;
982 uint32_t value_; 985 uint32_t value_;
983 // If set to true, there is no way this quick check can match at all. 986 // If set to true, there is no way this quick check can match at all.
984 // E.g., if it requires to be at the start of the input, and isn't. 987 // E.g., if it requires to be at the start of the input, and isn't.
985 bool cannot_match_; 988 bool cannot_match_;
986 }; 989 };
987 990
988 991
992 // Improve the speed that we scan for an initial point where a non-anchored
993 // regexp can match by using a Boyer-Moore-like table. This is done by
994 // identifying non-greedy non-capturing loops in the nodes that eat any
995 // character one at a time. For example in the middle of the regexp
996 // /foo[\s\S]*?bar/ we find such a loop. There is also such a loop implicitly
997 // inserted at the start of any non-anchored regexp.
998 //
999 // When we have found such a loop we look ahead in the nodes to find the set of
1000 // characters that can come at given distances. For example for the regexp
1001 // /.?foo/ we know that there are at least 3 characters ahead of us, and the
1002 // sets of characters that can occur are [any, [f, o], [o]]. We find a range in
1003 // the lookahead info where the set of characters is reasonably constrained. In
1004 // our example this is from index 1 to 2 (0 is not constrained). We can now
1005 // look 3 characters ahead and if we don't find one of [f, o] (the union of
1006 // [f, o] and [o]) then we can skip forwards by the range size (in this case 2).
1007 //
1008 // For Unicode input strings we do the same, but modulo 128.
1009 //
1010 // We also look at the first string fed to the regexp and use that to get a hint
1011 // of the character frequencies in the inputs. This affects the assessment of
1012 // whether the set of characters is 'reasonably constrained'.
1013 //
1014 // We also have another lookahead mechanism (called quick check in the code),
1015 // which uses a wide load of multiple characters followed by a mask and compare
1016 // to determine whether a match is possible at this point.
1017 enum ContainedInLattice {
1018 kNotYet = 0,
1019 kLatticeIn = 1,
1020 kLatticeOut = 2,
1021 kLatticeUnknown = 3 // Can also mean both in and out.
1022 };
1023
1024
1025 inline ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b) {
1026 return static_cast<ContainedInLattice>(a | b);
1027 }
1028
1029
1030 ContainedInLattice AddRange(ContainedInLattice a,
1031 const int* ranges,
1032 intptr_t ranges_size,
1033 Interval new_range);
1034
1035
1036 class BoyerMoorePositionInfo : public ZoneAllocated {
1037 public:
1038 explicit BoyerMoorePositionInfo(Isolate* isolate)
1039 : map_(new(isolate) ZoneGrowableArray<bool>(kMapSize)),
1040 map_count_(0),
1041 w_(kNotYet),
1042 s_(kNotYet),
1043 d_(kNotYet),
1044 surrogate_(kNotYet) {
1045 for (intptr_t i = 0; i < kMapSize; i++) {
1046 map_->Add(false);
1047 }
1048 }
1049
1050 bool& at(intptr_t i) { return (*map_)[i]; }
1051
1052 static const intptr_t kMapSize = 128;
1053 static const intptr_t kMask = kMapSize - 1;
1054
1055 intptr_t map_count() const { return map_count_; }
1056
1057 void Set(intptr_t character);
1058 void SetInterval(const Interval& interval);
1059 void SetAll();
1060 bool is_non_word() { return w_ == kLatticeOut; }
1061 bool is_word() { return w_ == kLatticeIn; }
1062
1063 private:
1064 ZoneGrowableArray<bool>* map_;
1065 intptr_t map_count_; // Number of set bits in the map.
1066 ContainedInLattice w_; // The \w character class.
1067 ContainedInLattice s_; // The \s character class.
1068 ContainedInLattice d_; // The \d character class.
1069 ContainedInLattice surrogate_; // Surrogate UTF-16 code units.
1070 };
1071
1072
1073 class BoyerMooreLookahead : public ZoneAllocated{
1074 public:
1075 BoyerMooreLookahead(intptr_t length, RegExpCompiler* compiler,
1076 Isolate* Isolate);
1077
1078 intptr_t length() { return length_; }
1079 intptr_t max_char() { return max_char_; }
1080 RegExpCompiler* compiler() { return compiler_; }
1081
1082 intptr_t Count(intptr_t map_number) {
1083 return bitmaps_->At(map_number)->map_count();
1084 }
1085
1086 BoyerMoorePositionInfo* at(intptr_t i) { return bitmaps_->At(i); }
1087
1088 void Set(intptr_t map_number, intptr_t character) {
1089 if (character > max_char_) return;
1090 BoyerMoorePositionInfo* info = bitmaps_->At(map_number);
1091 info->Set(character);
1092 }
1093
1094 void SetInterval(intptr_t map_number, const Interval& interval) {
1095 if (interval.from() > max_char_) return;
1096 BoyerMoorePositionInfo* info = bitmaps_->At(map_number);
1097 if (interval.to() > max_char_) {
1098 info->SetInterval(Interval(interval.from(), max_char_));
1099 } else {
1100 info->SetInterval(interval);
1101 }
1102 }
1103
1104 void SetAll(intptr_t map_number) {
1105 bitmaps_->At(map_number)->SetAll();
1106 }
1107
1108 void SetRest(intptr_t from_map) {
1109 for (intptr_t i = from_map; i < length_; i++) SetAll(i);
1110 }
1111 bool EmitSkipInstructions(RegExpMacroAssembler* masm);
1112
1113 private:
1114 // This is the value obtained by EatsAtLeast. If we do not have at least this
1115 // many characters left in the sample string then the match is bound to fail.
1116 // Therefore it is OK to read a character this far ahead of the current match
1117 // point.
1118 intptr_t length_;
1119 RegExpCompiler* compiler_;
1120 // 0x7f for ASCII, 0xffff for UTF-16.
1121 intptr_t max_char_;
1122 ZoneGrowableArray<BoyerMoorePositionInfo*>* bitmaps_;
1123
1124 intptr_t GetSkipTable(intptr_t min_lookahead,
1125 intptr_t max_lookahead,
1126 TypedData* boolean_skip_table);
1127 bool FindWorthwhileInterval(intptr_t* from, intptr_t* to);
1128 intptr_t FindBestInterval(
1129 intptr_t max_number_of_chars,
1130 intptr_t old_biggest_points,
1131 intptr_t* from, intptr_t* to);
1132 };
1133
1134
989 // There are many ways to generate code for a node. This class encapsulates 1135 // There are many ways to generate code for a node. This class encapsulates
990 // the current way we should be generating. In other words it encapsulates 1136 // the current way we should be generating. In other words it encapsulates
991 // the current state of the code generator. The effect of this is that we 1137 // the current state of the code generator. The effect of this is that we
992 // generate code for paths that the matcher can take through the regular 1138 // generate code for paths that the matcher can take through the regular
993 // expression. A given node in the regexp can be code-generated several times 1139 // expression. A given node in the regexp can be code-generated several times
994 // as it can be part of several traces. For example for the regexp: 1140 // as it can be part of several traces. For example for the regexp:
995 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part 1141 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part
996 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code 1142 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code
997 // to match foo is generated only once (the traces have a common prefix). The 1143 // to match foo is generated only once (the traces have a common prefix). The
998 // code to store the capture is deferred and generated (twice) after the places 1144 // code to store the capture is deferred and generated (twice) after the places
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
1094 cp_offset_ == 0 && 1240 cp_offset_ == 0 &&
1095 characters_preloaded_ == 0 && 1241 characters_preloaded_ == 0 &&
1096 bound_checked_up_to_ == 0 && 1242 bound_checked_up_to_ == 0 &&
1097 quick_check_performed_.characters() == 0 && 1243 quick_check_performed_.characters() == 0 &&
1098 at_start_ == UNKNOWN; 1244 at_start_ == UNKNOWN;
1099 } 1245 }
1100 TriBool at_start() { return at_start_; } 1246 TriBool at_start() { return at_start_; }
1101 void set_at_start(bool at_start) { 1247 void set_at_start(bool at_start) {
1102 at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE; 1248 at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE;
1103 } 1249 }
1104 Label* backtrack() { return backtrack_; } 1250 BlockLabel* backtrack() { return backtrack_; }
1105 Label* loop_label() { return loop_label_; } 1251 BlockLabel* loop_label() { return loop_label_; }
1106 RegExpNode* stop_node() { return stop_node_; } 1252 RegExpNode* stop_node() { return stop_node_; }
1107 intptr_t characters_preloaded() { return characters_preloaded_; } 1253 intptr_t characters_preloaded() { return characters_preloaded_; }
1108 intptr_t bound_checked_up_to() { return bound_checked_up_to_; } 1254 intptr_t bound_checked_up_to() { return bound_checked_up_to_; }
1109 intptr_t flush_budget() { return flush_budget_; } 1255 intptr_t flush_budget() { return flush_budget_; }
1110 QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; } 1256 QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
1111 bool mentions_reg(intptr_t reg); 1257 bool mentions_reg(intptr_t reg);
1112 // Returns true if a deferred position store exists to the specified 1258 // Returns true if a deferred position store exists to the specified
1113 // register and stores the offset in the out-parameter. Otherwise 1259 // register and stores the offset in the out-parameter. Otherwise
1114 // returns false. 1260 // returns false.
1115 bool GetStoredPosition(intptr_t reg, intptr_t* cp_offset); 1261 bool GetStoredPosition(intptr_t reg, intptr_t* cp_offset);
1116 // These set methods and AdvanceCurrentPositionInTrace should be used only on 1262 // These set methods and AdvanceCurrentPositionInTrace should be used only on
1117 // new traces - the intention is that traces are immutable after creation. 1263 // new traces - the intention is that traces are immutable after creation.
1118 void add_action(DeferredAction* new_action) { 1264 void add_action(DeferredAction* new_action) {
1119 ASSERT(new_action->next_ == NULL); 1265 ASSERT(new_action->next_ == NULL);
1120 new_action->next_ = actions_; 1266 new_action->next_ = actions_;
1121 actions_ = new_action; 1267 actions_ = new_action;
1122 } 1268 }
1123 void set_backtrack(Label* backtrack) { backtrack_ = backtrack; } 1269 void set_backtrack(BlockLabel* backtrack) { backtrack_ = backtrack; }
1124 void set_stop_node(RegExpNode* node) { stop_node_ = node; } 1270 void set_stop_node(RegExpNode* node) { stop_node_ = node; }
1125 void set_loop_label(Label* label) { loop_label_ = label; } 1271 void set_loop_label(BlockLabel* label) { loop_label_ = label; }
1126 void set_characters_preloaded(intptr_t count) { 1272 void set_characters_preloaded(intptr_t count) {
1127 characters_preloaded_ = count; 1273 characters_preloaded_ = count;
1128 } 1274 }
1129 void set_bound_checked_up_to(intptr_t to) { bound_checked_up_to_ = to; } 1275 void set_bound_checked_up_to(intptr_t to) { bound_checked_up_to_ = to; }
1130 void set_flush_budget(intptr_t to) { flush_budget_ = to; } 1276 void set_flush_budget(intptr_t to) { flush_budget_ = to; }
1131 void set_quick_check_performed(QuickCheckDetails* d) { 1277 void set_quick_check_performed(QuickCheckDetails* d) {
1132 quick_check_performed_ = *d; 1278 quick_check_performed_ = *d;
1133 } 1279 }
1134 void InvalidateCurrentCharacter(); 1280 void InvalidateCurrentCharacter();
1135 void AdvanceCurrentPositionInTrace(intptr_t by, RegExpCompiler* compiler); 1281 void AdvanceCurrentPositionInTrace(intptr_t by, RegExpCompiler* compiler);
1136 1282
1137 private: 1283 private:
1138 intptr_t FindAffectedRegisters(OutSet* affected_registers, Zone* zone); 1284 intptr_t FindAffectedRegisters(OutSet* affected_registers, Isolate* isolate);
1139 void PerformDeferredActions(RegExpMacroAssembler* macro, 1285 void PerformDeferredActions(RegExpMacroAssembler* macro,
1140 intptr_t max_register, 1286 intptr_t max_register,
1141 const OutSet& affected_registers, 1287 const OutSet& affected_registers,
1142 OutSet* registers_to_pop, 1288 OutSet* registers_to_pop,
1143 OutSet* registers_to_clear, 1289 OutSet* registers_to_clear,
1144 Zone* zone); 1290 Isolate* isolate);
1145 void RestoreAffectedRegisters(RegExpMacroAssembler* macro, 1291 void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
1146 intptr_t max_register, 1292 intptr_t max_register,
1147 const OutSet& registers_to_pop, 1293 const OutSet& registers_to_pop,
1148 const OutSet& registers_to_clear); 1294 const OutSet& registers_to_clear);
1149 intptr_t cp_offset_; 1295 intptr_t cp_offset_;
1150 DeferredAction* actions_; 1296 DeferredAction* actions_;
1151 Label* backtrack_; 1297 BlockLabel* backtrack_;
1152 RegExpNode* stop_node_; 1298 RegExpNode* stop_node_;
1153 Label* loop_label_; 1299 BlockLabel* loop_label_;
1154 intptr_t characters_preloaded_; 1300 intptr_t characters_preloaded_;
1155 intptr_t bound_checked_up_to_; 1301 intptr_t bound_checked_up_to_;
1156 QuickCheckDetails quick_check_performed_; 1302 QuickCheckDetails quick_check_performed_;
1157 intptr_t flush_budget_; 1303 intptr_t flush_budget_;
1158 TriBool at_start_; 1304 TriBool at_start_;
1159 }; 1305 };
1160 1306
1161 1307
1162 class NodeVisitor : public ValueObject { 1308 class NodeVisitor : public ValueObject {
1163 public: 1309 public:
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
1207 1353
1208 private: 1354 private:
1209 bool ignore_case_; 1355 bool ignore_case_;
1210 bool is_ascii_; 1356 bool is_ascii_;
1211 const char* error_message_; 1357 const char* error_message_;
1212 1358
1213 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); 1359 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
1214 }; 1360 };
1215 1361
1216 1362
1217 struct RegExpCompileData { 1363 struct RegExpCompileData : public ZoneAllocated {
1218 RegExpCompileData() 1364 RegExpCompileData()
1219 : tree(NULL), 1365 : tree(NULL),
1220 node(NULL), 1366 node(NULL),
1221 simple(true), 1367 simple(true),
1222 contains_anchor(false), 1368 contains_anchor(false),
1223 error(String::Handle(String::null())), 1369 error(String::Handle(String::null())),
1224 capture_count(0) { } 1370 capture_count(0) { }
1225 RegExpTree* tree; 1371 RegExpTree* tree;
1226 RegExpNode* node; 1372 RegExpNode* node;
1227 bool simple; 1373 bool simple;
1228 bool contains_anchor; 1374 bool contains_anchor;
1229 String& error; 1375 String& error;
1230 intptr_t capture_count; 1376 intptr_t capture_count;
1231 }; 1377 };
1232 1378
1233 1379
1234 class RegExpEngine: public AllStatic { 1380 class RegExpEngine: public AllStatic {
1235 public: 1381 public:
1236 struct CompilationResult { 1382 struct CompilationResult {
1237 explicit CompilationResult(const char* error_message) 1383 explicit CompilationResult(const char* error_message)
1238 : error_message(error_message), 1384 : macro_assembler(NULL),
1239 num_registers(0) {} 1385 graph_entry(NULL),
1240 CompilationResult(Object* code, intptr_t registers) 1386 num_blocks(-1),
1241 : error_message(NULL), 1387 num_stack_locals(-1),
1242 num_registers(registers) {} 1388 error_message(error_message) {}
1389 explicit CompilationResult(IRRegExpMacroAssembler* macro_assembler,
Vyacheslav Egorov (Google) 2014/10/01 20:13:21 No need to mark multiple arguments constructor "ex
jgruber1 2014/10/03 18:59:51 Done.
1390 GraphEntryInstr* graph_entry,
1391 intptr_t num_blocks,
1392 intptr_t num_stack_locals)
1393 : macro_assembler(macro_assembler),
1394 graph_entry(graph_entry),
1395 num_blocks(num_blocks),
1396 num_stack_locals(num_stack_locals),
1397 error_message(NULL) {}
1398
1399 IRRegExpMacroAssembler* macro_assembler;
1400 GraphEntryInstr* graph_entry;
1401 const intptr_t num_blocks;
1402 const intptr_t num_stack_locals;
1403
1243 const char* error_message; 1404 const char* error_message;
1244 intptr_t num_registers;
1245 }; 1405 };
1246 1406
1247 static CompilationResult Compile(RegExpCompileData* input, 1407 static CompilationResult Compile(
1248 bool ignore_case, 1408 RegExpCompileData* input,
1249 bool global, 1409 const ParsedFunction* parsed_function,
1250 bool multiline, 1410 ZoneGrowableArray<const ICData*>* ic_data_array);
1251 const String& pattern, 1411
1252 const String& sample_subject, 1412 static RawJSRegExp* New(Isolate* isolate,
1253 bool is_ascii); 1413 const String& pattern,
1414 bool multi_line,
1415 bool ignore_case);
1254 1416
1255 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); 1417 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
1256 }; 1418 };
1257 1419
1258 } // namespace dart 1420 } // namespace dart
1259 1421
1260 #endif // VM_REGEXP_H_ 1422 #endif // VM_REGEXP_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698