Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1220)

Side by Side Diff: src/jsregexp.h

Issue 9965010: Regexp: Improve the speed that we scan for an initial point where a non-anchored (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after
183 // is not tracked, however. As a conservative approximation we track the 183 // is not tracked, however. As a conservative approximation we track the
184 // total regexp code compiled including code that has subsequently been freed 184 // total regexp code compiled including code that has subsequently been freed
185 // and the total executable memory at any point. 185 // and the total executable memory at any point.
186 static const int kRegExpExecutableMemoryLimit = 16 * MB; 186 static const int kRegExpExecutableMemoryLimit = 16 * MB;
187 static const int kRegWxpCompiledLimit = 1 * MB; 187 static const int kRegWxpCompiledLimit = 1 * MB;
188 188
189 private: 189 private:
190 static String* last_ascii_string_; 190 static String* last_ascii_string_;
191 static String* two_byte_cached_string_; 191 static String* two_byte_cached_string_;
192 192
193 static bool CompileIrregexp(Handle<JSRegExp> re, bool is_ascii); 193 static bool CompileIrregexp(
194 static inline bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii); 194 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
195 static inline bool EnsureCompiledIrregexp(
196 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
195 197
196 198
197 // Set the subject cache. The previous string buffer is not deleted, so the 199 // Set the subject cache. The previous string buffer is not deleted, so the
198 // caller should ensure that it doesn't leak. 200 // caller should ensure that it doesn't leak.
199 static void SetSubjectCache(String* subject, 201 static void SetSubjectCache(String* subject,
200 char* utf8_subject, 202 char* utf8_subject,
201 int uft8_length, 203 int uft8_length,
202 int character_position, 204 int character_position,
203 int utf8_position); 205 int utf8_position);
204 206
(...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after
412 private: 414 private:
413 // There can't be a static empty set since it allocates its 415 // There can't be a static empty set since it allocates its
414 // successors in a zone and caches them. 416 // successors in a zone and caches them.
415 OutSet* empty() { return &empty_; } 417 OutSet* empty() { return &empty_; }
416 OutSet empty_; 418 OutSet empty_;
417 ZoneSplayTree<Config>* tree() { return &tree_; } 419 ZoneSplayTree<Config>* tree() { return &tree_; }
418 ZoneSplayTree<Config> tree_; 420 ZoneSplayTree<Config> tree_;
419 }; 421 };
420 422
421 423
424 // Improve the speed that we scan for an initial point where a non-anchored
425 // regexp can match by using a Boyer-Moore-like table. This is done by
426 // identifying non-greedy non-capturing loops in the nodes that eat any
427 // character one at a time. For example in the middle of the regexp
428 // /foo[\s\S]*?bar/ we find such a loop. There is also such a loop implicitly
429 // inserted at the start of any non-anchored regexp.
430 //
431 // When we have found such a loop we look ahead in the nodes to find the set of
432 // characters that can come at given distances. For example for the regexp
433 // /.?foo/ we know that there are at least 3 characters ahead of us, and the
434 // sets of characters that can occur are [any, [f, o], [o]]. We find a range in
435 // the lookahead info where the set of characters is reasonably constrained. In
436 // our example this is from index 1 to 2 (0 is not constrained). We can now
437 // look 3 characters ahead and if we don't find one of [f, o] (the union of
438 // [f, o] and [o]) then we can skip forwards by the range size (in this case 2).
439 //
440 // For Unicode input strings we do the same, but modulo 128.
441 //
442 // We also look at the first string fed to the regexp and use that to get a hint
443 // of the character frequencies in the inputs. This affects the assessment of
444 // whether the set of characters is 'reasonably constrained'.
445 //
446 // We also have another lookahead mechanism (called quick check in the code),
447 // which uses a wide load of multiple characters followed by a mask and compare
448 // to determine whether a match is possible at this point.
449 class BoyerMooreLookahead {
450 public:
451 BoyerMooreLookahead(int length, int map_length, RegExpCompiler* compiler);
452
453 int length() { return length_; }
454 int max_char() { return max_char_; }
455 RegExpCompiler* compiler() { return compiler_; }
456
457 static const int kTooManyCharacters = 32;
458
459 int Count(int map_number) {
460 ZoneList<bool>* map = bitmaps_->at(map_number);
461 if (map == NULL) return map_length_;
462 int count = 0;
463 for (int i = 0; i < map_length_; i++) {
464 if (map->at(i)) count++;
465 }
466 return count;
467 }
468
469 void Set(int map_number, int character) {
470 if (character > max_char_) return;
471 ZoneList<bool>* map = bitmaps_->at(map_number);
472 if (map == NULL) return;
473 map->at(character & (map_length_ - 1)) = true;
474 }
475
476 void SetAll(int map_number) {
477 bitmaps_->at(map_number) = NULL;
478 }
479
480 void SetRest(int from_map) {
481 for (int i = from_map; i < length_; i++) SetAll(i);
482 }
483 bool EmitSkipInstructions(RegExpMacroAssembler* masm);
484
485 private:
486 // This is the value obtained by EatsAtLeast. If we do not have at least this
487 // many characters left in the sample string then the match is bound to fail.
488 // Therefore it is OK to read a character this far ahead of the current match
489 // point.
490 int length_;
491 // We conservatively consider all character values modulo this length. For
492 // ASCII there is no loss of precision, since this has a value of 128.
493 int map_length_;
494 RegExpCompiler* compiler_;
495 // 0x7f for ASCII, 0xffff for UTF-16.
496 int max_char_;
497 ZoneList<ZoneList<bool>*>* bitmaps_;
498
499 int GetSkipTable(int min_lookahead,
500 int max_lookahead,
501 Handle<ByteArray> boolean_skip_table);
502 bool FindWorthwhileInterval(int* from, int* to);
503 int FindBestInterval(
504 int max_number_of_chars, int old_biggest_points, int* from, int* to);
505 };
506
507
422 #define FOR_EACH_NODE_TYPE(VISIT) \ 508 #define FOR_EACH_NODE_TYPE(VISIT) \
423 VISIT(End) \ 509 VISIT(End) \
424 VISIT(Action) \ 510 VISIT(Action) \
425 VISIT(Choice) \ 511 VISIT(Choice) \
426 VISIT(BackReference) \ 512 VISIT(BackReference) \
427 VISIT(Assertion) \ 513 VISIT(Assertion) \
428 VISIT(Text) 514 VISIT(Text)
429 515
430 516
431 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ 517 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after
628 // For a given number of characters this returns a mask and a value. The 714 // For a given number of characters this returns a mask and a value. The
629 // next n characters are anded with the mask and compared with the value. 715 // next n characters are anded with the mask and compared with the value.
630 // A comparison failure indicates the node cannot match the next n characters. 716 // A comparison failure indicates the node cannot match the next n characters.
631 // A comparison success indicates the node may match. 717 // A comparison success indicates the node may match.
632 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 718 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
633 RegExpCompiler* compiler, 719 RegExpCompiler* compiler,
634 int characters_filled_in, 720 int characters_filled_in,
635 bool not_at_start) = 0; 721 bool not_at_start) = 0;
636 static const int kNodeIsTooComplexForGreedyLoops = -1; 722 static const int kNodeIsTooComplexForGreedyLoops = -1;
637 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } 723 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
724 // Only returns the successor for a text node of length 1 that matches any
725 // character and that has no guards on it.
726 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
727 RegExpCompiler* compiler) {
728 return NULL;
729 }
730
731 // Collects information on the possible code units (mod 128) that can match if
732 // we look forward. This is used for a Boyer-Moore-like string searching
733 // implementation. TODO(erikcorry): This should share more code with
734 // EatsAtLeast, GetQuickCheckDetails and ComputeFirstCharacterSet.
735 virtual void FillInBMInfo(
736 int offset, BoyerMooreLookahead* bm, bool not_at_start) {
737 UNREACHABLE();
738 }
739
638 Label* label() { return &label_; } 740 Label* label() { return &label_; }
639 // If non-generic code is generated for a node (i.e. the node is not at the 741 // If non-generic code is generated for a node (i.e. the node is not at the
640 // start of the trace) then it cannot be reused. This variable sets a limit 742 // start of the trace) then it cannot be reused. This variable sets a limit
641 // on how often we allow that to happen before we insist on starting a new 743 // on how often we allow that to happen before we insist on starting a new
642 // trace and generating generic code for a node that can be reused by flushing 744 // trace and generating generic code for a node that can be reused by flushing
643 // the deferred actions in the current trace and generating a goto. 745 // the deferred actions in the current trace and generating a goto.
644 static const int kMaxCopiesCodeGenerated = 10; 746 static const int kMaxCopiesCodeGenerated = 10;
645 747
646 NodeInfo* info() { return &info_; } 748 NodeInfo* info() { return &info_; }
647 749
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
740 int to_; 842 int to_;
741 }; 843 };
742 844
743 845
744 class SeqRegExpNode: public RegExpNode { 846 class SeqRegExpNode: public RegExpNode {
745 public: 847 public:
746 explicit SeqRegExpNode(RegExpNode* on_success) 848 explicit SeqRegExpNode(RegExpNode* on_success)
747 : on_success_(on_success) { } 849 : on_success_(on_success) { }
748 RegExpNode* on_success() { return on_success_; } 850 RegExpNode* on_success() { return on_success_; }
749 void set_on_success(RegExpNode* node) { on_success_ = node; } 851 void set_on_success(RegExpNode* node) { on_success_ = node; }
852 virtual void FillInBMInfo(
853 int offset, BoyerMooreLookahead* bm, bool not_at_start) {
854 on_success_->FillInBMInfo(offset, bm, not_at_start);
855 }
750 private: 856 private:
751 RegExpNode* on_success_; 857 RegExpNode* on_success_;
752 }; 858 };
753 859
754 860
755 class ActionNode: public SeqRegExpNode { 861 class ActionNode: public SeqRegExpNode {
756 public: 862 public:
757 enum Type { 863 enum Type {
758 SET_REGISTER, 864 SET_REGISTER,
759 INCREMENT_REGISTER, 865 INCREMENT_REGISTER,
(...skipping 26 matching lines...) Expand all
786 virtual int EatsAtLeast(int still_to_find, 892 virtual int EatsAtLeast(int still_to_find,
787 int recursion_depth, 893 int recursion_depth,
788 bool not_at_start); 894 bool not_at_start);
789 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 895 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
790 RegExpCompiler* compiler, 896 RegExpCompiler* compiler,
791 int filled_in, 897 int filled_in,
792 bool not_at_start) { 898 bool not_at_start) {
793 return on_success()->GetQuickCheckDetails( 899 return on_success()->GetQuickCheckDetails(
794 details, compiler, filled_in, not_at_start); 900 details, compiler, filled_in, not_at_start);
795 } 901 }
902 virtual void FillInBMInfo(
903 int offset, BoyerMooreLookahead* bm, bool not_at_start);
796 Type type() { return type_; } 904 Type type() { return type_; }
797 // TODO(erikcorry): We should allow some action nodes in greedy loops. 905 // TODO(erikcorry): We should allow some action nodes in greedy loops.
798 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } 906 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
799 virtual ActionNode* Clone() { return new ActionNode(*this); } 907 virtual ActionNode* Clone() { return new ActionNode(*this); }
800 virtual int ComputeFirstCharacterSet(int budget); 908 virtual int ComputeFirstCharacterSet(int budget);
801 909
802 private: 910 private:
803 union { 911 union {
804 struct { 912 struct {
805 int reg; 913 int reg;
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
853 virtual int EatsAtLeast(int still_to_find, 961 virtual int EatsAtLeast(int still_to_find,
854 int recursion_depth, 962 int recursion_depth,
855 bool not_at_start); 963 bool not_at_start);
856 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 964 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
857 RegExpCompiler* compiler, 965 RegExpCompiler* compiler,
858 int characters_filled_in, 966 int characters_filled_in,
859 bool not_at_start); 967 bool not_at_start);
860 ZoneList<TextElement>* elements() { return elms_; } 968 ZoneList<TextElement>* elements() { return elms_; }
861 void MakeCaseIndependent(bool is_ascii); 969 void MakeCaseIndependent(bool is_ascii);
862 virtual int GreedyLoopTextLength(); 970 virtual int GreedyLoopTextLength();
971 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
972 RegExpCompiler* compiler);
973 virtual void FillInBMInfo(
974 int offset, BoyerMooreLookahead* bm, bool not_at_start);
863 virtual TextNode* Clone() { 975 virtual TextNode* Clone() {
864 TextNode* result = new TextNode(*this); 976 TextNode* result = new TextNode(*this);
865 result->CalculateOffsets(); 977 result->CalculateOffsets();
866 return result; 978 return result;
867 } 979 }
868 void CalculateOffsets(); 980 void CalculateOffsets();
869 virtual int ComputeFirstCharacterSet(int budget); 981 virtual int ComputeFirstCharacterSet(int budget);
870 982
871 private: 983 private:
872 enum TextEmitPassType { 984 enum TextEmitPassType {
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
921 } 1033 }
922 virtual void Accept(NodeVisitor* visitor); 1034 virtual void Accept(NodeVisitor* visitor);
923 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 1035 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
924 virtual int EatsAtLeast(int still_to_find, 1036 virtual int EatsAtLeast(int still_to_find,
925 int recursion_depth, 1037 int recursion_depth,
926 bool not_at_start); 1038 bool not_at_start);
927 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1039 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
928 RegExpCompiler* compiler, 1040 RegExpCompiler* compiler,
929 int filled_in, 1041 int filled_in,
930 bool not_at_start); 1042 bool not_at_start);
1043 virtual void FillInBMInfo(
1044 int offset, BoyerMooreLookahead* bm, bool not_at_start);
931 virtual int ComputeFirstCharacterSet(int budget); 1045 virtual int ComputeFirstCharacterSet(int budget);
932 virtual AssertionNode* Clone() { return new AssertionNode(*this); } 1046 virtual AssertionNode* Clone() { return new AssertionNode(*this); }
933 AssertionNodeType type() { return type_; } 1047 AssertionNodeType type() { return type_; }
934 void set_type(AssertionNodeType type) { type_ = type; } 1048 void set_type(AssertionNodeType type) { type_ = type; }
935 1049
936 private: 1050 private:
937 AssertionNode(AssertionNodeType t, RegExpNode* on_success) 1051 AssertionNode(AssertionNodeType t, RegExpNode* on_success)
938 : SeqRegExpNode(on_success), type_(t) { } 1052 : SeqRegExpNode(on_success), type_(t) { }
939 AssertionNodeType type_; 1053 AssertionNodeType type_;
940 }; 1054 };
(...skipping 13 matching lines...) Expand all
954 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 1068 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
955 virtual int EatsAtLeast(int still_to_find, 1069 virtual int EatsAtLeast(int still_to_find,
956 int recursion_depth, 1070 int recursion_depth,
957 bool not_at_start); 1071 bool not_at_start);
958 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1072 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
959 RegExpCompiler* compiler, 1073 RegExpCompiler* compiler,
960 int characters_filled_in, 1074 int characters_filled_in,
961 bool not_at_start) { 1075 bool not_at_start) {
962 return; 1076 return;
963 } 1077 }
1078 virtual void FillInBMInfo(
1079 int offset, BoyerMooreLookahead* bm, bool not_at_start) {
1080 // Working out the set of characters that a backreference can match is too
1081 // hard, so we just say that any character can match.
1082 bm->SetRest(offset);
1083 }
964 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } 1084 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); }
965 virtual int ComputeFirstCharacterSet(int budget); 1085 virtual int ComputeFirstCharacterSet(int budget);
966 1086
967 private: 1087 private:
968 int start_reg_; 1088 int start_reg_;
969 int end_reg_; 1089 int end_reg_;
970 }; 1090 };
971 1091
972 1092
973 class EndNode: public RegExpNode { 1093 class EndNode: public RegExpNode {
974 public: 1094 public:
975 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; 1095 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
976 explicit EndNode(Action action) : action_(action) { } 1096 explicit EndNode(Action action) : action_(action) { }
977 virtual void Accept(NodeVisitor* visitor); 1097 virtual void Accept(NodeVisitor* visitor);
978 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 1098 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
979 virtual int EatsAtLeast(int still_to_find, 1099 virtual int EatsAtLeast(int still_to_find,
980 int recursion_depth, 1100 int recursion_depth,
981 bool not_at_start) { return 0; } 1101 bool not_at_start) { return 0; }
982 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1102 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
983 RegExpCompiler* compiler, 1103 RegExpCompiler* compiler,
984 int characters_filled_in, 1104 int characters_filled_in,
985 bool not_at_start) { 1105 bool not_at_start) {
986 // Returning 0 from EatsAtLeast should ensure we never get here. 1106 // Returning 0 from EatsAtLeast should ensure we never get here.
987 UNREACHABLE(); 1107 UNREACHABLE();
988 } 1108 }
1109 virtual void FillInBMInfo(
1110 int offset, BoyerMooreLookahead* bm, bool not_at_start) {
1111 // Returning 0 from EatsAtLeast should ensure we never get here.
1112 UNREACHABLE();
1113 }
989 virtual EndNode* Clone() { return new EndNode(*this); } 1114 virtual EndNode* Clone() { return new EndNode(*this); }
990 private: 1115 private:
991 Action action_; 1116 Action action_;
992 }; 1117 };
993 1118
994 1119
995 class NegativeSubmatchSuccess: public EndNode { 1120 class NegativeSubmatchSuccess: public EndNode {
996 public: 1121 public:
997 NegativeSubmatchSuccess(int stack_pointer_reg, 1122 NegativeSubmatchSuccess(int stack_pointer_reg,
998 int position_reg, 1123 int position_reg,
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
1064 int recursion_depth, 1189 int recursion_depth,
1065 bool not_at_start); 1190 bool not_at_start);
1066 int EatsAtLeastHelper(int still_to_find, 1191 int EatsAtLeastHelper(int still_to_find,
1067 int recursion_depth, 1192 int recursion_depth,
1068 RegExpNode* ignore_this_node, 1193 RegExpNode* ignore_this_node,
1069 bool not_at_start); 1194 bool not_at_start);
1070 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1195 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1071 RegExpCompiler* compiler, 1196 RegExpCompiler* compiler,
1072 int characters_filled_in, 1197 int characters_filled_in,
1073 bool not_at_start); 1198 bool not_at_start);
1199 virtual void FillInBMInfo(
1200 int offset, BoyerMooreLookahead* bm, bool not_at_start);
1074 virtual ChoiceNode* Clone() { return new ChoiceNode(*this); } 1201 virtual ChoiceNode* Clone() { return new ChoiceNode(*this); }
1075 1202
1076 bool being_calculated() { return being_calculated_; } 1203 bool being_calculated() { return being_calculated_; }
1077 bool not_at_start() { return not_at_start_; } 1204 bool not_at_start() { return not_at_start_; }
1078 void set_not_at_start() { not_at_start_ = true; } 1205 void set_not_at_start() { not_at_start_ = true; }
1079 void set_being_calculated(bool b) { being_calculated_ = b; } 1206 void set_being_calculated(bool b) { being_calculated_ = b; }
1080 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } 1207 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
1081 1208
1082 protected: 1209 protected:
1083 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); 1210 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
1084 ZoneList<GuardedAlternative>* alternatives_; 1211 ZoneList<GuardedAlternative>* alternatives_;
1085 1212
1086 private: 1213 private:
1087 friend class DispatchTableConstructor; 1214 friend class DispatchTableConstructor;
1088 friend class Analysis; 1215 friend class Analysis;
1089 void GenerateGuard(RegExpMacroAssembler* macro_assembler, 1216 void GenerateGuard(RegExpMacroAssembler* macro_assembler,
1090 Guard* guard, 1217 Guard* guard,
1091 Trace* trace); 1218 Trace* trace);
1092 int CalculatePreloadCharacters(RegExpCompiler* compiler, bool not_at_start); 1219 int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least);
1093 void EmitOutOfLineContinuation(RegExpCompiler* compiler, 1220 void EmitOutOfLineContinuation(RegExpCompiler* compiler,
1094 Trace* trace, 1221 Trace* trace,
1095 GuardedAlternative alternative, 1222 GuardedAlternative alternative,
1096 AlternativeGeneration* alt_gen, 1223 AlternativeGeneration* alt_gen,
1097 int preload_characters, 1224 int preload_characters,
1098 bool next_expects_preload); 1225 bool next_expects_preload);
1099 DispatchTable* table_; 1226 DispatchTable* table_;
1100 // If true, this node is never checked at the start of the input. 1227 // If true, this node is never checked at the start of the input.
1101 // Allows a new trace to start with at_start() set to false. 1228 // Allows a new trace to start with at_start() set to false.
1102 bool not_at_start_; 1229 bool not_at_start_;
1103 bool being_calculated_; 1230 bool being_calculated_;
1104 }; 1231 };
1105 1232
1106 1233
1107 class NegativeLookaheadChoiceNode: public ChoiceNode { 1234 class NegativeLookaheadChoiceNode: public ChoiceNode {
1108 public: 1235 public:
1109 explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, 1236 explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail,
1110 GuardedAlternative then_do_this) 1237 GuardedAlternative then_do_this)
1111 : ChoiceNode(2) { 1238 : ChoiceNode(2) {
1112 AddAlternative(this_must_fail); 1239 AddAlternative(this_must_fail);
1113 AddAlternative(then_do_this); 1240 AddAlternative(then_do_this);
1114 } 1241 }
1115 virtual int EatsAtLeast(int still_to_find, 1242 virtual int EatsAtLeast(int still_to_find,
1116 int recursion_depth, 1243 int recursion_depth,
1117 bool not_at_start); 1244 bool not_at_start);
1118 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1245 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1119 RegExpCompiler* compiler, 1246 RegExpCompiler* compiler,
1120 int characters_filled_in, 1247 int characters_filled_in,
1121 bool not_at_start); 1248 bool not_at_start);
1249 virtual void FillInBMInfo(
1250 int offset, BoyerMooreLookahead* bm, bool not_at_start) {
1251 alternatives_->at(1).node()->FillInBMInfo(offset, bm, not_at_start);
1252 }
1122 // For a negative lookahead we don't emit the quick check for the 1253 // For a negative lookahead we don't emit the quick check for the
1123 // alternative that is expected to fail. This is because quick check code 1254 // alternative that is expected to fail. This is because quick check code
1124 // starts by loading enough characters for the alternative that takes fewest 1255 // starts by loading enough characters for the alternative that takes fewest
1125 // characters, but on a negative lookahead the negative branch did not take 1256 // characters, but on a negative lookahead the negative branch did not take
1126 // part in that calculation (EatsAtLeast) so the assumptions don't hold. 1257 // part in that calculation (EatsAtLeast) so the assumptions don't hold.
1127 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } 1258 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
1128 virtual int ComputeFirstCharacterSet(int budget); 1259 virtual int ComputeFirstCharacterSet(int budget);
1129 }; 1260 };
1130 1261
1131 1262
1132 class LoopChoiceNode: public ChoiceNode { 1263 class LoopChoiceNode: public ChoiceNode {
1133 public: 1264 public:
1134 explicit LoopChoiceNode(bool body_can_be_zero_length) 1265 explicit LoopChoiceNode(bool body_can_be_zero_length)
1135 : ChoiceNode(2), 1266 : ChoiceNode(2),
1136 loop_node_(NULL), 1267 loop_node_(NULL),
1137 continue_node_(NULL), 1268 continue_node_(NULL),
1138 body_can_be_zero_length_(body_can_be_zero_length) { } 1269 body_can_be_zero_length_(body_can_be_zero_length) { }
1139 void AddLoopAlternative(GuardedAlternative alt); 1270 void AddLoopAlternative(GuardedAlternative alt);
1140 void AddContinueAlternative(GuardedAlternative alt); 1271 void AddContinueAlternative(GuardedAlternative alt);
1141 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 1272 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1142 virtual int EatsAtLeast(int still_to_find, 1273 virtual int EatsAtLeast(int still_to_find,
1143 int recursion_depth, 1274 int recursion_depth,
1144 bool not_at_start); 1275 bool not_at_start);
1145 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1276 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1146 RegExpCompiler* compiler, 1277 RegExpCompiler* compiler,
1147 int characters_filled_in, 1278 int characters_filled_in,
1148 bool not_at_start); 1279 bool not_at_start);
1280 virtual void FillInBMInfo(
1281 int offset, BoyerMooreLookahead* bm, bool not_at_start);
1149 virtual int ComputeFirstCharacterSet(int budget); 1282 virtual int ComputeFirstCharacterSet(int budget);
1150 virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } 1283 virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); }
1151 RegExpNode* loop_node() { return loop_node_; } 1284 RegExpNode* loop_node() { return loop_node_; }
1152 RegExpNode* continue_node() { return continue_node_; } 1285 RegExpNode* continue_node() { return continue_node_; }
1153 bool body_can_be_zero_length() { return body_can_be_zero_length_; } 1286 bool body_can_be_zero_length() { return body_can_be_zero_length_; }
1154 virtual void Accept(NodeVisitor* visitor); 1287 virtual void Accept(NodeVisitor* visitor);
1155 1288
1156 private: 1289 private:
1157 // AddAlternative is made private for loop nodes because alternatives 1290 // AddAlternative is made private for loop nodes because alternatives
1158 // should not be added freely, we need to keep track of which node 1291 // should not be added freely, we need to keep track of which node
(...skipping 292 matching lines...) Expand 10 before | Expand all | Expand 10 after
1451 num_registers(registers) {} 1584 num_registers(registers) {}
1452 const char* error_message; 1585 const char* error_message;
1453 Object* code; 1586 Object* code;
1454 int num_registers; 1587 int num_registers;
1455 }; 1588 };
1456 1589
1457 static CompilationResult Compile(RegExpCompileData* input, 1590 static CompilationResult Compile(RegExpCompileData* input,
1458 bool ignore_case, 1591 bool ignore_case,
1459 bool multiline, 1592 bool multiline,
1460 Handle<String> pattern, 1593 Handle<String> pattern,
1594 Handle<String> sample_subject,
1461 bool is_ascii); 1595 bool is_ascii);
1462 1596
1463 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); 1597 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
1464 }; 1598 };
1465 1599
1466 1600
1467 class OffsetsVector { 1601 class OffsetsVector {
1468 public: 1602 public:
1469 inline OffsetsVector(int num_registers, Isolate* isolate) 1603 inline OffsetsVector(int num_registers, Isolate* isolate)
1470 : offsets_vector_length_(num_registers) { 1604 : offsets_vector_length_(num_registers) {
(...skipping 22 matching lines...) Expand all
1493 int* vector_; 1627 int* vector_;
1494 int offsets_vector_length_; 1628 int offsets_vector_length_;
1495 1629
1496 friend class ExternalReference; 1630 friend class ExternalReference;
1497 }; 1631 };
1498 1632
1499 1633
1500 } } // namespace v8::internal 1634 } } // namespace v8::internal
1501 1635
1502 #endif // V8_JSREGEXP_H_ 1636 #endif // V8_JSREGEXP_H_
OLDNEW
« no previous file with comments | « src/ia32/regexp-macro-assembler-ia32.cc ('k') | src/jsregexp.cc » ('j') | src/jsregexp.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698