OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_JSREGEXP_H_ | 5 #ifndef V8_JSREGEXP_H_ |
6 #define V8_JSREGEXP_H_ | 6 #define V8_JSREGEXP_H_ |
7 | 7 |
8 #include "src/allocation.h" | 8 #include "src/allocation.h" |
9 #include "src/assembler.h" | 9 #include "src/assembler.h" |
10 #include "src/zone-inl.h" | 10 #include "src/zone-inl.h" |
(...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
197 | 197 |
198 static int GetLastCaptureCount(FixedArray* array) { | 198 static int GetLastCaptureCount(FixedArray* array) { |
199 return Smi::cast(array->get(kLastCaptureCount))->value(); | 199 return Smi::cast(array->get(kLastCaptureCount))->value(); |
200 } | 200 } |
201 | 201 |
202 // For acting on the JSRegExp data FixedArray. | 202 // For acting on the JSRegExp data FixedArray. |
203 static int IrregexpMaxRegisterCount(FixedArray* re); | 203 static int IrregexpMaxRegisterCount(FixedArray* re); |
204 static void SetIrregexpMaxRegisterCount(FixedArray* re, int value); | 204 static void SetIrregexpMaxRegisterCount(FixedArray* re, int value); |
205 static int IrregexpNumberOfCaptures(FixedArray* re); | 205 static int IrregexpNumberOfCaptures(FixedArray* re); |
206 static int IrregexpNumberOfRegisters(FixedArray* re); | 206 static int IrregexpNumberOfRegisters(FixedArray* re); |
207 static ByteArray* IrregexpByteCode(FixedArray* re, bool is_ascii); | 207 static ByteArray* IrregexpByteCode(FixedArray* re, bool is_one_byte); |
208 static Code* IrregexpNativeCode(FixedArray* re, bool is_ascii); | 208 static Code* IrregexpNativeCode(FixedArray* re, bool is_one_byte); |
209 | 209 |
210 // Limit the space regexps take up on the heap. In order to limit this we | 210 // Limit the space regexps take up on the heap. In order to limit this we |
211 // would like to keep track of the amount of regexp code on the heap. This | 211 // would like to keep track of the amount of regexp code on the heap. This |
212 // is not tracked, however. As a conservative approximation we track the | 212 // is not tracked, however. As a conservative approximation we track the |
213 // total regexp code compiled including code that has subsequently been freed | 213 // total regexp code compiled including code that has subsequently been freed |
214 // and the total executable memory at any point. | 214 // and the total executable memory at any point. |
215 static const int kRegExpExecutableMemoryLimit = 16 * MB; | 215 static const int kRegExpExecutableMemoryLimit = 16 * MB; |
216 static const int kRegWxpCompiledLimit = 1 * MB; | 216 static const int kRegWxpCompiledLimit = 1 * MB; |
217 | 217 |
218 private: | 218 private: |
219 static bool CompileIrregexp( | 219 static bool CompileIrregexp(Handle<JSRegExp> re, |
220 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); | 220 Handle<String> sample_subject, bool is_one_byte); |
221 static inline bool EnsureCompiledIrregexp( | 221 static inline bool EnsureCompiledIrregexp(Handle<JSRegExp> re, |
222 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); | 222 Handle<String> sample_subject, |
| 223 bool is_one_byte); |
223 }; | 224 }; |
224 | 225 |
225 | 226 |
226 // Represents the location of one element relative to the intersection of | 227 // Represents the location of one element relative to the intersection of |
227 // two sets. Corresponds to the four areas of a Venn diagram. | 228 // two sets. Corresponds to the four areas of a Venn diagram. |
228 enum ElementInSetsRelation { | 229 enum ElementInSetsRelation { |
229 kInsideNone = 0, | 230 kInsideNone = 0, |
230 kInsideFirst = 1, | 231 kInsideFirst = 1, |
231 kInsideSecond = 2, | 232 kInsideSecond = 2, |
232 kInsideBoth = 3 | 233 kInsideBoth = 3 |
(...skipping 22 matching lines...) Expand all Loading... |
255 return CharacterRange(0, 0xFFFF); | 256 return CharacterRange(0, 0xFFFF); |
256 } | 257 } |
257 bool Contains(uc16 i) { return from_ <= i && i <= to_; } | 258 bool Contains(uc16 i) { return from_ <= i && i <= to_; } |
258 uc16 from() const { return from_; } | 259 uc16 from() const { return from_; } |
259 void set_from(uc16 value) { from_ = value; } | 260 void set_from(uc16 value) { from_ = value; } |
260 uc16 to() const { return to_; } | 261 uc16 to() const { return to_; } |
261 void set_to(uc16 value) { to_ = value; } | 262 void set_to(uc16 value) { to_ = value; } |
262 bool is_valid() { return from_ <= to_; } | 263 bool is_valid() { return from_ <= to_; } |
263 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } | 264 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } |
264 bool IsSingleton() { return (from_ == to_); } | 265 bool IsSingleton() { return (from_ == to_); } |
265 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii, | 266 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_one_byte, |
266 Zone* zone); | 267 Zone* zone); |
267 static void Split(ZoneList<CharacterRange>* base, | 268 static void Split(ZoneList<CharacterRange>* base, |
268 Vector<const int> overlay, | 269 Vector<const int> overlay, |
269 ZoneList<CharacterRange>** included, | 270 ZoneList<CharacterRange>** included, |
270 ZoneList<CharacterRange>** excluded, | 271 ZoneList<CharacterRange>** excluded, |
271 Zone* zone); | 272 Zone* zone); |
272 // Whether a range list is in canonical form: Ranges ordered by from value, | 273 // Whether a range list is in canonical form: Ranges ordered by from value, |
273 // and ranges non-overlapping and non-adjacent. | 274 // and ranges non-overlapping and non-adjacent. |
274 static bool IsCanonical(ZoneList<CharacterRange>* ranges); | 275 static bool IsCanonical(ZoneList<CharacterRange>* ranges); |
275 // Convert range list to canonical form. The characters covered by the ranges | 276 // Convert range list to canonical form. The characters covered by the ranges |
(...skipping 243 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
519 QuickCheckDetails() | 520 QuickCheckDetails() |
520 : characters_(0), | 521 : characters_(0), |
521 mask_(0), | 522 mask_(0), |
522 value_(0), | 523 value_(0), |
523 cannot_match_(false) { } | 524 cannot_match_(false) { } |
524 explicit QuickCheckDetails(int characters) | 525 explicit QuickCheckDetails(int characters) |
525 : characters_(characters), | 526 : characters_(characters), |
526 mask_(0), | 527 mask_(0), |
527 value_(0), | 528 value_(0), |
528 cannot_match_(false) { } | 529 cannot_match_(false) { } |
529 bool Rationalize(bool ascii); | 530 bool Rationalize(bool one_byte); |
530 // Merge in the information from another branch of an alternation. | 531 // Merge in the information from another branch of an alternation. |
531 void Merge(QuickCheckDetails* other, int from_index); | 532 void Merge(QuickCheckDetails* other, int from_index); |
532 // Advance the current position by some amount. | 533 // Advance the current position by some amount. |
533 void Advance(int by, bool ascii); | 534 void Advance(int by, bool one_byte); |
534 void Clear(); | 535 void Clear(); |
535 bool cannot_match() { return cannot_match_; } | 536 bool cannot_match() { return cannot_match_; } |
536 void set_cannot_match() { cannot_match_ = true; } | 537 void set_cannot_match() { cannot_match_ = true; } |
537 struct Position { | 538 struct Position { |
538 Position() : mask(0), value(0), determines_perfectly(false) { } | 539 Position() : mask(0), value(0), determines_perfectly(false) { } |
539 uc16 mask; | 540 uc16 mask; |
540 uc16 value; | 541 uc16 value; |
541 bool determines_perfectly; | 542 bool determines_perfectly; |
542 }; | 543 }; |
543 int characters() { return characters_; } | 544 int characters() { return characters_; } |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
617 // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit | 618 // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit |
618 // the number of nodes we are willing to look at in order to create this data. | 619 // the number of nodes we are willing to look at in order to create this data. |
619 static const int kRecursionBudget = 200; | 620 static const int kRecursionBudget = 200; |
620 virtual void FillInBMInfo(int offset, | 621 virtual void FillInBMInfo(int offset, |
621 int budget, | 622 int budget, |
622 BoyerMooreLookahead* bm, | 623 BoyerMooreLookahead* bm, |
623 bool not_at_start) { | 624 bool not_at_start) { |
624 UNREACHABLE(); | 625 UNREACHABLE(); |
625 } | 626 } |
626 | 627 |
627 // If we know that the input is ASCII then there are some nodes that can | 628 // If we know that the input is one-byte then there are some nodes that can |
628 // never match. This method returns a node that can be substituted for | 629 // never match. This method returns a node that can be substituted for |
629 // itself, or NULL if the node can never match. | 630 // itself, or NULL if the node can never match. |
630 virtual RegExpNode* FilterASCII(int depth, bool ignore_case) { return this; } | 631 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case) { |
631 // Helper for FilterASCII. | 632 return this; |
| 633 } |
| 634 // Helper for FilterOneByte. |
632 RegExpNode* replacement() { | 635 RegExpNode* replacement() { |
633 DCHECK(info()->replacement_calculated); | 636 DCHECK(info()->replacement_calculated); |
634 return replacement_; | 637 return replacement_; |
635 } | 638 } |
636 RegExpNode* set_replacement(RegExpNode* replacement) { | 639 RegExpNode* set_replacement(RegExpNode* replacement) { |
637 info()->replacement_calculated = true; | 640 info()->replacement_calculated = true; |
638 replacement_ = replacement; | 641 replacement_ = replacement; |
639 return replacement; // For convenience. | 642 return replacement; // For convenience. |
640 } | 643 } |
641 | 644 |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
715 int to_; | 718 int to_; |
716 }; | 719 }; |
717 | 720 |
718 | 721 |
719 class SeqRegExpNode: public RegExpNode { | 722 class SeqRegExpNode: public RegExpNode { |
720 public: | 723 public: |
721 explicit SeqRegExpNode(RegExpNode* on_success) | 724 explicit SeqRegExpNode(RegExpNode* on_success) |
722 : RegExpNode(on_success->zone()), on_success_(on_success) { } | 725 : RegExpNode(on_success->zone()), on_success_(on_success) { } |
723 RegExpNode* on_success() { return on_success_; } | 726 RegExpNode* on_success() { return on_success_; } |
724 void set_on_success(RegExpNode* node) { on_success_ = node; } | 727 void set_on_success(RegExpNode* node) { on_success_ = node; } |
725 virtual RegExpNode* FilterASCII(int depth, bool ignore_case); | 728 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
726 virtual void FillInBMInfo(int offset, | 729 virtual void FillInBMInfo(int offset, |
727 int budget, | 730 int budget, |
728 BoyerMooreLookahead* bm, | 731 BoyerMooreLookahead* bm, |
729 bool not_at_start) { | 732 bool not_at_start) { |
730 on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 733 on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
731 if (offset == 0) set_bm_info(not_at_start, bm); | 734 if (offset == 0) set_bm_info(not_at_start, bm); |
732 } | 735 } |
733 | 736 |
734 protected: | 737 protected: |
735 RegExpNode* FilterSuccessor(int depth, bool ignore_case); | 738 RegExpNode* FilterSuccessor(int depth, bool ignore_case); |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
836 elms_->Add(TextElement::CharClass(that), zone()); | 839 elms_->Add(TextElement::CharClass(that), zone()); |
837 } | 840 } |
838 virtual void Accept(NodeVisitor* visitor); | 841 virtual void Accept(NodeVisitor* visitor); |
839 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 842 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
840 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); | 843 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); |
841 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 844 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
842 RegExpCompiler* compiler, | 845 RegExpCompiler* compiler, |
843 int characters_filled_in, | 846 int characters_filled_in, |
844 bool not_at_start); | 847 bool not_at_start); |
845 ZoneList<TextElement>* elements() { return elms_; } | 848 ZoneList<TextElement>* elements() { return elms_; } |
846 void MakeCaseIndependent(bool is_ascii); | 849 void MakeCaseIndependent(bool is_one_byte); |
847 virtual int GreedyLoopTextLength(); | 850 virtual int GreedyLoopTextLength(); |
848 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 851 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
849 RegExpCompiler* compiler); | 852 RegExpCompiler* compiler); |
850 virtual void FillInBMInfo(int offset, | 853 virtual void FillInBMInfo(int offset, |
851 int budget, | 854 int budget, |
852 BoyerMooreLookahead* bm, | 855 BoyerMooreLookahead* bm, |
853 bool not_at_start); | 856 bool not_at_start); |
854 void CalculateOffsets(); | 857 void CalculateOffsets(); |
855 virtual RegExpNode* FilterASCII(int depth, bool ignore_case); | 858 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
856 | 859 |
857 private: | 860 private: |
858 enum TextEmitPassType { | 861 enum TextEmitPassType { |
859 NON_ASCII_MATCH, // Check for characters that can't match. | 862 NON_LATIN1_MATCH, // Check for characters that can't match. |
860 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. | 863 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. |
861 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. | 864 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. |
862 CASE_CHARACTER_MATCH, // Case-independent single character check. | 865 CASE_CHARACTER_MATCH, // Case-independent single character check. |
863 CHARACTER_CLASS_MATCH // Character class. | 866 CHARACTER_CLASS_MATCH // Character class. |
864 }; | 867 }; |
865 static bool SkipPass(int pass, bool ignore_case); | 868 static bool SkipPass(int pass, bool ignore_case); |
866 static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH; | 869 static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH; |
867 static const int kLastPass = CHARACTER_CLASS_MATCH; | 870 static const int kLastPass = CHARACTER_CLASS_MATCH; |
868 void TextEmitPass(RegExpCompiler* compiler, | 871 void TextEmitPass(RegExpCompiler* compiler, |
869 TextEmitPassType pass, | 872 TextEmitPassType pass, |
(...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1074 BoyerMooreLookahead* bm, | 1077 BoyerMooreLookahead* bm, |
1075 bool not_at_start); | 1078 bool not_at_start); |
1076 | 1079 |
1077 bool being_calculated() { return being_calculated_; } | 1080 bool being_calculated() { return being_calculated_; } |
1078 bool not_at_start() { return not_at_start_; } | 1081 bool not_at_start() { return not_at_start_; } |
1079 void set_not_at_start() { not_at_start_ = true; } | 1082 void set_not_at_start() { not_at_start_ = true; } |
1080 void set_being_calculated(bool b) { being_calculated_ = b; } | 1083 void set_being_calculated(bool b) { being_calculated_ = b; } |
1081 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { | 1084 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { |
1082 return true; | 1085 return true; |
1083 } | 1086 } |
1084 virtual RegExpNode* FilterASCII(int depth, bool ignore_case); | 1087 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
1085 | 1088 |
1086 protected: | 1089 protected: |
1087 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); | 1090 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); |
1088 ZoneList<GuardedAlternative>* alternatives_; | 1091 ZoneList<GuardedAlternative>* alternatives_; |
1089 | 1092 |
1090 private: | 1093 private: |
1091 friend class DispatchTableConstructor; | 1094 friend class DispatchTableConstructor; |
1092 friend class Analysis; | 1095 friend class Analysis; |
1093 void GenerateGuard(RegExpMacroAssembler* macro_assembler, | 1096 void GenerateGuard(RegExpMacroAssembler* macro_assembler, |
1094 Guard* guard, | 1097 Guard* guard, |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1148 if (offset == 0) set_bm_info(not_at_start, bm); | 1151 if (offset == 0) set_bm_info(not_at_start, bm); |
1149 } | 1152 } |
1150 // For a negative lookahead we don't emit the quick check for the | 1153 // For a negative lookahead we don't emit the quick check for the |
1151 // alternative that is expected to fail. This is because quick check code | 1154 // alternative that is expected to fail. This is because quick check code |
1152 // starts by loading enough characters for the alternative that takes fewest | 1155 // starts by loading enough characters for the alternative that takes fewest |
1153 // characters, but on a negative lookahead the negative branch did not take | 1156 // characters, but on a negative lookahead the negative branch did not take |
1154 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 1157 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
1155 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { | 1158 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { |
1156 return !is_first; | 1159 return !is_first; |
1157 } | 1160 } |
1158 virtual RegExpNode* FilterASCII(int depth, bool ignore_case); | 1161 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
1159 }; | 1162 }; |
1160 | 1163 |
1161 | 1164 |
1162 class LoopChoiceNode: public ChoiceNode { | 1165 class LoopChoiceNode: public ChoiceNode { |
1163 public: | 1166 public: |
1164 explicit LoopChoiceNode(bool body_can_be_zero_length, Zone* zone) | 1167 explicit LoopChoiceNode(bool body_can_be_zero_length, Zone* zone) |
1165 : ChoiceNode(2, zone), | 1168 : ChoiceNode(2, zone), |
1166 loop_node_(NULL), | 1169 loop_node_(NULL), |
1167 continue_node_(NULL), | 1170 continue_node_(NULL), |
1168 body_can_be_zero_length_(body_can_be_zero_length) | 1171 body_can_be_zero_length_(body_can_be_zero_length) |
1169 { } | 1172 { } |
1170 void AddLoopAlternative(GuardedAlternative alt); | 1173 void AddLoopAlternative(GuardedAlternative alt); |
1171 void AddContinueAlternative(GuardedAlternative alt); | 1174 void AddContinueAlternative(GuardedAlternative alt); |
1172 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1175 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
1173 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); | 1176 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); |
1174 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1177 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1175 RegExpCompiler* compiler, | 1178 RegExpCompiler* compiler, |
1176 int characters_filled_in, | 1179 int characters_filled_in, |
1177 bool not_at_start); | 1180 bool not_at_start); |
1178 virtual void FillInBMInfo(int offset, | 1181 virtual void FillInBMInfo(int offset, |
1179 int budget, | 1182 int budget, |
1180 BoyerMooreLookahead* bm, | 1183 BoyerMooreLookahead* bm, |
1181 bool not_at_start); | 1184 bool not_at_start); |
1182 RegExpNode* loop_node() { return loop_node_; } | 1185 RegExpNode* loop_node() { return loop_node_; } |
1183 RegExpNode* continue_node() { return continue_node_; } | 1186 RegExpNode* continue_node() { return continue_node_; } |
1184 bool body_can_be_zero_length() { return body_can_be_zero_length_; } | 1187 bool body_can_be_zero_length() { return body_can_be_zero_length_; } |
1185 virtual void Accept(NodeVisitor* visitor); | 1188 virtual void Accept(NodeVisitor* visitor); |
1186 virtual RegExpNode* FilterASCII(int depth, bool ignore_case); | 1189 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
1187 | 1190 |
1188 private: | 1191 private: |
1189 // AddAlternative is made private for loop nodes because alternatives | 1192 // AddAlternative is made private for loop nodes because alternatives |
1190 // should not be added freely, we need to keep track of which node | 1193 // should not be added freely, we need to keep track of which node |
1191 // goes back to the node itself. | 1194 // goes back to the node itself. |
1192 void AddAlternative(GuardedAlternative node) { | 1195 void AddAlternative(GuardedAlternative node) { |
1193 ChoiceNode::AddAlternative(node); | 1196 ChoiceNode::AddAlternative(node); |
1194 } | 1197 } |
1195 | 1198 |
1196 RegExpNode* loop_node_; | 1199 RegExpNode* loop_node_; |
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1319 } | 1322 } |
1320 void EmitSkipInstructions(RegExpMacroAssembler* masm); | 1323 void EmitSkipInstructions(RegExpMacroAssembler* masm); |
1321 | 1324 |
1322 private: | 1325 private: |
1323 // This is the value obtained by EatsAtLeast. If we do not have at least this | 1326 // This is the value obtained by EatsAtLeast. If we do not have at least this |
1324 // many characters left in the sample string then the match is bound to fail. | 1327 // many characters left in the sample string then the match is bound to fail. |
1325 // Therefore it is OK to read a character this far ahead of the current match | 1328 // Therefore it is OK to read a character this far ahead of the current match |
1326 // point. | 1329 // point. |
1327 int length_; | 1330 int length_; |
1328 RegExpCompiler* compiler_; | 1331 RegExpCompiler* compiler_; |
1329 // 0x7f for ASCII, 0xffff for UTF-16. | 1332 // 0xff for Latin1, 0xffff for UTF-16. |
1330 int max_char_; | 1333 int max_char_; |
1331 ZoneList<BoyerMoorePositionInfo*>* bitmaps_; | 1334 ZoneList<BoyerMoorePositionInfo*>* bitmaps_; |
1332 | 1335 |
1333 int GetSkipTable(int min_lookahead, | 1336 int GetSkipTable(int min_lookahead, |
1334 int max_lookahead, | 1337 int max_lookahead, |
1335 Handle<ByteArray> boolean_skip_table); | 1338 Handle<ByteArray> boolean_skip_table); |
1336 bool FindWorthwhileInterval(int* from, int* to); | 1339 bool FindWorthwhileInterval(int* from, int* to); |
1337 int FindBestInterval( | 1340 int FindBestInterval( |
1338 int max_number_of_chars, int old_biggest_points, int* from, int* to); | 1341 int max_number_of_chars, int old_biggest_points, int* from, int* to); |
1339 }; | 1342 }; |
(...skipping 248 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1588 // has to check if it succeeds a word or non-word. In this case the | 1591 // has to check if it succeeds a word or non-word. In this case the |
1589 // result will be something like: | 1592 // result will be something like: |
1590 // | 1593 // |
1591 // +-------+ +------------+ | 1594 // +-------+ +------------+ |
1592 // | . | | . | | 1595 // | . | | . | |
1593 // +-------+ ---> +------------+ | 1596 // +-------+ ---> +------------+ |
1594 // | word? | | check word | | 1597 // | word? | | check word | |
1595 // +-------+ +------------+ | 1598 // +-------+ +------------+ |
1596 class Analysis: public NodeVisitor { | 1599 class Analysis: public NodeVisitor { |
1597 public: | 1600 public: |
1598 Analysis(bool ignore_case, bool is_ascii) | 1601 Analysis(bool ignore_case, bool is_one_byte) |
1599 : ignore_case_(ignore_case), | 1602 : ignore_case_(ignore_case), |
1600 is_ascii_(is_ascii), | 1603 is_one_byte_(is_one_byte), |
1601 error_message_(NULL) { } | 1604 error_message_(NULL) {} |
1602 void EnsureAnalyzed(RegExpNode* node); | 1605 void EnsureAnalyzed(RegExpNode* node); |
1603 | 1606 |
1604 #define DECLARE_VISIT(Type) \ | 1607 #define DECLARE_VISIT(Type) \ |
1605 virtual void Visit##Type(Type##Node* that); | 1608 virtual void Visit##Type(Type##Node* that); |
1606 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | 1609 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
1607 #undef DECLARE_VISIT | 1610 #undef DECLARE_VISIT |
1608 virtual void VisitLoopChoice(LoopChoiceNode* that); | 1611 virtual void VisitLoopChoice(LoopChoiceNode* that); |
1609 | 1612 |
1610 bool has_failed() { return error_message_ != NULL; } | 1613 bool has_failed() { return error_message_ != NULL; } |
1611 const char* error_message() { | 1614 const char* error_message() { |
1612 DCHECK(error_message_ != NULL); | 1615 DCHECK(error_message_ != NULL); |
1613 return error_message_; | 1616 return error_message_; |
1614 } | 1617 } |
1615 void fail(const char* error_message) { | 1618 void fail(const char* error_message) { |
1616 error_message_ = error_message; | 1619 error_message_ = error_message; |
1617 } | 1620 } |
1618 | 1621 |
1619 private: | 1622 private: |
1620 bool ignore_case_; | 1623 bool ignore_case_; |
1621 bool is_ascii_; | 1624 bool is_one_byte_; |
1622 const char* error_message_; | 1625 const char* error_message_; |
1623 | 1626 |
1624 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); | 1627 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); |
1625 }; | 1628 }; |
1626 | 1629 |
1627 | 1630 |
1628 struct RegExpCompileData { | 1631 struct RegExpCompileData { |
1629 RegExpCompileData() | 1632 RegExpCompileData() |
1630 : tree(NULL), | 1633 : tree(NULL), |
1631 node(NULL), | 1634 node(NULL), |
(...skipping 18 matching lines...) Expand all Loading... |
1650 num_registers(0) {} | 1653 num_registers(0) {} |
1651 CompilationResult(Object* code, int registers) | 1654 CompilationResult(Object* code, int registers) |
1652 : error_message(NULL), | 1655 : error_message(NULL), |
1653 code(code), | 1656 code(code), |
1654 num_registers(registers) {} | 1657 num_registers(registers) {} |
1655 const char* error_message; | 1658 const char* error_message; |
1656 Object* code; | 1659 Object* code; |
1657 int num_registers; | 1660 int num_registers; |
1658 }; | 1661 }; |
1659 | 1662 |
1660 static CompilationResult Compile(RegExpCompileData* input, | 1663 static CompilationResult Compile(RegExpCompileData* input, bool ignore_case, |
1661 bool ignore_case, | 1664 bool global, bool multiline, |
1662 bool global, | |
1663 bool multiline, | |
1664 Handle<String> pattern, | 1665 Handle<String> pattern, |
1665 Handle<String> sample_subject, | 1666 Handle<String> sample_subject, |
1666 bool is_ascii, Zone* zone); | 1667 bool is_one_byte, Zone* zone); |
1667 | 1668 |
1668 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1669 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
1669 }; | 1670 }; |
1670 | 1671 |
1671 | 1672 |
1672 } } // namespace v8::internal | 1673 } } // namespace v8::internal |
1673 | 1674 |
1674 #endif // V8_JSREGEXP_H_ | 1675 #endif // V8_JSREGEXP_H_ |
OLD | NEW |