| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_JSREGEXP_H_ | 5 #ifndef V8_JSREGEXP_H_ |
| 6 #define V8_JSREGEXP_H_ | 6 #define V8_JSREGEXP_H_ |
| 7 | 7 |
| 8 #include "src/allocation.h" | 8 #include "src/allocation.h" |
| 9 #include "src/assembler.h" | 9 #include "src/assembler.h" |
| 10 #include "src/zone-inl.h" | 10 #include "src/zone-inl.h" |
| (...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 197 | 197 |
| 198 static int GetLastCaptureCount(FixedArray* array) { | 198 static int GetLastCaptureCount(FixedArray* array) { |
| 199 return Smi::cast(array->get(kLastCaptureCount))->value(); | 199 return Smi::cast(array->get(kLastCaptureCount))->value(); |
| 200 } | 200 } |
| 201 | 201 |
| 202 // For acting on the JSRegExp data FixedArray. | 202 // For acting on the JSRegExp data FixedArray. |
| 203 static int IrregexpMaxRegisterCount(FixedArray* re); | 203 static int IrregexpMaxRegisterCount(FixedArray* re); |
| 204 static void SetIrregexpMaxRegisterCount(FixedArray* re, int value); | 204 static void SetIrregexpMaxRegisterCount(FixedArray* re, int value); |
| 205 static int IrregexpNumberOfCaptures(FixedArray* re); | 205 static int IrregexpNumberOfCaptures(FixedArray* re); |
| 206 static int IrregexpNumberOfRegisters(FixedArray* re); | 206 static int IrregexpNumberOfRegisters(FixedArray* re); |
| 207 static ByteArray* IrregexpByteCode(FixedArray* re, bool is_ascii); | 207 static ByteArray* IrregexpByteCode(FixedArray* re, bool is_one_byte); |
| 208 static Code* IrregexpNativeCode(FixedArray* re, bool is_ascii); | 208 static Code* IrregexpNativeCode(FixedArray* re, bool is_one_byte); |
| 209 | 209 |
| 210 // Limit the space regexps take up on the heap. In order to limit this we | 210 // Limit the space regexps take up on the heap. In order to limit this we |
| 211 // would like to keep track of the amount of regexp code on the heap. This | 211 // would like to keep track of the amount of regexp code on the heap. This |
| 212 // is not tracked, however. As a conservative approximation we track the | 212 // is not tracked, however. As a conservative approximation we track the |
| 213 // total regexp code compiled including code that has subsequently been freed | 213 // total regexp code compiled including code that has subsequently been freed |
| 214 // and the total executable memory at any point. | 214 // and the total executable memory at any point. |
| 215 static const int kRegExpExecutableMemoryLimit = 16 * MB; | 215 static const int kRegExpExecutableMemoryLimit = 16 * MB; |
| 216 static const int kRegWxpCompiledLimit = 1 * MB; | 216 static const int kRegWxpCompiledLimit = 1 * MB; |
| 217 | 217 |
| 218 private: | 218 private: |
| 219 static bool CompileIrregexp( | 219 static bool CompileIrregexp(Handle<JSRegExp> re, |
| 220 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); | 220 Handle<String> sample_subject, bool is_one_byte); |
| 221 static inline bool EnsureCompiledIrregexp( | 221 static inline bool EnsureCompiledIrregexp(Handle<JSRegExp> re, |
| 222 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); | 222 Handle<String> sample_subject, |
| 223 bool is_one_byte); |
| 223 }; | 224 }; |
| 224 | 225 |
| 225 | 226 |
| 226 // Represents the location of one element relative to the intersection of | 227 // Represents the location of one element relative to the intersection of |
| 227 // two sets. Corresponds to the four areas of a Venn diagram. | 228 // two sets. Corresponds to the four areas of a Venn diagram. |
| 228 enum ElementInSetsRelation { | 229 enum ElementInSetsRelation { |
| 229 kInsideNone = 0, | 230 kInsideNone = 0, |
| 230 kInsideFirst = 1, | 231 kInsideFirst = 1, |
| 231 kInsideSecond = 2, | 232 kInsideSecond = 2, |
| 232 kInsideBoth = 3 | 233 kInsideBoth = 3 |
| (...skipping 22 matching lines...) Expand all Loading... |
| 255 return CharacterRange(0, 0xFFFF); | 256 return CharacterRange(0, 0xFFFF); |
| 256 } | 257 } |
| 257 bool Contains(uc16 i) { return from_ <= i && i <= to_; } | 258 bool Contains(uc16 i) { return from_ <= i && i <= to_; } |
| 258 uc16 from() const { return from_; } | 259 uc16 from() const { return from_; } |
| 259 void set_from(uc16 value) { from_ = value; } | 260 void set_from(uc16 value) { from_ = value; } |
| 260 uc16 to() const { return to_; } | 261 uc16 to() const { return to_; } |
| 261 void set_to(uc16 value) { to_ = value; } | 262 void set_to(uc16 value) { to_ = value; } |
| 262 bool is_valid() { return from_ <= to_; } | 263 bool is_valid() { return from_ <= to_; } |
| 263 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } | 264 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } |
| 264 bool IsSingleton() { return (from_ == to_); } | 265 bool IsSingleton() { return (from_ == to_); } |
| 265 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii, | 266 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_one_byte, |
| 266 Zone* zone); | 267 Zone* zone); |
| 267 static void Split(ZoneList<CharacterRange>* base, | 268 static void Split(ZoneList<CharacterRange>* base, |
| 268 Vector<const int> overlay, | 269 Vector<const int> overlay, |
| 269 ZoneList<CharacterRange>** included, | 270 ZoneList<CharacterRange>** included, |
| 270 ZoneList<CharacterRange>** excluded, | 271 ZoneList<CharacterRange>** excluded, |
| 271 Zone* zone); | 272 Zone* zone); |
| 272 // Whether a range list is in canonical form: Ranges ordered by from value, | 273 // Whether a range list is in canonical form: Ranges ordered by from value, |
| 273 // and ranges non-overlapping and non-adjacent. | 274 // and ranges non-overlapping and non-adjacent. |
| 274 static bool IsCanonical(ZoneList<CharacterRange>* ranges); | 275 static bool IsCanonical(ZoneList<CharacterRange>* ranges); |
| 275 // Convert range list to canonical form. The characters covered by the ranges | 276 // Convert range list to canonical form. The characters covered by the ranges |
| (...skipping 243 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 519 QuickCheckDetails() | 520 QuickCheckDetails() |
| 520 : characters_(0), | 521 : characters_(0), |
| 521 mask_(0), | 522 mask_(0), |
| 522 value_(0), | 523 value_(0), |
| 523 cannot_match_(false) { } | 524 cannot_match_(false) { } |
| 524 explicit QuickCheckDetails(int characters) | 525 explicit QuickCheckDetails(int characters) |
| 525 : characters_(characters), | 526 : characters_(characters), |
| 526 mask_(0), | 527 mask_(0), |
| 527 value_(0), | 528 value_(0), |
| 528 cannot_match_(false) { } | 529 cannot_match_(false) { } |
| 529 bool Rationalize(bool ascii); | 530 bool Rationalize(bool one_byte); |
| 530 // Merge in the information from another branch of an alternation. | 531 // Merge in the information from another branch of an alternation. |
| 531 void Merge(QuickCheckDetails* other, int from_index); | 532 void Merge(QuickCheckDetails* other, int from_index); |
| 532 // Advance the current position by some amount. | 533 // Advance the current position by some amount. |
| 533 void Advance(int by, bool ascii); | 534 void Advance(int by, bool one_byte); |
| 534 void Clear(); | 535 void Clear(); |
| 535 bool cannot_match() { return cannot_match_; } | 536 bool cannot_match() { return cannot_match_; } |
| 536 void set_cannot_match() { cannot_match_ = true; } | 537 void set_cannot_match() { cannot_match_ = true; } |
| 537 struct Position { | 538 struct Position { |
| 538 Position() : mask(0), value(0), determines_perfectly(false) { } | 539 Position() : mask(0), value(0), determines_perfectly(false) { } |
| 539 uc16 mask; | 540 uc16 mask; |
| 540 uc16 value; | 541 uc16 value; |
| 541 bool determines_perfectly; | 542 bool determines_perfectly; |
| 542 }; | 543 }; |
| 543 int characters() { return characters_; } | 544 int characters() { return characters_; } |
| (...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 617 // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit | 618 // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit |
| 618 // the number of nodes we are willing to look at in order to create this data. | 619 // the number of nodes we are willing to look at in order to create this data. |
| 619 static const int kRecursionBudget = 200; | 620 static const int kRecursionBudget = 200; |
| 620 virtual void FillInBMInfo(int offset, | 621 virtual void FillInBMInfo(int offset, |
| 621 int budget, | 622 int budget, |
| 622 BoyerMooreLookahead* bm, | 623 BoyerMooreLookahead* bm, |
| 623 bool not_at_start) { | 624 bool not_at_start) { |
| 624 UNREACHABLE(); | 625 UNREACHABLE(); |
| 625 } | 626 } |
| 626 | 627 |
| 627 // If we know that the input is ASCII then there are some nodes that can | 628 // If we know that the input is one-byte then there are some nodes that can |
| 628 // never match. This method returns a node that can be substituted for | 629 // never match. This method returns a node that can be substituted for |
| 629 // itself, or NULL if the node can never match. | 630 // itself, or NULL if the node can never match. |
| 630 virtual RegExpNode* FilterASCII(int depth, bool ignore_case) { return this; } | 631 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case) { |
| 631 // Helper for FilterASCII. | 632 return this; |
| 633 } |
| 634 // Helper for FilterOneByte. |
| 632 RegExpNode* replacement() { | 635 RegExpNode* replacement() { |
| 633 DCHECK(info()->replacement_calculated); | 636 DCHECK(info()->replacement_calculated); |
| 634 return replacement_; | 637 return replacement_; |
| 635 } | 638 } |
| 636 RegExpNode* set_replacement(RegExpNode* replacement) { | 639 RegExpNode* set_replacement(RegExpNode* replacement) { |
| 637 info()->replacement_calculated = true; | 640 info()->replacement_calculated = true; |
| 638 replacement_ = replacement; | 641 replacement_ = replacement; |
| 639 return replacement; // For convenience. | 642 return replacement; // For convenience. |
| 640 } | 643 } |
| 641 | 644 |
| (...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 715 int to_; | 718 int to_; |
| 716 }; | 719 }; |
| 717 | 720 |
| 718 | 721 |
| 719 class SeqRegExpNode: public RegExpNode { | 722 class SeqRegExpNode: public RegExpNode { |
| 720 public: | 723 public: |
| 721 explicit SeqRegExpNode(RegExpNode* on_success) | 724 explicit SeqRegExpNode(RegExpNode* on_success) |
| 722 : RegExpNode(on_success->zone()), on_success_(on_success) { } | 725 : RegExpNode(on_success->zone()), on_success_(on_success) { } |
| 723 RegExpNode* on_success() { return on_success_; } | 726 RegExpNode* on_success() { return on_success_; } |
| 724 void set_on_success(RegExpNode* node) { on_success_ = node; } | 727 void set_on_success(RegExpNode* node) { on_success_ = node; } |
| 725 virtual RegExpNode* FilterASCII(int depth, bool ignore_case); | 728 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
| 726 virtual void FillInBMInfo(int offset, | 729 virtual void FillInBMInfo(int offset, |
| 727 int budget, | 730 int budget, |
| 728 BoyerMooreLookahead* bm, | 731 BoyerMooreLookahead* bm, |
| 729 bool not_at_start) { | 732 bool not_at_start) { |
| 730 on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 733 on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
| 731 if (offset == 0) set_bm_info(not_at_start, bm); | 734 if (offset == 0) set_bm_info(not_at_start, bm); |
| 732 } | 735 } |
| 733 | 736 |
| 734 protected: | 737 protected: |
| 735 RegExpNode* FilterSuccessor(int depth, bool ignore_case); | 738 RegExpNode* FilterSuccessor(int depth, bool ignore_case); |
| (...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 836 elms_->Add(TextElement::CharClass(that), zone()); | 839 elms_->Add(TextElement::CharClass(that), zone()); |
| 837 } | 840 } |
| 838 virtual void Accept(NodeVisitor* visitor); | 841 virtual void Accept(NodeVisitor* visitor); |
| 839 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 842 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 840 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); | 843 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); |
| 841 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 844 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 842 RegExpCompiler* compiler, | 845 RegExpCompiler* compiler, |
| 843 int characters_filled_in, | 846 int characters_filled_in, |
| 844 bool not_at_start); | 847 bool not_at_start); |
| 845 ZoneList<TextElement>* elements() { return elms_; } | 848 ZoneList<TextElement>* elements() { return elms_; } |
| 846 void MakeCaseIndependent(bool is_ascii); | 849 void MakeCaseIndependent(bool is_one_byte); |
| 847 virtual int GreedyLoopTextLength(); | 850 virtual int GreedyLoopTextLength(); |
| 848 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 851 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
| 849 RegExpCompiler* compiler); | 852 RegExpCompiler* compiler); |
| 850 virtual void FillInBMInfo(int offset, | 853 virtual void FillInBMInfo(int offset, |
| 851 int budget, | 854 int budget, |
| 852 BoyerMooreLookahead* bm, | 855 BoyerMooreLookahead* bm, |
| 853 bool not_at_start); | 856 bool not_at_start); |
| 854 void CalculateOffsets(); | 857 void CalculateOffsets(); |
| 855 virtual RegExpNode* FilterASCII(int depth, bool ignore_case); | 858 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
| 856 | 859 |
| 857 private: | 860 private: |
| 858 enum TextEmitPassType { | 861 enum TextEmitPassType { |
| 859 NON_ASCII_MATCH, // Check for characters that can't match. | 862 NON_LATIN1_MATCH, // Check for characters that can't match. |
| 860 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. | 863 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. |
| 861 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. | 864 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. |
| 862 CASE_CHARACTER_MATCH, // Case-independent single character check. | 865 CASE_CHARACTER_MATCH, // Case-independent single character check. |
| 863 CHARACTER_CLASS_MATCH // Character class. | 866 CHARACTER_CLASS_MATCH // Character class. |
| 864 }; | 867 }; |
| 865 static bool SkipPass(int pass, bool ignore_case); | 868 static bool SkipPass(int pass, bool ignore_case); |
| 866 static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH; | 869 static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH; |
| 867 static const int kLastPass = CHARACTER_CLASS_MATCH; | 870 static const int kLastPass = CHARACTER_CLASS_MATCH; |
| 868 void TextEmitPass(RegExpCompiler* compiler, | 871 void TextEmitPass(RegExpCompiler* compiler, |
| 869 TextEmitPassType pass, | 872 TextEmitPassType pass, |
| (...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1074 BoyerMooreLookahead* bm, | 1077 BoyerMooreLookahead* bm, |
| 1075 bool not_at_start); | 1078 bool not_at_start); |
| 1076 | 1079 |
| 1077 bool being_calculated() { return being_calculated_; } | 1080 bool being_calculated() { return being_calculated_; } |
| 1078 bool not_at_start() { return not_at_start_; } | 1081 bool not_at_start() { return not_at_start_; } |
| 1079 void set_not_at_start() { not_at_start_ = true; } | 1082 void set_not_at_start() { not_at_start_ = true; } |
| 1080 void set_being_calculated(bool b) { being_calculated_ = b; } | 1083 void set_being_calculated(bool b) { being_calculated_ = b; } |
| 1081 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { | 1084 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { |
| 1082 return true; | 1085 return true; |
| 1083 } | 1086 } |
| 1084 virtual RegExpNode* FilterASCII(int depth, bool ignore_case); | 1087 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
| 1085 | 1088 |
| 1086 protected: | 1089 protected: |
| 1087 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); | 1090 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); |
| 1088 ZoneList<GuardedAlternative>* alternatives_; | 1091 ZoneList<GuardedAlternative>* alternatives_; |
| 1089 | 1092 |
| 1090 private: | 1093 private: |
| 1091 friend class DispatchTableConstructor; | 1094 friend class DispatchTableConstructor; |
| 1092 friend class Analysis; | 1095 friend class Analysis; |
| 1093 void GenerateGuard(RegExpMacroAssembler* macro_assembler, | 1096 void GenerateGuard(RegExpMacroAssembler* macro_assembler, |
| 1094 Guard* guard, | 1097 Guard* guard, |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1148 if (offset == 0) set_bm_info(not_at_start, bm); | 1151 if (offset == 0) set_bm_info(not_at_start, bm); |
| 1149 } | 1152 } |
| 1150 // For a negative lookahead we don't emit the quick check for the | 1153 // For a negative lookahead we don't emit the quick check for the |
| 1151 // alternative that is expected to fail. This is because quick check code | 1154 // alternative that is expected to fail. This is because quick check code |
| 1152 // starts by loading enough characters for the alternative that takes fewest | 1155 // starts by loading enough characters for the alternative that takes fewest |
| 1153 // characters, but on a negative lookahead the negative branch did not take | 1156 // characters, but on a negative lookahead the negative branch did not take |
| 1154 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 1157 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
| 1155 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { | 1158 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { |
| 1156 return !is_first; | 1159 return !is_first; |
| 1157 } | 1160 } |
| 1158 virtual RegExpNode* FilterASCII(int depth, bool ignore_case); | 1161 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
| 1159 }; | 1162 }; |
| 1160 | 1163 |
| 1161 | 1164 |
| 1162 class LoopChoiceNode: public ChoiceNode { | 1165 class LoopChoiceNode: public ChoiceNode { |
| 1163 public: | 1166 public: |
| 1164 explicit LoopChoiceNode(bool body_can_be_zero_length, Zone* zone) | 1167 explicit LoopChoiceNode(bool body_can_be_zero_length, Zone* zone) |
| 1165 : ChoiceNode(2, zone), | 1168 : ChoiceNode(2, zone), |
| 1166 loop_node_(NULL), | 1169 loop_node_(NULL), |
| 1167 continue_node_(NULL), | 1170 continue_node_(NULL), |
| 1168 body_can_be_zero_length_(body_can_be_zero_length) | 1171 body_can_be_zero_length_(body_can_be_zero_length) |
| 1169 { } | 1172 { } |
| 1170 void AddLoopAlternative(GuardedAlternative alt); | 1173 void AddLoopAlternative(GuardedAlternative alt); |
| 1171 void AddContinueAlternative(GuardedAlternative alt); | 1174 void AddContinueAlternative(GuardedAlternative alt); |
| 1172 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1175 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 1173 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); | 1176 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); |
| 1174 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1177 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 1175 RegExpCompiler* compiler, | 1178 RegExpCompiler* compiler, |
| 1176 int characters_filled_in, | 1179 int characters_filled_in, |
| 1177 bool not_at_start); | 1180 bool not_at_start); |
| 1178 virtual void FillInBMInfo(int offset, | 1181 virtual void FillInBMInfo(int offset, |
| 1179 int budget, | 1182 int budget, |
| 1180 BoyerMooreLookahead* bm, | 1183 BoyerMooreLookahead* bm, |
| 1181 bool not_at_start); | 1184 bool not_at_start); |
| 1182 RegExpNode* loop_node() { return loop_node_; } | 1185 RegExpNode* loop_node() { return loop_node_; } |
| 1183 RegExpNode* continue_node() { return continue_node_; } | 1186 RegExpNode* continue_node() { return continue_node_; } |
| 1184 bool body_can_be_zero_length() { return body_can_be_zero_length_; } | 1187 bool body_can_be_zero_length() { return body_can_be_zero_length_; } |
| 1185 virtual void Accept(NodeVisitor* visitor); | 1188 virtual void Accept(NodeVisitor* visitor); |
| 1186 virtual RegExpNode* FilterASCII(int depth, bool ignore_case); | 1189 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
| 1187 | 1190 |
| 1188 private: | 1191 private: |
| 1189 // AddAlternative is made private for loop nodes because alternatives | 1192 // AddAlternative is made private for loop nodes because alternatives |
| 1190 // should not be added freely, we need to keep track of which node | 1193 // should not be added freely, we need to keep track of which node |
| 1191 // goes back to the node itself. | 1194 // goes back to the node itself. |
| 1192 void AddAlternative(GuardedAlternative node) { | 1195 void AddAlternative(GuardedAlternative node) { |
| 1193 ChoiceNode::AddAlternative(node); | 1196 ChoiceNode::AddAlternative(node); |
| 1194 } | 1197 } |
| 1195 | 1198 |
| 1196 RegExpNode* loop_node_; | 1199 RegExpNode* loop_node_; |
| (...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1319 } | 1322 } |
| 1320 void EmitSkipInstructions(RegExpMacroAssembler* masm); | 1323 void EmitSkipInstructions(RegExpMacroAssembler* masm); |
| 1321 | 1324 |
| 1322 private: | 1325 private: |
| 1323 // This is the value obtained by EatsAtLeast. If we do not have at least this | 1326 // This is the value obtained by EatsAtLeast. If we do not have at least this |
| 1324 // many characters left in the sample string then the match is bound to fail. | 1327 // many characters left in the sample string then the match is bound to fail. |
| 1325 // Therefore it is OK to read a character this far ahead of the current match | 1328 // Therefore it is OK to read a character this far ahead of the current match |
| 1326 // point. | 1329 // point. |
| 1327 int length_; | 1330 int length_; |
| 1328 RegExpCompiler* compiler_; | 1331 RegExpCompiler* compiler_; |
| 1329 // 0x7f for ASCII, 0xffff for UTF-16. | 1332 // 0xff for Latin1, 0xffff for UTF-16. |
| 1330 int max_char_; | 1333 int max_char_; |
| 1331 ZoneList<BoyerMoorePositionInfo*>* bitmaps_; | 1334 ZoneList<BoyerMoorePositionInfo*>* bitmaps_; |
| 1332 | 1335 |
| 1333 int GetSkipTable(int min_lookahead, | 1336 int GetSkipTable(int min_lookahead, |
| 1334 int max_lookahead, | 1337 int max_lookahead, |
| 1335 Handle<ByteArray> boolean_skip_table); | 1338 Handle<ByteArray> boolean_skip_table); |
| 1336 bool FindWorthwhileInterval(int* from, int* to); | 1339 bool FindWorthwhileInterval(int* from, int* to); |
| 1337 int FindBestInterval( | 1340 int FindBestInterval( |
| 1338 int max_number_of_chars, int old_biggest_points, int* from, int* to); | 1341 int max_number_of_chars, int old_biggest_points, int* from, int* to); |
| 1339 }; | 1342 }; |
| (...skipping 248 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1588 // has to check if it succeeds a word or non-word. In this case the | 1591 // has to check if it succeeds a word or non-word. In this case the |
| 1589 // result will be something like: | 1592 // result will be something like: |
| 1590 // | 1593 // |
| 1591 // +-------+ +------------+ | 1594 // +-------+ +------------+ |
| 1592 // | . | | . | | 1595 // | . | | . | |
| 1593 // +-------+ ---> +------------+ | 1596 // +-------+ ---> +------------+ |
| 1594 // | word? | | check word | | 1597 // | word? | | check word | |
| 1595 // +-------+ +------------+ | 1598 // +-------+ +------------+ |
| 1596 class Analysis: public NodeVisitor { | 1599 class Analysis: public NodeVisitor { |
| 1597 public: | 1600 public: |
| 1598 Analysis(bool ignore_case, bool is_ascii) | 1601 Analysis(bool ignore_case, bool is_one_byte) |
| 1599 : ignore_case_(ignore_case), | 1602 : ignore_case_(ignore_case), |
| 1600 is_ascii_(is_ascii), | 1603 is_one_byte_(is_one_byte), |
| 1601 error_message_(NULL) { } | 1604 error_message_(NULL) {} |
| 1602 void EnsureAnalyzed(RegExpNode* node); | 1605 void EnsureAnalyzed(RegExpNode* node); |
| 1603 | 1606 |
| 1604 #define DECLARE_VISIT(Type) \ | 1607 #define DECLARE_VISIT(Type) \ |
| 1605 virtual void Visit##Type(Type##Node* that); | 1608 virtual void Visit##Type(Type##Node* that); |
| 1606 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | 1609 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
| 1607 #undef DECLARE_VISIT | 1610 #undef DECLARE_VISIT |
| 1608 virtual void VisitLoopChoice(LoopChoiceNode* that); | 1611 virtual void VisitLoopChoice(LoopChoiceNode* that); |
| 1609 | 1612 |
| 1610 bool has_failed() { return error_message_ != NULL; } | 1613 bool has_failed() { return error_message_ != NULL; } |
| 1611 const char* error_message() { | 1614 const char* error_message() { |
| 1612 DCHECK(error_message_ != NULL); | 1615 DCHECK(error_message_ != NULL); |
| 1613 return error_message_; | 1616 return error_message_; |
| 1614 } | 1617 } |
| 1615 void fail(const char* error_message) { | 1618 void fail(const char* error_message) { |
| 1616 error_message_ = error_message; | 1619 error_message_ = error_message; |
| 1617 } | 1620 } |
| 1618 | 1621 |
| 1619 private: | 1622 private: |
| 1620 bool ignore_case_; | 1623 bool ignore_case_; |
| 1621 bool is_ascii_; | 1624 bool is_one_byte_; |
| 1622 const char* error_message_; | 1625 const char* error_message_; |
| 1623 | 1626 |
| 1624 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); | 1627 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); |
| 1625 }; | 1628 }; |
| 1626 | 1629 |
| 1627 | 1630 |
| 1628 struct RegExpCompileData { | 1631 struct RegExpCompileData { |
| 1629 RegExpCompileData() | 1632 RegExpCompileData() |
| 1630 : tree(NULL), | 1633 : tree(NULL), |
| 1631 node(NULL), | 1634 node(NULL), |
| (...skipping 18 matching lines...) Expand all Loading... |
| 1650 num_registers(0) {} | 1653 num_registers(0) {} |
| 1651 CompilationResult(Object* code, int registers) | 1654 CompilationResult(Object* code, int registers) |
| 1652 : error_message(NULL), | 1655 : error_message(NULL), |
| 1653 code(code), | 1656 code(code), |
| 1654 num_registers(registers) {} | 1657 num_registers(registers) {} |
| 1655 const char* error_message; | 1658 const char* error_message; |
| 1656 Object* code; | 1659 Object* code; |
| 1657 int num_registers; | 1660 int num_registers; |
| 1658 }; | 1661 }; |
| 1659 | 1662 |
| 1660 static CompilationResult Compile(RegExpCompileData* input, | 1663 static CompilationResult Compile(RegExpCompileData* input, bool ignore_case, |
| 1661 bool ignore_case, | 1664 bool global, bool multiline, |
| 1662 bool global, | |
| 1663 bool multiline, | |
| 1664 Handle<String> pattern, | 1665 Handle<String> pattern, |
| 1665 Handle<String> sample_subject, | 1666 Handle<String> sample_subject, |
| 1666 bool is_ascii, Zone* zone); | 1667 bool is_one_byte, Zone* zone); |
| 1667 | 1668 |
| 1668 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1669 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
| 1669 }; | 1670 }; |
| 1670 | 1671 |
| 1671 | 1672 |
| 1672 } } // namespace v8::internal | 1673 } } // namespace v8::internal |
| 1673 | 1674 |
| 1674 #endif // V8_JSREGEXP_H_ | 1675 #endif // V8_JSREGEXP_H_ |
| OLD | NEW |