| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 510 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 521 if (that->follows_start != UNKNOWN) { | 521 if (that->follows_start != UNKNOWN) { |
| 522 ASSERT(follows_start == UNKNOWN || | 522 ASSERT(follows_start == UNKNOWN || |
| 523 follows_start == that->follows_start); | 523 follows_start == that->follows_start); |
| 524 follows_start = that->follows_start; | 524 follows_start = that->follows_start; |
| 525 } | 525 } |
| 526 does_determine_word = that->does_determine_word; | 526 does_determine_word = that->does_determine_word; |
| 527 does_determine_newline = that->does_determine_newline; | 527 does_determine_newline = that->does_determine_newline; |
| 528 does_determine_start = that->does_determine_start; | 528 does_determine_start = that->does_determine_start; |
| 529 } | 529 } |
| 530 | 530 |
| 531 bool HasLookbehind() { |
| 532 return follows_word_interest || |
| 533 follows_newline_interest || |
| 534 follows_start_interest; |
| 535 } |
| 536 |
| 531 // Sets the interests of this node to include the interests of the | 537 // Sets the interests of this node to include the interests of the |
| 532 // following node. | 538 // following node. |
| 533 void AddFromFollowing(NodeInfo* that) { | 539 void AddFromFollowing(NodeInfo* that) { |
| 534 follows_word_interest |= that->follows_word_interest; | 540 follows_word_interest |= that->follows_word_interest; |
| 535 follows_newline_interest |= that->follows_newline_interest; | 541 follows_newline_interest |= that->follows_newline_interest; |
| 536 follows_start_interest |= that->follows_start_interest; | 542 follows_start_interest |= that->follows_start_interest; |
| 537 } | 543 } |
| 538 | 544 |
| 539 void ResetCompilationState() { | 545 void ResetCompilationState() { |
| 540 being_analyzed = false; | 546 being_analyzed = false; |
| (...skipping 346 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 887 | 893 |
| 888 bool being_calculated() { return being_calculated_; } | 894 bool being_calculated() { return being_calculated_; } |
| 889 void set_being_calculated(bool b) { being_calculated_ = b; } | 895 void set_being_calculated(bool b) { being_calculated_ = b; } |
| 890 | 896 |
| 891 protected: | 897 protected: |
| 892 int GreedyLoopTextLength(GuardedAlternative *alternative); | 898 int GreedyLoopTextLength(GuardedAlternative *alternative); |
| 893 ZoneList<GuardedAlternative>* alternatives_; | 899 ZoneList<GuardedAlternative>* alternatives_; |
| 894 | 900 |
| 895 private: | 901 private: |
| 896 friend class DispatchTableConstructor; | 902 friend class DispatchTableConstructor; |
| 897 friend class Analysis; | 903 friend class AssertionPropagation; |
| 898 void GenerateGuard(RegExpMacroAssembler* macro_assembler, | 904 void GenerateGuard(RegExpMacroAssembler* macro_assembler, |
| 899 Guard *guard, | 905 Guard *guard, |
| 900 GenerationVariant* variant); | 906 GenerationVariant* variant); |
| 901 DispatchTable* table_; | 907 DispatchTable* table_; |
| 902 bool being_calculated_; | 908 bool being_calculated_; |
| 903 }; | 909 }; |
| 904 | 910 |
| 905 | 911 |
| 906 class LoopChoiceNode: public ChoiceNode { | 912 class LoopChoiceNode: public ChoiceNode { |
| 907 public: | 913 public: |
| (...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1045 DispatchTable* table() { return table_; } | 1051 DispatchTable* table() { return table_; } |
| 1046 void set_choice_index(int value) { choice_index_ = value; } | 1052 void set_choice_index(int value) { choice_index_ = value; } |
| 1047 | 1053 |
| 1048 protected: | 1054 protected: |
| 1049 DispatchTable *table_; | 1055 DispatchTable *table_; |
| 1050 int choice_index_; | 1056 int choice_index_; |
| 1051 bool ignore_case_; | 1057 bool ignore_case_; |
| 1052 }; | 1058 }; |
| 1053 | 1059 |
| 1054 | 1060 |
| 1055 class Analysis: public NodeVisitor { | 1061 // Assertion propagation moves information about assertions such as |
| 1062 // \b to the affected nodes. For instance, in /.\b./ information must |
| 1063 // be propagated to the first '.' that whatever follows needs to know |
| 1064 // if it matched a word or a non-word, and to the second '.' that it |
| 1065 // has to check if it succeeds a word or non-word. In this case the |
| 1066 // result will be something like: |
| 1067 // |
| 1068 // +-------+ +------------+ |
| 1069 // | . | | . | |
| 1070 // +-------+ ---> +------------+ |
| 1071 // | word? | | check word | |
| 1072 // +-------+ +------------+ |
| 1073 // |
| 1074 // At a later phase all nodes that determine information for their |
| 1075 // following nodes are split into several 'sibling' nodes. In this |
| 1076 // case the first '.' is split into one node that only matches words |
| 1077 // and one that only matches non-words. The second '.' is also split, |
| 1078 // into one node that assumes that the previous character was a word |
| 1079 // character and one that assumes that is was non-word. In this case |
| 1080 // the result is |
| 1081 // |
| 1082 // +------------------+ +------------------+ |
| 1083 // /--> | intersect(., \w) | ---> | intersect(., \W) | |
| 1084 // | +------------------+ +------------------+ |
| 1085 // | | follows \w | |
| 1086 // | +------------------+ |
| 1087 // --? |
| 1088 // | +------------------+ +------------------+ |
| 1089 // \--> | intersect(., \W) | ---> | intersect(., \w) | |
| 1090 // +------------------+ +------------------+ |
| 1091 // | follows \W | |
| 1092 // +------------------+ |
| 1093 // |
| 1094 // This way we don't need to explicitly check the previous character |
| 1095 // but can always assume that whoever consumed the previous character |
| 1096 // has propagated the relevant information forward. |
| 1097 class AssertionPropagation: public NodeVisitor { |
| 1056 public: | 1098 public: |
| 1057 explicit Analysis(bool ignore_case) | 1099 explicit AssertionPropagation(bool ignore_case) |
| 1058 : ignore_case_(ignore_case) { } | 1100 : ignore_case_(ignore_case) { } |
| 1059 void EnsureAnalyzed(RegExpNode* node); | 1101 void EnsureAnalyzed(RegExpNode* node); |
| 1060 | 1102 |
| 1061 #define DECLARE_VISIT(Type) \ | 1103 #define DECLARE_VISIT(Type) \ |
| 1062 virtual void Visit##Type(Type##Node* that); | 1104 virtual void Visit##Type(Type##Node* that); |
| 1063 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | 1105 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
| 1064 #undef DECLARE_VISIT | 1106 #undef DECLARE_VISIT |
| 1065 | 1107 |
| 1066 private: | 1108 private: |
| 1067 bool ignore_case_; | 1109 bool ignore_case_; |
| 1068 | 1110 |
| 1069 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); | 1111 DISALLOW_IMPLICIT_CONSTRUCTORS(AssertionPropagation); |
| 1070 }; | 1112 }; |
| 1071 | 1113 |
| 1072 | 1114 |
| 1073 struct RegExpParseResult { | 1115 struct RegExpCompileData { |
| 1116 RegExpCompileData() |
| 1117 : tree(NULL), |
| 1118 node(NULL), |
| 1119 has_lookbehind(false), |
| 1120 has_character_escapes(false), |
| 1121 capture_count(0) { } |
| 1074 RegExpTree* tree; | 1122 RegExpTree* tree; |
| 1123 RegExpNode* node; |
| 1124 bool has_lookbehind; |
| 1075 bool has_character_escapes; | 1125 bool has_character_escapes; |
| 1076 Handle<String> error; | 1126 Handle<String> error; |
| 1077 int capture_count; | 1127 int capture_count; |
| 1078 }; | 1128 }; |
| 1079 | 1129 |
| 1080 | 1130 |
| 1081 class RegExpEngine: public AllStatic { | 1131 class RegExpEngine: public AllStatic { |
| 1082 public: | 1132 public: |
| 1083 static Handle<FixedArray> Compile(RegExpParseResult* input, | 1133 static Handle<FixedArray> Compile(RegExpCompileData* input, |
| 1084 RegExpNode** node_return, | |
| 1085 bool ignore_case, | 1134 bool ignore_case, |
| 1086 bool multiline, | 1135 bool multiline, |
| 1087 Handle<String> pattern, | 1136 Handle<String> pattern, |
| 1088 bool is_ascii); | 1137 bool is_ascii); |
| 1089 | 1138 |
| 1090 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1139 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
| 1091 }; | 1140 }; |
| 1092 | 1141 |
| 1093 | 1142 |
| 1094 } } // namespace v8::internal | 1143 } } // namespace v8::internal |
| 1095 | 1144 |
| 1096 #endif // V8_JSREGEXP_H_ | 1145 #endif // V8_JSREGEXP_H_ |
| OLD | NEW |