| OLD | NEW |
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ | 5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ |
| 6 #define V8_REGEXP_REGEXP_PARSER_H_ | 6 #define V8_REGEXP_REGEXP_PARSER_H_ |
| 7 | 7 |
| 8 #include "src/objects.h" | 8 #include "src/objects.h" |
| 9 #include "src/regexp/regexp-ast.h" | 9 #include "src/regexp/regexp-ast.h" |
| 10 #include "src/zone.h" | 10 #include "src/zone.h" |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 92 | 92 |
| 93 private: | 93 private: |
| 94 ZoneList<T*>* list_; | 94 ZoneList<T*>* list_; |
| 95 T* last_; | 95 T* last_; |
| 96 }; | 96 }; |
| 97 | 97 |
| 98 | 98 |
| 99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. | 99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
| 100 class RegExpBuilder : public ZoneObject { | 100 class RegExpBuilder : public ZoneObject { |
| 101 public: | 101 public: |
| 102 RegExpBuilder(Zone* zone, JSRegExp::Flags flags); | 102 explicit RegExpBuilder(Zone* zone); |
| 103 void AddCharacter(uc16 character); | 103 void AddCharacter(uc16 character); |
| 104 void AddUnicodeCharacter(uc32 character); | 104 void AddUnicodeCharacter(uc32 character); |
| 105 // "Adds" an empty expression. Does nothing except consume a | 105 // "Adds" an empty expression. Does nothing except consume a |
| 106 // following quantifier | 106 // following quantifier |
| 107 void AddEmpty(); | 107 void AddEmpty(); |
| 108 void AddCharacterClass(RegExpCharacterClass* cc); | |
| 109 void AddAtom(RegExpTree* tree); | 108 void AddAtom(RegExpTree* tree); |
| 110 void AddTerm(RegExpTree* tree); | |
| 111 void AddAssertion(RegExpTree* tree); | 109 void AddAssertion(RegExpTree* tree); |
| 112 void NewAlternative(); // '|' | 110 void NewAlternative(); // '|' |
| 113 void AddQuantifierToAtom(int min, int max, | 111 void AddQuantifierToAtom(int min, int max, |
| 114 RegExpQuantifier::QuantifierType type); | 112 RegExpQuantifier::QuantifierType type); |
| 115 RegExpTree* ToRegExp(); | 113 RegExpTree* ToRegExp(); |
| 116 | 114 |
| 117 private: | 115 private: |
| 118 static const uc16 kNoPendingSurrogate = 0; | |
| 119 void AddLeadSurrogate(uc16 lead_surrogate); | |
| 120 void AddTrailSurrogate(uc16 trail_surrogate); | |
| 121 void FlushPendingSurrogate(); | |
| 122 void FlushCharacters(); | 116 void FlushCharacters(); |
| 123 void FlushText(); | 117 void FlushText(); |
| 124 void FlushTerms(); | 118 void FlushTerms(); |
| 125 Zone* zone() const { return zone_; } | 119 Zone* zone() const { return zone_; } |
| 126 bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; } | |
| 127 | 120 |
| 128 Zone* zone_; | 121 Zone* zone_; |
| 129 bool pending_empty_; | 122 bool pending_empty_; |
| 130 JSRegExp::Flags flags_; | |
| 131 ZoneList<uc16>* characters_; | 123 ZoneList<uc16>* characters_; |
| 132 uc16 pending_surrogate_; | |
| 133 BufferedZoneList<RegExpTree, 2> terms_; | 124 BufferedZoneList<RegExpTree, 2> terms_; |
| 134 BufferedZoneList<RegExpTree, 2> text_; | 125 BufferedZoneList<RegExpTree, 2> text_; |
| 135 BufferedZoneList<RegExpTree, 2> alternatives_; | 126 BufferedZoneList<RegExpTree, 2> alternatives_; |
| 136 #ifdef DEBUG | 127 #ifdef DEBUG |
| 137 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; | 128 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; |
| 138 #define LAST(x) last_added_ = x; | 129 #define LAST(x) last_added_ = x; |
| 139 #else | 130 #else |
| 140 #define LAST(x) | 131 #define LAST(x) |
| 141 #endif | 132 #endif |
| 142 }; | 133 }; |
| 143 | 134 |
| 144 | 135 |
| 145 class RegExpParser BASE_EMBEDDED { | 136 class RegExpParser BASE_EMBEDDED { |
| 146 public: | 137 public: |
| 147 RegExpParser(FlatStringReader* in, Handle<String>* error, | 138 RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode, |
| 148 JSRegExp::Flags flags, Isolate* isolate, Zone* zone); | 139 bool unicode, Isolate* isolate, Zone* zone); |
| 149 | 140 |
| 150 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, | 141 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, |
| 151 JSRegExp::Flags flags, RegExpCompileData* result); | 142 bool multiline, bool unicode, |
| 143 RegExpCompileData* result); |
| 152 | 144 |
| 153 RegExpTree* ParsePattern(); | 145 RegExpTree* ParsePattern(); |
| 154 RegExpTree* ParseDisjunction(); | 146 RegExpTree* ParseDisjunction(); |
| 155 RegExpTree* ParseGroup(); | 147 RegExpTree* ParseGroup(); |
| 156 RegExpTree* ParseCharacterClass(); | 148 RegExpTree* ParseCharacterClass(); |
| 157 | 149 |
| 158 // Parses a {...,...} quantifier and stores the range in the given | 150 // Parses a {...,...} quantifier and stores the range in the given |
| 159 // out parameters. | 151 // out parameters. |
| 160 bool ParseIntervalQuantifier(int* min_out, int* max_out); | 152 bool ParseIntervalQuantifier(int* min_out, int* max_out); |
| 161 | 153 |
| (...skipping 22 matching lines...) Expand all Loading... |
| 184 void Reset(int pos); | 176 void Reset(int pos); |
| 185 | 177 |
| 186 // Reports whether the pattern might be used as a literal search string. | 178 // Reports whether the pattern might be used as a literal search string. |
| 187 // Only use if the result of the parse is a single atom node. | 179 // Only use if the result of the parse is a single atom node. |
| 188 bool simple(); | 180 bool simple(); |
| 189 bool contains_anchor() { return contains_anchor_; } | 181 bool contains_anchor() { return contains_anchor_; } |
| 190 void set_contains_anchor() { contains_anchor_ = true; } | 182 void set_contains_anchor() { contains_anchor_ = true; } |
| 191 int captures_started() { return captures_started_; } | 183 int captures_started() { return captures_started_; } |
| 192 int position() { return next_pos_ - 1; } | 184 int position() { return next_pos_ - 1; } |
| 193 bool failed() { return failed_; } | 185 bool failed() { return failed_; } |
| 194 bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; } | |
| 195 bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; } | |
| 196 | 186 |
| 197 static bool IsSyntaxCharacter(uc32 c); | 187 static bool IsSyntaxCharacter(uc32 c); |
| 198 | 188 |
| 199 static const int kMaxCaptures = 1 << 16; | 189 static const int kMaxCaptures = 1 << 16; |
| 200 static const uc32 kEndMarker = (1 << 21); | 190 static const uc32 kEndMarker = (1 << 21); |
| 201 | 191 |
| 202 private: | 192 private: |
| 203 enum SubexpressionType { | 193 enum SubexpressionType { |
| 204 INITIAL, | 194 INITIAL, |
| 205 CAPTURE, // All positive values represent captures. | 195 CAPTURE, // All positive values represent captures. |
| 206 POSITIVE_LOOKAROUND, | 196 POSITIVE_LOOKAROUND, |
| 207 NEGATIVE_LOOKAROUND, | 197 NEGATIVE_LOOKAROUND, |
| 208 GROUPING | 198 GROUPING |
| 209 }; | 199 }; |
| 210 | 200 |
| 211 class RegExpParserState : public ZoneObject { | 201 class RegExpParserState : public ZoneObject { |
| 212 public: | 202 public: |
| 213 RegExpParserState(RegExpParserState* previous_state, | 203 RegExpParserState(RegExpParserState* previous_state, |
| 214 SubexpressionType group_type, | 204 SubexpressionType group_type, |
| 215 RegExpLookaround::Type lookaround_type, | 205 RegExpLookaround::Type lookaround_type, |
| 216 int disjunction_capture_index, JSRegExp::Flags flags, | 206 int disjunction_capture_index, Zone* zone) |
| 217 Zone* zone) | |
| 218 : previous_state_(previous_state), | 207 : previous_state_(previous_state), |
| 219 builder_(new (zone) RegExpBuilder(zone, flags)), | 208 builder_(new (zone) RegExpBuilder(zone)), |
| 220 group_type_(group_type), | 209 group_type_(group_type), |
| 221 lookaround_type_(lookaround_type), | 210 lookaround_type_(lookaround_type), |
| 222 disjunction_capture_index_(disjunction_capture_index) {} | 211 disjunction_capture_index_(disjunction_capture_index) {} |
| 223 // Parser state of containing expression, if any. | 212 // Parser state of containing expression, if any. |
| 224 RegExpParserState* previous_state() { return previous_state_; } | 213 RegExpParserState* previous_state() { return previous_state_; } |
| 225 bool IsSubexpression() { return previous_state_ != NULL; } | 214 bool IsSubexpression() { return previous_state_ != NULL; } |
| 226 // RegExpBuilder building this regexp's AST. | 215 // RegExpBuilder building this regexp's AST. |
| 227 RegExpBuilder* builder() { return builder_; } | 216 RegExpBuilder* builder() { return builder_; } |
| 228 // Type of regexp being parsed (parenthesized group or entire regexp). | 217 // Type of regexp being parsed (parenthesized group or entire regexp). |
| 229 SubexpressionType group_type() { return group_type_; } | 218 SubexpressionType group_type() { return group_type_; } |
| (...skipping 23 matching lines...) Expand all Loading... |
| 253 // Return the 1-indexed RegExpCapture object, allocate if necessary. | 242 // Return the 1-indexed RegExpCapture object, allocate if necessary. |
| 254 RegExpCapture* GetCapture(int index); | 243 RegExpCapture* GetCapture(int index); |
| 255 | 244 |
| 256 Isolate* isolate() { return isolate_; } | 245 Isolate* isolate() { return isolate_; } |
| 257 Zone* zone() const { return zone_; } | 246 Zone* zone() const { return zone_; } |
| 258 | 247 |
| 259 uc32 current() { return current_; } | 248 uc32 current() { return current_; } |
| 260 bool has_more() { return has_more_; } | 249 bool has_more() { return has_more_; } |
| 261 bool has_next() { return next_pos_ < in()->length(); } | 250 bool has_next() { return next_pos_ < in()->length(); } |
| 262 uc32 Next(); | 251 uc32 Next(); |
| 263 template <bool update_position> | |
| 264 uc32 ReadNext(); | |
| 265 FlatStringReader* in() { return in_; } | 252 FlatStringReader* in() { return in_; } |
| 266 void ScanForCaptures(); | 253 void ScanForCaptures(); |
| 267 | 254 |
| 268 Isolate* isolate_; | 255 Isolate* isolate_; |
| 269 Zone* zone_; | 256 Zone* zone_; |
| 270 Handle<String>* error_; | 257 Handle<String>* error_; |
| 271 ZoneList<RegExpCapture*>* captures_; | 258 ZoneList<RegExpCapture*>* captures_; |
| 272 FlatStringReader* in_; | 259 FlatStringReader* in_; |
| 273 uc32 current_; | 260 uc32 current_; |
| 274 JSRegExp::Flags flags_; | |
| 275 int next_pos_; | 261 int next_pos_; |
| 276 int captures_started_; | 262 int captures_started_; |
| 277 // The capture count is only valid after we have scanned for captures. | 263 // The capture count is only valid after we have scanned for captures. |
| 278 int capture_count_; | 264 int capture_count_; |
| 279 bool has_more_; | 265 bool has_more_; |
| 266 bool multiline_; |
| 267 bool unicode_; |
| 280 bool simple_; | 268 bool simple_; |
| 281 bool contains_anchor_; | 269 bool contains_anchor_; |
| 282 bool is_scanned_for_captures_; | 270 bool is_scanned_for_captures_; |
| 283 bool failed_; | 271 bool failed_; |
| 284 }; | 272 }; |
| 285 | 273 |
| 286 } // namespace internal | 274 } // namespace internal |
| 287 } // namespace v8 | 275 } // namespace v8 |
| 288 | 276 |
| 289 #endif // V8_REGEXP_REGEXP_PARSER_H_ | 277 #endif // V8_REGEXP_REGEXP_PARSER_H_ |
| OLD | NEW |