| OLD | NEW |
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ | 5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ |
| 6 #define V8_REGEXP_REGEXP_PARSER_H_ | 6 #define V8_REGEXP_REGEXP_PARSER_H_ |
| 7 | 7 |
| 8 #include "src/objects.h" | 8 #include "src/objects.h" |
| 9 #include "src/regexp/regexp-ast.h" | 9 #include "src/regexp/regexp-ast.h" |
| 10 #include "src/zone.h" | 10 #include "src/zone.h" |
| (...skipping 81 matching lines...) | (...skipping 81 matching lines...) |
| 92 | 92 |
| 93 private: | 93 private: |
| 94 ZoneList<T*>* list_; | 94 ZoneList<T*>* list_; |
| 95 T* last_; | 95 T* last_; |
| 96 }; | 96 }; |
| 97 | 97 |
| 98 | 98 |
| 99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. | 99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
| 100 class RegExpBuilder : public ZoneObject { | 100 class RegExpBuilder : public ZoneObject { |
| 101 public: | 101 public: |
| 102 RegExpBuilder(Zone* zone, JSRegExp::Flags flags); | 102 RegExpBuilder(Zone* zone, bool ignore_case, bool unicode); |
| 103 void AddCharacter(uc16 character); | 103 void AddCharacter(uc16 character); |
| 104 void AddUnicodeCharacter(uc32 character); | 104 void AddUnicodeCharacter(uc32 character); |
| 105 // "Adds" an empty expression. Does nothing except consume a | 105 // "Adds" an empty expression. Does nothing except consume a |
| 106 // following quantifier. | 106 // following quantifier. |
| 107 void AddEmpty(); | 107 void AddEmpty(); |
| 108 void AddCharacterClass(RegExpCharacterClass* cc); | 108 void AddCharacterClass(RegExpCharacterClass* cc); |
| 109 void AddCharacterClassForDesugaring(uc32 c); | 109 void AddCharacterClassForDesugaring(uc32 c); |
| 110 void AddAtom(RegExpTree* tree); | 110 void AddAtom(RegExpTree* tree); |
| 111 void AddTerm(RegExpTree* tree); | 111 void AddTerm(RegExpTree* tree); |
| 112 void AddAssertion(RegExpTree* tree); | 112 void AddAssertion(RegExpTree* tree); |
| 113 void NewAlternative(); // '|' | 113 void NewAlternative(); // '|' |
| 114 bool AddQuantifierToAtom(int min, int max, | 114 bool AddQuantifierToAtom(int min, int max, |
| 115 RegExpQuantifier::QuantifierType type); | 115 RegExpQuantifier::QuantifierType type); |
| 116 RegExpTree* ToRegExp(); | 116 RegExpTree* ToRegExp(); |
| 117 | 117 |
| 118 private: | 118 private: |
| 119 static const uc16 kNoPendingSurrogate = 0; | 119 static const uc16 kNoPendingSurrogate = 0; |
| 120 void AddLeadSurrogate(uc16 lead_surrogate); | 120 void AddLeadSurrogate(uc16 lead_surrogate); |
| 121 void AddTrailSurrogate(uc16 trail_surrogate); | 121 void AddTrailSurrogate(uc16 trail_surrogate); |
| 122 void FlushPendingSurrogate(); | 122 void FlushPendingSurrogate(); |
| 123 void FlushCharacters(); | 123 void FlushCharacters(); |
| 124 void FlushText(); | 124 void FlushText(); |
| 125 void FlushTerms(); | 125 void FlushTerms(); |
| 126 bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc); | 126 bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc); |
| 127 bool NeedsDesugaringForIgnoreCase(uc32 c); | 127 bool NeedsDesugaringForIgnoreCase(uc32 c); |
| 128 Zone* zone() const { return zone_; } | 128 Zone* zone() const { return zone_; } |
| 129 bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; } | 129 bool ignore_case() const { return ignore_case_; } |
| 130 bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; } | 130 bool unicode() const { return unicode_; } |
| 131 | 131 |
| 132 Zone* zone_; | 132 Zone* zone_; |
| 133 bool pending_empty_; | 133 bool pending_empty_; |
| 134 JSRegExp::Flags flags_; | 134 bool ignore_case_; |
| 135 bool unicode_; |
| 135 ZoneList<uc16>* characters_; | 136 ZoneList<uc16>* characters_; |
| 136 uc16 pending_surrogate_; | 137 uc16 pending_surrogate_; |
| 137 BufferedZoneList<RegExpTree, 2> terms_; | 138 BufferedZoneList<RegExpTree, 2> terms_; |
| 138 BufferedZoneList<RegExpTree, 2> text_; | 139 BufferedZoneList<RegExpTree, 2> text_; |
| 139 BufferedZoneList<RegExpTree, 2> alternatives_; | 140 BufferedZoneList<RegExpTree, 2> alternatives_; |
| 140 #ifdef DEBUG | 141 #ifdef DEBUG |
| 141 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; | 142 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; |
| 142 #define LAST(x) last_added_ = x; | 143 #define LAST(x) last_added_ = x; |
| 143 #else | 144 #else |
| 144 #define LAST(x) | 145 #define LAST(x) |
| (...skipping 43 matching lines...) | (...skipping 43 matching lines...) |
| 188 void Reset(int pos); | 189 void Reset(int pos); |
| 189 | 190 |
| 190 // Reports whether the pattern might be used as a literal search string. | 191 // Reports whether the pattern might be used as a literal search string. |
| 191 // Only use if the result of the parse is a single atom node. | 192 // Only use if the result of the parse is a single atom node. |
| 192 bool simple(); | 193 bool simple(); |
| 193 bool contains_anchor() { return contains_anchor_; } | 194 bool contains_anchor() { return contains_anchor_; } |
| 194 void set_contains_anchor() { contains_anchor_ = true; } | 195 void set_contains_anchor() { contains_anchor_ = true; } |
| 195 int captures_started() { return captures_started_; } | 196 int captures_started() { return captures_started_; } |
| 196 int position() { return next_pos_ - 1; } | 197 int position() { return next_pos_ - 1; } |
| 197 bool failed() { return failed_; } | 198 bool failed() { return failed_; } |
| 198 bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; } | 199 bool ignore_case() const { return ignore_case_; } |
| 199 bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; } | 200 bool multiline() const { return multiline_; } |
| 201 bool unicode() const { return unicode_; } |
| 200 | 202 |
| 201 static bool IsSyntaxCharacterOrSlash(uc32 c); | 203 static bool IsSyntaxCharacterOrSlash(uc32 c); |
| 202 | 204 |
| 203 static const int kMaxCaptures = 1 << 16; | 205 static const int kMaxCaptures = 1 << 16; |
| 204 static const uc32 kEndMarker = (1 << 21); | 206 static const uc32 kEndMarker = (1 << 21); |
| 205 | 207 |
| 206 private: | 208 private: |
| 207 enum SubexpressionType { | 209 enum SubexpressionType { |
| 208 INITIAL, | 210 INITIAL, |
| 209 CAPTURE, // All positive values represent captures. | 211 CAPTURE, // All positive values represent captures. |
| 210 POSITIVE_LOOKAROUND, | 212 POSITIVE_LOOKAROUND, |
| 211 NEGATIVE_LOOKAROUND, | 213 NEGATIVE_LOOKAROUND, |
| 212 GROUPING | 214 GROUPING |
| 213 }; | 215 }; |
| 214 | 216 |
| 215 class RegExpParserState : public ZoneObject { | 217 class RegExpParserState : public ZoneObject { |
| 216 public: | 218 public: |
| 217 RegExpParserState(RegExpParserState* previous_state, | 219 RegExpParserState(RegExpParserState* previous_state, |
| 218 SubexpressionType group_type, | 220 SubexpressionType group_type, |
| 219 RegExpLookaround::Type lookaround_type, | 221 RegExpLookaround::Type lookaround_type, |
| 220 int disjunction_capture_index, JSRegExp::Flags flags, | 222 int disjunction_capture_index, bool ignore_case, |
| 221 Zone* zone) | 223 bool unicode, Zone* zone) |
| 222 : previous_state_(previous_state), | 224 : previous_state_(previous_state), |
| 223 builder_(new (zone) RegExpBuilder(zone, flags)), | 225 builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)), |
| 224 group_type_(group_type), | 226 group_type_(group_type), |
| 225 lookaround_type_(lookaround_type), | 227 lookaround_type_(lookaround_type), |
| 226 disjunction_capture_index_(disjunction_capture_index) {} | 228 disjunction_capture_index_(disjunction_capture_index) {} |
| 227 // Parser state of containing expression, if any. | 229 // Parser state of containing expression, if any. |
| 228 RegExpParserState* previous_state() { return previous_state_; } | 230 RegExpParserState* previous_state() { return previous_state_; } |
| 229 bool IsSubexpression() { return previous_state_ != NULL; } | 231 bool IsSubexpression() { return previous_state_ != NULL; } |
| 230 // RegExpBuilder building this regexp's AST. | 232 // RegExpBuilder building this regexp's AST. |
| 231 RegExpBuilder* builder() { return builder_; } | 233 RegExpBuilder* builder() { return builder_; } |
| 232 // Type of regexp being parsed (parenthesized group or entire regexp). | 234 // Type of regexp being parsed (parenthesized group or entire regexp). |
| 233 SubexpressionType group_type() { return group_type_; } | 235 SubexpressionType group_type() { return group_type_; } |
| (...skipping 34 matching lines...) | (...skipping 34 matching lines...) |
| 268 uc32 ReadNext(); | 270 uc32 ReadNext(); |
| 269 FlatStringReader* in() { return in_; } | 271 FlatStringReader* in() { return in_; } |
| 270 void ScanForCaptures(); | 272 void ScanForCaptures(); |
| 271 | 273 |
| 272 Isolate* isolate_; | 274 Isolate* isolate_; |
| 273 Zone* zone_; | 275 Zone* zone_; |
| 274 Handle<String>* error_; | 276 Handle<String>* error_; |
| 275 ZoneList<RegExpCapture*>* captures_; | 277 ZoneList<RegExpCapture*>* captures_; |
| 276 FlatStringReader* in_; | 278 FlatStringReader* in_; |
| 277 uc32 current_; | 279 uc32 current_; |
| 278 JSRegExp::Flags flags_; | 280 bool ignore_case_; |
| 281 bool multiline_; |
| 282 bool unicode_; |
| 279 int next_pos_; | 283 int next_pos_; |
| 280 int captures_started_; | 284 int captures_started_; |
| 281 // The capture count is only valid after we have scanned for captures. | 285 // The capture count is only valid after we have scanned for captures. |
| 282 int capture_count_; | 286 int capture_count_; |
| 283 bool has_more_; | 287 bool has_more_; |
| 284 bool simple_; | 288 bool simple_; |
| 285 bool contains_anchor_; | 289 bool contains_anchor_; |
| 286 bool is_scanned_for_captures_; | 290 bool is_scanned_for_captures_; |
| 287 bool failed_; | 291 bool failed_; |
| 288 }; | 292 }; |
| 289 | 293 |
| 290 } // namespace internal | 294 } // namespace internal |
| 291 } // namespace v8 | 295 } // namespace v8 |
| 292 | 296 |
| 293 #endif // V8_REGEXP_REGEXP_PARSER_H_ | 297 #endif // V8_REGEXP_REGEXP_PARSER_H_ |
| OLD | NEW |