OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ | 5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ |
6 #define V8_REGEXP_REGEXP_PARSER_H_ | 6 #define V8_REGEXP_REGEXP_PARSER_H_ |
7 | 7 |
8 #include "src/objects.h" | 8 #include "src/objects.h" |
9 #include "src/regexp/regexp-ast.h" | 9 #include "src/regexp/regexp-ast.h" |
10 #include "src/zone.h" | 10 #include "src/zone.h" |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
92 | 92 |
93 private: | 93 private: |
94 ZoneList<T*>* list_; | 94 ZoneList<T*>* list_; |
95 T* last_; | 95 T* last_; |
96 }; | 96 }; |
97 | 97 |
98 | 98 |
99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. | 99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
100 class RegExpBuilder : public ZoneObject { | 100 class RegExpBuilder : public ZoneObject { |
101 public: | 101 public: |
102 RegExpBuilder(Zone* zone, JSRegExp::Flags flags); | 102 RegExpBuilder(Zone* zone, bool ignore_case, bool unicode); |
103 void AddCharacter(uc16 character); | 103 void AddCharacter(uc16 character); |
104 void AddUnicodeCharacter(uc32 character); | 104 void AddUnicodeCharacter(uc32 character); |
105 // "Adds" an empty expression. Does nothing except consume a | 105 // "Adds" an empty expression. Does nothing except consume a |
106 // following quantifier | 106 // following quantifier |
107 void AddEmpty(); | 107 void AddEmpty(); |
108 void AddCharacterClass(RegExpCharacterClass* cc); | 108 void AddCharacterClass(RegExpCharacterClass* cc); |
109 void AddCharacterClassForDesugaring(uc32 c); | 109 void AddCharacterClassForDesugaring(uc32 c); |
110 void AddAtom(RegExpTree* tree); | 110 void AddAtom(RegExpTree* tree); |
111 void AddTerm(RegExpTree* tree); | 111 void AddTerm(RegExpTree* tree); |
112 void AddAssertion(RegExpTree* tree); | 112 void AddAssertion(RegExpTree* tree); |
113 void NewAlternative(); // '|' | 113 void NewAlternative(); // '|' |
114 bool AddQuantifierToAtom(int min, int max, | 114 bool AddQuantifierToAtom(int min, int max, |
115 RegExpQuantifier::QuantifierType type); | 115 RegExpQuantifier::QuantifierType type); |
116 RegExpTree* ToRegExp(); | 116 RegExpTree* ToRegExp(); |
117 | 117 |
118 private: | 118 private: |
119 static const uc16 kNoPendingSurrogate = 0; | 119 static const uc16 kNoPendingSurrogate = 0; |
120 void AddLeadSurrogate(uc16 lead_surrogate); | 120 void AddLeadSurrogate(uc16 lead_surrogate); |
121 void AddTrailSurrogate(uc16 trail_surrogate); | 121 void AddTrailSurrogate(uc16 trail_surrogate); |
122 void FlushPendingSurrogate(); | 122 void FlushPendingSurrogate(); |
123 void FlushCharacters(); | 123 void FlushCharacters(); |
124 void FlushText(); | 124 void FlushText(); |
125 void FlushTerms(); | 125 void FlushTerms(); |
126 bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc); | 126 bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc); |
127 bool NeedsDesugaringForIgnoreCase(uc32 c); | 127 bool NeedsDesugaringForIgnoreCase(uc32 c); |
128 Zone* zone() const { return zone_; } | 128 Zone* zone() const { return zone_; } |
129 bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; } | 129 bool ignore_case() const { return ignore_case_; } |
130 bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; } | 130 bool unicode() const { return unicode_; } |
131 | 131 |
132 Zone* zone_; | 132 Zone* zone_; |
133 bool pending_empty_; | 133 bool pending_empty_; |
134 JSRegExp::Flags flags_; | 134 bool ignore_case_; |
| 135 bool unicode_; |
135 ZoneList<uc16>* characters_; | 136 ZoneList<uc16>* characters_; |
136 uc16 pending_surrogate_; | 137 uc16 pending_surrogate_; |
137 BufferedZoneList<RegExpTree, 2> terms_; | 138 BufferedZoneList<RegExpTree, 2> terms_; |
138 BufferedZoneList<RegExpTree, 2> text_; | 139 BufferedZoneList<RegExpTree, 2> text_; |
139 BufferedZoneList<RegExpTree, 2> alternatives_; | 140 BufferedZoneList<RegExpTree, 2> alternatives_; |
140 #ifdef DEBUG | 141 #ifdef DEBUG |
141 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; | 142 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; |
142 #define LAST(x) last_added_ = x; | 143 #define LAST(x) last_added_ = x; |
143 #else | 144 #else |
144 #define LAST(x) | 145 #define LAST(x) |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
188 void Reset(int pos); | 189 void Reset(int pos); |
189 | 190 |
190 // Reports whether the pattern might be used as a literal search string. | 191 // Reports whether the pattern might be used as a literal search string. |
191 // Only use if the result of the parse is a single atom node. | 192 // Only use if the result of the parse is a single atom node. |
192 bool simple(); | 193 bool simple(); |
193 bool contains_anchor() { return contains_anchor_; } | 194 bool contains_anchor() { return contains_anchor_; } |
194 void set_contains_anchor() { contains_anchor_ = true; } | 195 void set_contains_anchor() { contains_anchor_ = true; } |
195 int captures_started() { return captures_started_; } | 196 int captures_started() { return captures_started_; } |
196 int position() { return next_pos_ - 1; } | 197 int position() { return next_pos_ - 1; } |
197 bool failed() { return failed_; } | 198 bool failed() { return failed_; } |
198 bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; } | 199 bool ignore_case() const { return ignore_case_; } |
199 bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; } | 200 bool multiline() const { return multiline_; } |
| 201 bool unicode() const { return unicode_; } |
200 | 202 |
201 static bool IsSyntaxCharacterOrSlash(uc32 c); | 203 static bool IsSyntaxCharacterOrSlash(uc32 c); |
202 | 204 |
203 static const int kMaxCaptures = 1 << 16; | 205 static const int kMaxCaptures = 1 << 16; |
204 static const uc32 kEndMarker = (1 << 21); | 206 static const uc32 kEndMarker = (1 << 21); |
205 | 207 |
206 private: | 208 private: |
207 enum SubexpressionType { | 209 enum SubexpressionType { |
208 INITIAL, | 210 INITIAL, |
209 CAPTURE, // All positive values represent captures. | 211 CAPTURE, // All positive values represent captures. |
210 POSITIVE_LOOKAROUND, | 212 POSITIVE_LOOKAROUND, |
211 NEGATIVE_LOOKAROUND, | 213 NEGATIVE_LOOKAROUND, |
212 GROUPING | 214 GROUPING |
213 }; | 215 }; |
214 | 216 |
215 class RegExpParserState : public ZoneObject { | 217 class RegExpParserState : public ZoneObject { |
216 public: | 218 public: |
217 RegExpParserState(RegExpParserState* previous_state, | 219 RegExpParserState(RegExpParserState* previous_state, |
218 SubexpressionType group_type, | 220 SubexpressionType group_type, |
219 RegExpLookaround::Type lookaround_type, | 221 RegExpLookaround::Type lookaround_type, |
220 int disjunction_capture_index, JSRegExp::Flags flags, | 222 int disjunction_capture_index, bool ignore_case, |
221 Zone* zone) | 223 bool unicode, Zone* zone) |
222 : previous_state_(previous_state), | 224 : previous_state_(previous_state), |
223 builder_(new (zone) RegExpBuilder(zone, flags)), | 225 builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)), |
224 group_type_(group_type), | 226 group_type_(group_type), |
225 lookaround_type_(lookaround_type), | 227 lookaround_type_(lookaround_type), |
226 disjunction_capture_index_(disjunction_capture_index) {} | 228 disjunction_capture_index_(disjunction_capture_index) {} |
227 // Parser state of containing expression, if any. | 229 // Parser state of containing expression, if any. |
228 RegExpParserState* previous_state() { return previous_state_; } | 230 RegExpParserState* previous_state() { return previous_state_; } |
229 bool IsSubexpression() { return previous_state_ != NULL; } | 231 bool IsSubexpression() { return previous_state_ != NULL; } |
230 // RegExpBuilder building this regexp's AST. | 232 // RegExpBuilder building this regexp's AST. |
231 RegExpBuilder* builder() { return builder_; } | 233 RegExpBuilder* builder() { return builder_; } |
232 // Type of regexp being parsed (parenthesized group or entire regexp). | 234 // Type of regexp being parsed (parenthesized group or entire regexp). |
233 SubexpressionType group_type() { return group_type_; } | 235 SubexpressionType group_type() { return group_type_; } |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
268 uc32 ReadNext(); | 270 uc32 ReadNext(); |
269 FlatStringReader* in() { return in_; } | 271 FlatStringReader* in() { return in_; } |
270 void ScanForCaptures(); | 272 void ScanForCaptures(); |
271 | 273 |
272 Isolate* isolate_; | 274 Isolate* isolate_; |
273 Zone* zone_; | 275 Zone* zone_; |
274 Handle<String>* error_; | 276 Handle<String>* error_; |
275 ZoneList<RegExpCapture*>* captures_; | 277 ZoneList<RegExpCapture*>* captures_; |
276 FlatStringReader* in_; | 278 FlatStringReader* in_; |
277 uc32 current_; | 279 uc32 current_; |
278 JSRegExp::Flags flags_; | 280 bool ignore_case_; |
| 281 bool multiline_; |
| 282 bool unicode_; |
279 int next_pos_; | 283 int next_pos_; |
280 int captures_started_; | 284 int captures_started_; |
281 // The capture count is only valid after we have scanned for captures. | 285 // The capture count is only valid after we have scanned for captures. |
282 int capture_count_; | 286 int capture_count_; |
283 bool has_more_; | 287 bool has_more_; |
284 bool simple_; | 288 bool simple_; |
285 bool contains_anchor_; | 289 bool contains_anchor_; |
286 bool is_scanned_for_captures_; | 290 bool is_scanned_for_captures_; |
287 bool failed_; | 291 bool failed_; |
288 }; | 292 }; |
289 | 293 |
290 } // namespace internal | 294 } // namespace internal |
291 } // namespace v8 | 295 } // namespace v8 |
292 | 296 |
293 #endif // V8_REGEXP_REGEXP_PARSER_H_ | 297 #endif // V8_REGEXP_REGEXP_PARSER_H_ |
OLD | NEW |