OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ | 5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ |
6 #define V8_REGEXP_REGEXP_PARSER_H_ | 6 #define V8_REGEXP_REGEXP_PARSER_H_ |
7 | 7 |
8 #include "src/objects.h" | 8 #include "src/objects.h" |
9 #include "src/regexp/regexp-ast.h" | 9 #include "src/regexp/regexp-ast.h" |
10 #include "src/zone.h" | 10 #include "src/zone.h" |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
92 | 92 |
93 private: | 93 private: |
94 ZoneList<T*>* list_; | 94 ZoneList<T*>* list_; |
95 T* last_; | 95 T* last_; |
96 }; | 96 }; |
97 | 97 |
98 | 98 |
99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. | 99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
100 class RegExpBuilder : public ZoneObject { | 100 class RegExpBuilder : public ZoneObject { |
101 public: | 101 public: |
102 RegExpBuilder(Zone* zone, JSRegExp::Flags flags); | 102 explicit RegExpBuilder(Zone* zone); |
103 void AddCharacter(uc16 character); | 103 void AddCharacter(uc16 character); |
104 void AddUnicodeCharacter(uc32 character); | 104 void AddUnicodeCharacter(uc32 character); |
105 // "Adds" an empty expression. Does nothing except consume a | 105 // "Adds" an empty expression. Does nothing except consume a |
106 // following quantifier | 106 // following quantifier |
107 void AddEmpty(); | 107 void AddEmpty(); |
108 void AddCharacterClass(RegExpCharacterClass* cc); | |
109 void AddAtom(RegExpTree* tree); | 108 void AddAtom(RegExpTree* tree); |
110 void AddTerm(RegExpTree* tree); | |
111 void AddAssertion(RegExpTree* tree); | 109 void AddAssertion(RegExpTree* tree); |
112 void NewAlternative(); // '|' | 110 void NewAlternative(); // '|' |
113 void AddQuantifierToAtom(int min, int max, | 111 void AddQuantifierToAtom(int min, int max, |
114 RegExpQuantifier::QuantifierType type); | 112 RegExpQuantifier::QuantifierType type); |
115 RegExpTree* ToRegExp(); | 113 RegExpTree* ToRegExp(); |
116 | 114 |
117 private: | 115 private: |
118 static const uc16 kNoPendingSurrogate = 0; | |
119 void AddLeadSurrogate(uc16 lead_surrogate); | |
120 void AddTrailSurrogate(uc16 trail_surrogate); | |
121 void FlushPendingSurrogate(); | |
122 void FlushCharacters(); | 116 void FlushCharacters(); |
123 void FlushText(); | 117 void FlushText(); |
124 void FlushTerms(); | 118 void FlushTerms(); |
125 Zone* zone() const { return zone_; } | 119 Zone* zone() const { return zone_; } |
126 bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; } | |
127 | 120 |
128 Zone* zone_; | 121 Zone* zone_; |
129 bool pending_empty_; | 122 bool pending_empty_; |
130 JSRegExp::Flags flags_; | |
131 ZoneList<uc16>* characters_; | 123 ZoneList<uc16>* characters_; |
132 uc16 pending_surrogate_; | |
133 BufferedZoneList<RegExpTree, 2> terms_; | 124 BufferedZoneList<RegExpTree, 2> terms_; |
134 BufferedZoneList<RegExpTree, 2> text_; | 125 BufferedZoneList<RegExpTree, 2> text_; |
135 BufferedZoneList<RegExpTree, 2> alternatives_; | 126 BufferedZoneList<RegExpTree, 2> alternatives_; |
136 #ifdef DEBUG | 127 #ifdef DEBUG |
137 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; | 128 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; |
138 #define LAST(x) last_added_ = x; | 129 #define LAST(x) last_added_ = x; |
139 #else | 130 #else |
140 #define LAST(x) | 131 #define LAST(x) |
141 #endif | 132 #endif |
142 }; | 133 }; |
143 | 134 |
144 | 135 |
145 class RegExpParser BASE_EMBEDDED { | 136 class RegExpParser BASE_EMBEDDED { |
146 public: | 137 public: |
147 RegExpParser(FlatStringReader* in, Handle<String>* error, | 138 RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode, |
148 JSRegExp::Flags flags, Isolate* isolate, Zone* zone); | 139 bool unicode, Isolate* isolate, Zone* zone); |
149 | 140 |
150 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, | 141 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, |
151 JSRegExp::Flags flags, RegExpCompileData* result); | 142 bool multiline, bool unicode, |
| 143 RegExpCompileData* result); |
152 | 144 |
153 RegExpTree* ParsePattern(); | 145 RegExpTree* ParsePattern(); |
154 RegExpTree* ParseDisjunction(); | 146 RegExpTree* ParseDisjunction(); |
155 RegExpTree* ParseGroup(); | 147 RegExpTree* ParseGroup(); |
156 RegExpTree* ParseCharacterClass(); | 148 RegExpTree* ParseCharacterClass(); |
157 | 149 |
158 // Parses a {...,...} quantifier and stores the range in the given | 150 // Parses a {...,...} quantifier and stores the range in the given |
159 // out parameters. | 151 // out parameters. |
160 bool ParseIntervalQuantifier(int* min_out, int* max_out); | 152 bool ParseIntervalQuantifier(int* min_out, int* max_out); |
161 | 153 |
(...skipping 22 matching lines...) Expand all Loading... |
184 void Reset(int pos); | 176 void Reset(int pos); |
185 | 177 |
186 // Reports whether the pattern might be used as a literal search string. | 178 // Reports whether the pattern might be used as a literal search string. |
187 // Only use if the result of the parse is a single atom node. | 179 // Only use if the result of the parse is a single atom node. |
188 bool simple(); | 180 bool simple(); |
189 bool contains_anchor() { return contains_anchor_; } | 181 bool contains_anchor() { return contains_anchor_; } |
190 void set_contains_anchor() { contains_anchor_ = true; } | 182 void set_contains_anchor() { contains_anchor_ = true; } |
191 int captures_started() { return captures_started_; } | 183 int captures_started() { return captures_started_; } |
192 int position() { return next_pos_ - 1; } | 184 int position() { return next_pos_ - 1; } |
193 bool failed() { return failed_; } | 185 bool failed() { return failed_; } |
194 bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; } | |
195 bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; } | |
196 | 186 |
197 static bool IsSyntaxCharacter(uc32 c); | 187 static bool IsSyntaxCharacter(uc32 c); |
198 | 188 |
199 static const int kMaxCaptures = 1 << 16; | 189 static const int kMaxCaptures = 1 << 16; |
200 static const uc32 kEndMarker = (1 << 21); | 190 static const uc32 kEndMarker = (1 << 21); |
201 | 191 |
202 private: | 192 private: |
203 enum SubexpressionType { | 193 enum SubexpressionType { |
204 INITIAL, | 194 INITIAL, |
205 CAPTURE, // All positive values represent captures. | 195 CAPTURE, // All positive values represent captures. |
206 POSITIVE_LOOKAROUND, | 196 POSITIVE_LOOKAROUND, |
207 NEGATIVE_LOOKAROUND, | 197 NEGATIVE_LOOKAROUND, |
208 GROUPING | 198 GROUPING |
209 }; | 199 }; |
210 | 200 |
211 class RegExpParserState : public ZoneObject { | 201 class RegExpParserState : public ZoneObject { |
212 public: | 202 public: |
213 RegExpParserState(RegExpParserState* previous_state, | 203 RegExpParserState(RegExpParserState* previous_state, |
214 SubexpressionType group_type, | 204 SubexpressionType group_type, |
215 RegExpLookaround::Type lookaround_type, | 205 RegExpLookaround::Type lookaround_type, |
216 int disjunction_capture_index, JSRegExp::Flags flags, | 206 int disjunction_capture_index, Zone* zone) |
217 Zone* zone) | |
218 : previous_state_(previous_state), | 207 : previous_state_(previous_state), |
219 builder_(new (zone) RegExpBuilder(zone, flags)), | 208 builder_(new (zone) RegExpBuilder(zone)), |
220 group_type_(group_type), | 209 group_type_(group_type), |
221 lookaround_type_(lookaround_type), | 210 lookaround_type_(lookaround_type), |
222 disjunction_capture_index_(disjunction_capture_index) {} | 211 disjunction_capture_index_(disjunction_capture_index) {} |
223 // Parser state of containing expression, if any. | 212 // Parser state of containing expression, if any. |
224 RegExpParserState* previous_state() { return previous_state_; } | 213 RegExpParserState* previous_state() { return previous_state_; } |
225 bool IsSubexpression() { return previous_state_ != NULL; } | 214 bool IsSubexpression() { return previous_state_ != NULL; } |
226 // RegExpBuilder building this regexp's AST. | 215 // RegExpBuilder building this regexp's AST. |
227 RegExpBuilder* builder() { return builder_; } | 216 RegExpBuilder* builder() { return builder_; } |
228 // Type of regexp being parsed (parenthesized group or entire regexp). | 217 // Type of regexp being parsed (parenthesized group or entire regexp). |
229 SubexpressionType group_type() { return group_type_; } | 218 SubexpressionType group_type() { return group_type_; } |
(...skipping 23 matching lines...) Expand all Loading... |
253 // Return the 1-indexed RegExpCapture object, allocate if necessary. | 242 // Return the 1-indexed RegExpCapture object, allocate if necessary. |
254 RegExpCapture* GetCapture(int index); | 243 RegExpCapture* GetCapture(int index); |
255 | 244 |
256 Isolate* isolate() { return isolate_; } | 245 Isolate* isolate() { return isolate_; } |
257 Zone* zone() const { return zone_; } | 246 Zone* zone() const { return zone_; } |
258 | 247 |
259 uc32 current() { return current_; } | 248 uc32 current() { return current_; } |
260 bool has_more() { return has_more_; } | 249 bool has_more() { return has_more_; } |
261 bool has_next() { return next_pos_ < in()->length(); } | 250 bool has_next() { return next_pos_ < in()->length(); } |
262 uc32 Next(); | 251 uc32 Next(); |
263 template <bool update_position> | |
264 uc32 ReadNext(); | |
265 FlatStringReader* in() { return in_; } | 252 FlatStringReader* in() { return in_; } |
266 void ScanForCaptures(); | 253 void ScanForCaptures(); |
267 | 254 |
268 Isolate* isolate_; | 255 Isolate* isolate_; |
269 Zone* zone_; | 256 Zone* zone_; |
270 Handle<String>* error_; | 257 Handle<String>* error_; |
271 ZoneList<RegExpCapture*>* captures_; | 258 ZoneList<RegExpCapture*>* captures_; |
272 FlatStringReader* in_; | 259 FlatStringReader* in_; |
273 uc32 current_; | 260 uc32 current_; |
274 JSRegExp::Flags flags_; | |
275 int next_pos_; | 261 int next_pos_; |
276 int captures_started_; | 262 int captures_started_; |
277 // The capture count is only valid after we have scanned for captures. | 263 // The capture count is only valid after we have scanned for captures. |
278 int capture_count_; | 264 int capture_count_; |
279 bool has_more_; | 265 bool has_more_; |
| 266 bool multiline_; |
| 267 bool unicode_; |
280 bool simple_; | 268 bool simple_; |
281 bool contains_anchor_; | 269 bool contains_anchor_; |
282 bool is_scanned_for_captures_; | 270 bool is_scanned_for_captures_; |
283 bool failed_; | 271 bool failed_; |
284 }; | 272 }; |
285 | 273 |
286 } // namespace internal | 274 } // namespace internal |
287 } // namespace v8 | 275 } // namespace v8 |
288 | 276 |
289 #endif // V8_REGEXP_REGEXP_PARSER_H_ | 277 #endif // V8_REGEXP_REGEXP_PARSER_H_ |
OLD | NEW |