OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ | 5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ |
6 #define V8_REGEXP_REGEXP_PARSER_H_ | 6 #define V8_REGEXP_REGEXP_PARSER_H_ |
7 | 7 |
8 #include "src/objects.h" | 8 #include "src/objects.h" |
9 #include "src/regexp/regexp-ast.h" | 9 #include "src/regexp/regexp-ast.h" |
10 #include "src/zone.h" | 10 #include "src/zone.h" |
(...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
215 POSITIVE_LOOKAROUND, | 215 POSITIVE_LOOKAROUND, |
216 NEGATIVE_LOOKAROUND, | 216 NEGATIVE_LOOKAROUND, |
217 GROUPING | 217 GROUPING |
218 }; | 218 }; |
219 | 219 |
220 class RegExpParserState : public ZoneObject { | 220 class RegExpParserState : public ZoneObject { |
221 public: | 221 public: |
222 RegExpParserState(RegExpParserState* previous_state, | 222 RegExpParserState(RegExpParserState* previous_state, |
223 SubexpressionType group_type, | 223 SubexpressionType group_type, |
224 RegExpLookaround::Type lookaround_type, | 224 RegExpLookaround::Type lookaround_type, |
225 int disjunction_capture_index, bool ignore_case, | 225 int disjunction_capture_index, |
| 226 const ZoneVector<uc16>* capture_name, bool ignore_case, |
226 bool unicode, Zone* zone) | 227 bool unicode, Zone* zone) |
227 : previous_state_(previous_state), | 228 : previous_state_(previous_state), |
228 builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)), | 229 builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)), |
229 group_type_(group_type), | 230 group_type_(group_type), |
230 lookaround_type_(lookaround_type), | 231 lookaround_type_(lookaround_type), |
231 disjunction_capture_index_(disjunction_capture_index) {} | 232 disjunction_capture_index_(disjunction_capture_index), |
| 233 capture_name_(capture_name) {} |
232 // Parser state of containing expression, if any. | 234 // Parser state of containing expression, if any. |
233 RegExpParserState* previous_state() { return previous_state_; } | 235 RegExpParserState* previous_state() { return previous_state_; } |
234 bool IsSubexpression() { return previous_state_ != NULL; } | 236 bool IsSubexpression() { return previous_state_ != NULL; } |
235 // RegExpBuilder building this regexp's AST. | 237 // RegExpBuilder building this regexp's AST. |
236 RegExpBuilder* builder() { return builder_; } | 238 RegExpBuilder* builder() { return builder_; } |
237 // Type of regexp being parsed (parenthesized group or entire regexp). | 239 // Type of regexp being parsed (parenthesized group or entire regexp). |
238 SubexpressionType group_type() { return group_type_; } | 240 SubexpressionType group_type() { return group_type_; } |
239 // Lookahead or Lookbehind. | 241 // Lookahead or Lookbehind. |
240 RegExpLookaround::Type lookaround_type() { return lookaround_type_; } | 242 RegExpLookaround::Type lookaround_type() { return lookaround_type_; } |
241 // Index in captures array of first capture in this sub-expression, if any. | 243 // Index in captures array of first capture in this sub-expression, if any. |
242 // Also the capture index of this sub-expression itself, if group_type | 244 // Also the capture index of this sub-expression itself, if group_type |
243 // is CAPTURE. | 245 // is CAPTURE. |
244 int capture_index() { return disjunction_capture_index_; } | 246 int capture_index() { return disjunction_capture_index_; } |
| 247 // The name of the current sub-expression, if group_type is CAPTURE. Only |
| 248 // used for named captures. |
| 249 const ZoneVector<uc16>* capture_name() { return capture_name_; } |
| 250 |
| 251 bool IsNamedCapture() const { return capture_name_ != nullptr; } |
245 | 252 |
246 // Check whether the parser is inside a capture group with the given index. | 253 // Check whether the parser is inside a capture group with the given index. |
247 bool IsInsideCaptureGroup(int index); | 254 bool IsInsideCaptureGroup(int index); |
| 255 // Check whether the parser is inside a capture group with the given name. |
| 256 bool IsInsideCaptureGroup(const ZoneVector<uc16>* name); |
248 | 257 |
249 private: | 258 private: |
250 // Linked list implementation of stack of states. | 259 // Linked list implementation of stack of states. |
251 RegExpParserState* previous_state_; | 260 RegExpParserState* previous_state_; |
252 // Builder for the stored disjunction. | 261 // Builder for the stored disjunction. |
253 RegExpBuilder* builder_; | 262 RegExpBuilder* builder_; |
254 // Stored disjunction type (capture, look-ahead or grouping), if any. | 263 // Stored disjunction type (capture, look-ahead or grouping), if any. |
255 SubexpressionType group_type_; | 264 SubexpressionType group_type_; |
256 // Stored read direction. | 265 // Stored read direction. |
257 RegExpLookaround::Type lookaround_type_; | 266 RegExpLookaround::Type lookaround_type_; |
258 // Stored disjunction's capture index (if any). | 267 // Stored disjunction's capture index (if any). |
259 int disjunction_capture_index_; | 268 int disjunction_capture_index_; |
| 269 // Stored capture name (if any). |
| 270 const ZoneVector<uc16>* capture_name_; |
260 }; | 271 }; |
261 | 272 |
262 // Return the 1-indexed RegExpCapture object, allocate if necessary. | 273 // Return the 1-indexed RegExpCapture object, allocate if necessary. |
263 RegExpCapture* GetCapture(int index); | 274 RegExpCapture* GetCapture(int index); |
264 | 275 |
| 276 // Creates a new named capture at the specified index. Must be called exactly |
| 277 // once for each named capture. Fails if a capture with the same name is |
| 278 // encountered. |
| 279 bool CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, int index); |
| 280 |
| 281 // Parses the name of a capture group (?<name>pattern). The name must adhere |
| 282 // to IdentifierName in the ECMAScript standard. |
| 283 const ZoneVector<uc16>* ParseCaptureGroupName(); |
| 284 |
| 285 bool ParseNamedBackReference(RegExpBuilder* builder, |
| 286 RegExpParserState* state); |
| 287 |
| 288 // After the initial parsing pass, patch corresponding RegExpCapture objects |
| 289 // into all RegExpBackReferences. This is done after initial parsing in order |
| 290 // to avoid complicating cases in which references come before the capture. |
| 291 void PatchNamedBackReferences(); |
| 292 |
| 293 Handle<FixedArray> CreateCaptureNameMap(); |
| 294 |
265 Isolate* isolate() { return isolate_; } | 295 Isolate* isolate() { return isolate_; } |
266 Zone* zone() const { return zone_; } | 296 Zone* zone() const { return zone_; } |
267 | 297 |
268 uc32 current() { return current_; } | 298 uc32 current() { return current_; } |
269 bool has_more() { return has_more_; } | 299 bool has_more() { return has_more_; } |
270 bool has_next() { return next_pos_ < in()->length(); } | 300 bool has_next() { return next_pos_ < in()->length(); } |
271 uc32 Next(); | 301 uc32 Next(); |
272 template <bool update_position> | 302 template <bool update_position> |
273 uc32 ReadNext(); | 303 uc32 ReadNext(); |
274 FlatStringReader* in() { return in_; } | 304 FlatStringReader* in() { return in_; } |
275 void ScanForCaptures(); | 305 void ScanForCaptures(); |
276 | 306 |
277 Isolate* isolate_; | 307 Isolate* isolate_; |
278 Zone* zone_; | 308 Zone* zone_; |
279 Handle<String>* error_; | 309 Handle<String>* error_; |
280 ZoneList<RegExpCapture*>* captures_; | 310 ZoneList<RegExpCapture*>* captures_; |
| 311 ZoneList<RegExpCapture*>* named_captures_; |
| 312 ZoneList<RegExpBackReference*>* named_back_references_; |
281 FlatStringReader* in_; | 313 FlatStringReader* in_; |
282 uc32 current_; | 314 uc32 current_; |
283 bool ignore_case_; | 315 bool ignore_case_; |
284 bool multiline_; | 316 bool multiline_; |
285 bool unicode_; | 317 bool unicode_; |
286 int next_pos_; | 318 int next_pos_; |
287 int captures_started_; | 319 int captures_started_; |
288 // The capture count is only valid after we have scanned for captures. | 320 // The capture count is only valid after we have scanned for captures. |
289 int capture_count_; | 321 int capture_count_; |
290 bool has_more_; | 322 bool has_more_; |
291 bool simple_; | 323 bool simple_; |
292 bool contains_anchor_; | 324 bool contains_anchor_; |
293 bool is_scanned_for_captures_; | 325 bool is_scanned_for_captures_; |
294 bool failed_; | 326 bool failed_; |
295 }; | 327 }; |
296 | 328 |
297 } // namespace internal | 329 } // namespace internal |
298 } // namespace v8 | 330 } // namespace v8 |
299 | 331 |
300 #endif // V8_REGEXP_REGEXP_PARSER_H_ | 332 #endif // V8_REGEXP_REGEXP_PARSER_H_ |
OLD | NEW |