OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_PARSER_H_ | 5 #ifndef V8_PARSER_H_ |
6 #define V8_PARSER_H_ | 6 #define V8_PARSER_H_ |
7 | 7 |
8 #include "src/allocation.h" | 8 #include "src/allocation.h" |
9 #include "src/ast.h" | 9 #include "src/ast.h" |
10 #include "src/compiler.h" // TODO(titzer): remove this include dependency | 10 #include "src/compiler.h" // TODO(titzer): remove this include dependency |
(...skipping 352 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
363 | 363 |
364 private: | 364 private: |
365 ZoneList<T*>* list_; | 365 ZoneList<T*>* list_; |
366 T* last_; | 366 T* last_; |
367 }; | 367 }; |
368 | 368 |
369 | 369 |
370 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. | 370 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
371 class RegExpBuilder: public ZoneObject { | 371 class RegExpBuilder: public ZoneObject { |
372 public: | 372 public: |
373 explicit RegExpBuilder(Zone* zone); | 373 RegExpBuilder(Zone* zone, RegExpTree::ReadDirection read_direction); |
374 void AddCharacter(uc16 character); | 374 void AddCharacter(uc16 character); |
375 // "Adds" an empty expression. Does nothing except consume a | 375 // "Adds" an empty expression. Does nothing except consume a |
376 // following quantifier | 376 // following quantifier |
377 void AddEmpty(); | 377 void AddEmpty(); |
378 void AddAtom(RegExpTree* tree); | 378 void AddAtom(RegExpTree* tree); |
379 void AddAssertion(RegExpTree* tree); | 379 void AddAssertion(RegExpTree* tree); |
380 void NewAlternative(); // '|' | 380 void NewAlternative(); // '|' |
381 void AddQuantifierToAtom( | 381 void AddQuantifierToAtom( |
382 int min, int max, RegExpQuantifier::QuantifierType type); | 382 int min, int max, RegExpQuantifier::QuantifierType type); |
383 RegExpTree* ToRegExp(); | 383 RegExpTree* ToRegExp(); |
384 | 384 |
385 private: | 385 private: |
386 void FlushCharacters(); | 386 void FlushCharacters(); |
387 void FlushText(); | 387 void FlushText(); |
388 void FlushTerms(); | 388 void FlushTerms(); |
389 Zone* zone() const { return zone_; } | 389 Zone* zone() const { return zone_; } |
390 | 390 |
391 Zone* zone_; | 391 Zone* zone_; |
392 bool pending_empty_; | 392 bool pending_empty_; |
393 ZoneList<uc16>* characters_; | 393 ZoneList<uc16>* characters_; |
394 BufferedZoneList<RegExpTree, 2> terms_; | 394 BufferedZoneList<RegExpTree, 2> terms_; |
395 BufferedZoneList<RegExpTree, 2> text_; | 395 BufferedZoneList<RegExpTree, 2> text_; |
396 BufferedZoneList<RegExpTree, 2> alternatives_; | 396 BufferedZoneList<RegExpTree, 2> alternatives_; |
| 397 RegExpTree::ReadDirection read_direction_; |
397 #ifdef DEBUG | 398 #ifdef DEBUG |
398 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; | 399 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; |
399 #define LAST(x) last_added_ = x; | 400 #define LAST(x) last_added_ = x; |
400 #else | 401 #else |
401 #define LAST(x) | 402 #define LAST(x) |
402 #endif | 403 #endif |
403 }; | 404 }; |
404 | 405 |
405 | 406 |
406 class RegExpParser BASE_EMBEDDED { | 407 class RegExpParser BASE_EMBEDDED { |
407 public: | 408 public: |
408 RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode, | 409 RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode, |
409 bool unicode, Isolate* isolate, Zone* zone); | 410 bool unicode, Isolate* isolate, Zone* zone); |
410 | 411 |
411 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, | 412 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, |
412 bool multiline, bool unicode, | 413 bool multiline, bool unicode, |
413 RegExpCompileData* result); | 414 RegExpCompileData* result); |
414 | 415 |
415 RegExpTree* ParsePattern(); | 416 RegExpTree* ParsePattern(); |
416 RegExpTree* ParseDisjunction(); | 417 RegExpTree* ParseDisjunction(); |
417 RegExpTree* ParseGroup(); | 418 RegExpTree* ParseGroup(); |
418 RegExpTree* ParseCharacterClass(); | 419 RegExpTree* ParseCharacterClass(RegExpTree::ReadDirection read_direction); |
419 | 420 |
420 // Parses a {...,...} quantifier and stores the range in the given | 421 // Parses a {...,...} quantifier and stores the range in the given |
421 // out parameters. | 422 // out parameters. |
422 bool ParseIntervalQuantifier(int* min_out, int* max_out); | 423 bool ParseIntervalQuantifier(int* min_out, int* max_out); |
423 | 424 |
424 // Parses and returns a single escaped character. The character | 425 // Parses and returns a single escaped character. The character |
425 // must not be 'b' or 'B' since they are usually handled specially. | 426 // must not be 'b' or 'B' since they are usually handled specially. |
426 uc32 ParseClassCharacterEscape(); | 427 uc32 ParseClassCharacterEscape(); |
427 | 428 |
428 // Checks whether the following is a length-digit hexadecimal number, | 429 // Checks whether the following is a length-digit hexadecimal number, |
(...skipping 14 matching lines...) Expand all Loading... |
443 RegExpTree* ReportError(Vector<const char> message); | 444 RegExpTree* ReportError(Vector<const char> message); |
444 void Advance(); | 445 void Advance(); |
445 void Advance(int dist); | 446 void Advance(int dist); |
446 void Reset(int pos); | 447 void Reset(int pos); |
447 | 448 |
448 // Reports whether the pattern might be used as a literal search string. | 449 // Reports whether the pattern might be used as a literal search string. |
449 // Only use if the result of the parse is a single atom node. | 450 // Only use if the result of the parse is a single atom node. |
450 bool simple(); | 451 bool simple(); |
451 bool contains_anchor() { return contains_anchor_; } | 452 bool contains_anchor() { return contains_anchor_; } |
452 void set_contains_anchor() { contains_anchor_ = true; } | 453 void set_contains_anchor() { contains_anchor_ = true; } |
453 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } | 454 int captures_started() { return captures_started_; } |
454 int position() { return next_pos_ - 1; } | 455 int position() { return next_pos_ - 1; } |
455 bool failed() { return failed_; } | 456 bool failed() { return failed_; } |
456 | 457 |
457 static bool IsSyntaxCharacter(uc32 c); | 458 static bool IsSyntaxCharacter(uc32 c); |
458 | 459 |
459 static const int kMaxCaptures = 1 << 16; | 460 static const int kMaxCaptures = 1 << 16; |
460 static const uc32 kEndMarker = (1 << 21); | 461 static const uc32 kEndMarker = (1 << 21); |
461 | 462 |
462 private: | 463 private: |
463 enum SubexpressionType { | 464 enum SubexpressionType { |
464 INITIAL, | 465 INITIAL, |
465 CAPTURE, // All positive values represent captures. | 466 CAPTURE, // All positive values represent captures. |
466 POSITIVE_LOOKAHEAD, | 467 POSITIVE_LOOKAHEAD, |
467 NEGATIVE_LOOKAHEAD, | 468 NEGATIVE_LOOKAHEAD, |
468 GROUPING | 469 GROUPING |
469 }; | 470 }; |
470 | 471 |
471 class RegExpParserState : public ZoneObject { | 472 class RegExpParserState : public ZoneObject { |
472 public: | 473 public: |
473 RegExpParserState(RegExpParserState* previous_state, | 474 RegExpParserState(RegExpParserState* previous_state, |
474 SubexpressionType group_type, | 475 SubexpressionType group_type, |
475 int disjunction_capture_index, | 476 RegExpTree::ReadDirection read_direction, |
476 Zone* zone) | 477 int disjunction_capture_index, Zone* zone) |
477 : previous_state_(previous_state), | 478 : previous_state_(previous_state), |
478 builder_(new(zone) RegExpBuilder(zone)), | 479 builder_(new (zone) RegExpBuilder(zone, read_direction)), |
479 group_type_(group_type), | 480 group_type_(group_type), |
| 481 read_direction_(read_direction), |
480 disjunction_capture_index_(disjunction_capture_index) {} | 482 disjunction_capture_index_(disjunction_capture_index) {} |
481 // Parser state of containing expression, if any. | 483 // Parser state of containing expression, if any. |
482 RegExpParserState* previous_state() { return previous_state_; } | 484 RegExpParserState* previous_state() { return previous_state_; } |
483 bool IsSubexpression() { return previous_state_ != NULL; } | 485 bool IsSubexpression() { return previous_state_ != NULL; } |
484 // RegExpBuilder building this regexp's AST. | 486 // RegExpBuilder building this regexp's AST. |
485 RegExpBuilder* builder() { return builder_; } | 487 RegExpBuilder* builder() { return builder_; } |
486 // Type of regexp being parsed (parenthesized group or entire regexp). | 488 // Type of regexp being parsed (parenthesized group or entire regexp). |
487 SubexpressionType group_type() { return group_type_; } | 489 SubexpressionType group_type() { return group_type_; } |
| 490 // Lookahead or Lookbehind. |
| 491 RegExpTree::ReadDirection read_direction() { return read_direction_; } |
488 // Index in captures array of first capture in this sub-expression, if any. | 492 // Index in captures array of first capture in this sub-expression, if any. |
489 // Also the capture index of this sub-expression itself, if group_type | 493 // Also the capture index of this sub-expression itself, if group_type |
490 // is CAPTURE. | 494 // is CAPTURE. |
491 int capture_index() { return disjunction_capture_index_; } | 495 int capture_index() { return disjunction_capture_index_; } |
492 | 496 |
493 private: | 497 private: |
494 // Linked list implementation of stack of states. | 498 // Linked list implementation of stack of states. |
495 RegExpParserState* previous_state_; | 499 RegExpParserState* previous_state_; |
496 // Builder for the stored disjunction. | 500 // Builder for the stored disjunction. |
497 RegExpBuilder* builder_; | 501 RegExpBuilder* builder_; |
498 // Stored disjunction type (capture, look-ahead or grouping), if any. | 502 // Stored disjunction type (capture, look-ahead or grouping), if any. |
499 SubexpressionType group_type_; | 503 SubexpressionType group_type_; |
| 504 // Stored read direction. |
| 505 RegExpTree::ReadDirection read_direction_; |
500 // Stored disjunction's capture index (if any). | 506 // Stored disjunction's capture index (if any). |
501 int disjunction_capture_index_; | 507 int disjunction_capture_index_; |
502 }; | 508 }; |
503 | 509 |
| 510 // Return the 1-indexed RegExpCapture object, allocate if necessary. |
| 511 RegExpCapture* GetCapture(int index); |
| 512 |
504 Isolate* isolate() { return isolate_; } | 513 Isolate* isolate() { return isolate_; } |
505 Zone* zone() const { return zone_; } | 514 Zone* zone() const { return zone_; } |
506 | 515 |
507 uc32 current() { return current_; } | 516 uc32 current() { return current_; } |
508 bool has_more() { return has_more_; } | 517 bool has_more() { return has_more_; } |
509 bool has_next() { return next_pos_ < in()->length(); } | 518 bool has_next() { return next_pos_ < in()->length(); } |
510 uc32 Next(); | 519 uc32 Next(); |
511 FlatStringReader* in() { return in_; } | 520 FlatStringReader* in() { return in_; } |
512 void ScanForCaptures(); | 521 void ScanForCaptures(); |
513 | 522 |
514 Isolate* isolate_; | 523 Isolate* isolate_; |
515 Zone* zone_; | 524 Zone* zone_; |
516 Handle<String>* error_; | 525 Handle<String>* error_; |
517 ZoneList<RegExpCapture*>* captures_; | 526 ZoneList<RegExpCapture*>* captures_; |
518 FlatStringReader* in_; | 527 FlatStringReader* in_; |
519 uc32 current_; | 528 uc32 current_; |
520 int next_pos_; | 529 int next_pos_; |
| 530 int captures_started_; |
521 // The capture count is only valid after we have scanned for captures. | 531 // The capture count is only valid after we have scanned for captures. |
522 int capture_count_; | 532 int capture_count_; |
523 bool has_more_; | 533 bool has_more_; |
524 bool multiline_; | 534 bool multiline_; |
525 bool unicode_; | 535 bool unicode_; |
526 bool simple_; | 536 bool simple_; |
527 bool contains_anchor_; | 537 bool contains_anchor_; |
528 bool is_scanned_for_captures_; | 538 bool is_scanned_for_captures_; |
529 bool failed_; | 539 bool failed_; |
530 }; | 540 }; |
(...skipping 856 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1387 | 1397 |
1388 DoExpression* ParserTraits::ParseDoExpression(bool* ok) { | 1398 DoExpression* ParserTraits::ParseDoExpression(bool* ok) { |
1389 return parser_->ParseDoExpression(ok); | 1399 return parser_->ParseDoExpression(ok); |
1390 } | 1400 } |
1391 | 1401 |
1392 | 1402 |
1393 } // namespace internal | 1403 } // namespace internal |
1394 } // namespace v8 | 1404 } // namespace v8 |
1395 | 1405 |
1396 #endif // V8_PARSER_H_ | 1406 #endif // V8_PARSER_H_ |
OLD | NEW |