| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_PARSER_H_ | 5 #ifndef V8_PARSER_H_ |
| 6 #define V8_PARSER_H_ | 6 #define V8_PARSER_H_ |
| 7 | 7 |
| 8 #include "src/allocation.h" | 8 #include "src/allocation.h" |
| 9 #include "src/ast.h" | 9 #include "src/ast.h" |
| 10 #include "src/compiler.h" // TODO(titzer): remove this include dependency | 10 #include "src/compiler.h" // TODO(titzer): remove this include dependency |
| (...skipping 352 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 363 | 363 |
| 364 private: | 364 private: |
| 365 ZoneList<T*>* list_; | 365 ZoneList<T*>* list_; |
| 366 T* last_; | 366 T* last_; |
| 367 }; | 367 }; |
| 368 | 368 |
| 369 | 369 |
| 370 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. | 370 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
| 371 class RegExpBuilder: public ZoneObject { | 371 class RegExpBuilder: public ZoneObject { |
| 372 public: | 372 public: |
| 373 explicit RegExpBuilder(Zone* zone); | 373 RegExpBuilder(Zone* zone, RegExpTree::ReadDirection read_direction); |
| 374 void AddCharacter(uc16 character); | 374 void AddCharacter(uc16 character); |
| 375 // "Adds" an empty expression. Does nothing except consume a | 375 // "Adds" an empty expression. Does nothing except consume a |
| 376 // following quantifier. | 376 // following quantifier. |
| 377 void AddEmpty(); | 377 void AddEmpty(); |
| 378 void AddAtom(RegExpTree* tree); | 378 void AddAtom(RegExpTree* tree); |
| 379 void AddAssertion(RegExpTree* tree); | 379 void AddAssertion(RegExpTree* tree); |
| 380 void NewAlternative(); // '|' | 380 void NewAlternative(); // '|' |
| 381 void AddQuantifierToAtom( | 381 void AddQuantifierToAtom( |
| 382 int min, int max, RegExpQuantifier::QuantifierType type); | 382 int min, int max, RegExpQuantifier::QuantifierType type); |
| 383 RegExpTree* ToRegExp(); | 383 RegExpTree* ToRegExp(); |
| 384 | 384 |
| 385 private: | 385 private: |
| 386 void FlushCharacters(); | 386 void FlushCharacters(); |
| 387 void FlushText(); | 387 void FlushText(); |
| 388 void FlushTerms(); | 388 void FlushTerms(); |
| 389 Zone* zone() const { return zone_; } | 389 Zone* zone() const { return zone_; } |
| 390 | 390 |
| 391 Zone* zone_; | 391 Zone* zone_; |
| 392 bool pending_empty_; | 392 bool pending_empty_; |
| 393 ZoneList<uc16>* characters_; | 393 ZoneList<uc16>* characters_; |
| 394 BufferedZoneList<RegExpTree, 2> terms_; | 394 BufferedZoneList<RegExpTree, 2> terms_; |
| 395 BufferedZoneList<RegExpTree, 2> text_; | 395 BufferedZoneList<RegExpTree, 2> text_; |
| 396 BufferedZoneList<RegExpTree, 2> alternatives_; | 396 BufferedZoneList<RegExpTree, 2> alternatives_; |
| 397 RegExpTree::ReadDirection read_direction_; |
| 397 #ifdef DEBUG | 398 #ifdef DEBUG |
| 398 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; | 399 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; |
| 399 #define LAST(x) last_added_ = x; | 400 #define LAST(x) last_added_ = x; |
| 400 #else | 401 #else |
| 401 #define LAST(x) | 402 #define LAST(x) |
| 402 #endif | 403 #endif |
| 403 }; | 404 }; |
| 404 | 405 |
| 405 | 406 |
| 406 class RegExpParser BASE_EMBEDDED { | 407 class RegExpParser BASE_EMBEDDED { |
| 407 public: | 408 public: |
| 408 RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode, | 409 RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode, |
| 409 bool unicode, Isolate* isolate, Zone* zone); | 410 bool unicode, Isolate* isolate, Zone* zone); |
| 410 | 411 |
| 411 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, | 412 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, |
| 412 bool multiline, bool unicode, | 413 bool multiline, bool unicode, |
| 413 RegExpCompileData* result); | 414 RegExpCompileData* result); |
| 414 | 415 |
| 415 RegExpTree* ParsePattern(); | 416 RegExpTree* ParsePattern(); |
| 416 RegExpTree* ParseDisjunction(); | 417 RegExpTree* ParseDisjunction(); |
| 417 RegExpTree* ParseGroup(); | 418 RegExpTree* ParseGroup(); |
| 418 RegExpTree* ParseCharacterClass(); | 419 RegExpTree* ParseCharacterClass(RegExpTree::ReadDirection read_direction); |
| 419 | 420 |
| 420 // Parses a {...,...} quantifier and stores the range in the given | 421 // Parses a {...,...} quantifier and stores the range in the given |
| 421 // out parameters. | 422 // out parameters. |
| 422 bool ParseIntervalQuantifier(int* min_out, int* max_out); | 423 bool ParseIntervalQuantifier(int* min_out, int* max_out); |
| 423 | 424 |
| 424 // Parses and returns a single escaped character. The character | 425 // Parses and returns a single escaped character. The character |
| 425 // must not be 'b' or 'B' since they are usually handled specially. | 426 // must not be 'b' or 'B' since they are usually handled specially. |
| 426 uc32 ParseClassCharacterEscape(); | 427 uc32 ParseClassCharacterEscape(); |
| 427 | 428 |
| 428 // Checks whether the following is a length-digit hexadecimal number, | 429 // Checks whether the following is a length-digit hexadecimal number, |
| (...skipping 14 matching lines...) Expand all Loading... |
| 443 RegExpTree* ReportError(Vector<const char> message); | 444 RegExpTree* ReportError(Vector<const char> message); |
| 444 void Advance(); | 445 void Advance(); |
| 445 void Advance(int dist); | 446 void Advance(int dist); |
| 446 void Reset(int pos); | 447 void Reset(int pos); |
| 447 | 448 |
| 448 // Reports whether the pattern might be used as a literal search string. | 449 // Reports whether the pattern might be used as a literal search string. |
| 449 // Only use if the result of the parse is a single atom node. | 450 // Only use if the result of the parse is a single atom node. |
| 450 bool simple(); | 451 bool simple(); |
| 451 bool contains_anchor() { return contains_anchor_; } | 452 bool contains_anchor() { return contains_anchor_; } |
| 452 void set_contains_anchor() { contains_anchor_ = true; } | 453 void set_contains_anchor() { contains_anchor_ = true; } |
| 453 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } | 454 int captures_started() { return captures_started_; } |
| 454 int position() { return next_pos_ - 1; } | 455 int position() { return next_pos_ - 1; } |
| 455 bool failed() { return failed_; } | 456 bool failed() { return failed_; } |
| 456 | 457 |
| 457 static bool IsSyntaxCharacter(uc32 c); | 458 static bool IsSyntaxCharacter(uc32 c); |
| 458 | 459 |
| 459 static const int kMaxCaptures = 1 << 16; | 460 static const int kMaxCaptures = 1 << 16; |
| 460 static const uc32 kEndMarker = (1 << 21); | 461 static const uc32 kEndMarker = (1 << 21); |
| 461 | 462 |
| 462 private: | 463 private: |
| 463 enum SubexpressionType { | 464 enum SubexpressionType { |
| 464 INITIAL, | 465 INITIAL, |
| 465 CAPTURE, // All positive values represent captures. | 466 CAPTURE, // All positive values represent captures. |
| 466 POSITIVE_LOOKAHEAD, | 467 POSITIVE_LOOKAHEAD, |
| 467 NEGATIVE_LOOKAHEAD, | 468 NEGATIVE_LOOKAHEAD, |
| 468 GROUPING | 469 GROUPING |
| 469 }; | 470 }; |
| 470 | 471 |
| 471 class RegExpParserState : public ZoneObject { | 472 class RegExpParserState : public ZoneObject { |
| 472 public: | 473 public: |
| 473 RegExpParserState(RegExpParserState* previous_state, | 474 RegExpParserState(RegExpParserState* previous_state, |
| 474 SubexpressionType group_type, | 475 SubexpressionType group_type, |
| 475 int disjunction_capture_index, | 476 RegExpTree::ReadDirection read_direction, |
| 476 Zone* zone) | 477 int disjunction_capture_index, Zone* zone) |
| 477 : previous_state_(previous_state), | 478 : previous_state_(previous_state), |
| 478 builder_(new(zone) RegExpBuilder(zone)), | 479 builder_(new (zone) RegExpBuilder(zone, read_direction)), |
| 479 group_type_(group_type), | 480 group_type_(group_type), |
| 481 read_direction_(read_direction), |
| 480 disjunction_capture_index_(disjunction_capture_index) {} | 482 disjunction_capture_index_(disjunction_capture_index) {} |
| 481 // Parser state of containing expression, if any. | 483 // Parser state of containing expression, if any. |
| 482 RegExpParserState* previous_state() { return previous_state_; } | 484 RegExpParserState* previous_state() { return previous_state_; } |
| 483 bool IsSubexpression() { return previous_state_ != NULL; } | 485 bool IsSubexpression() { return previous_state_ != NULL; } |
| 484 // RegExpBuilder building this regexp's AST. | 486 // RegExpBuilder building this regexp's AST. |
| 485 RegExpBuilder* builder() { return builder_; } | 487 RegExpBuilder* builder() { return builder_; } |
| 486 // Type of regexp being parsed (parenthesized group or entire regexp). | 488 // Type of regexp being parsed (parenthesized group or entire regexp). |
| 487 SubexpressionType group_type() { return group_type_; } | 489 SubexpressionType group_type() { return group_type_; } |
| 490 // Lookahead or Lookbehind. |
| 491 RegExpTree::ReadDirection read_direction() { return read_direction_; } |
| 488 // Index in captures array of first capture in this sub-expression, if any. | 492 // Index in captures array of first capture in this sub-expression, if any. |
| 489 // Also the capture index of this sub-expression itself, if group_type | 493 // Also the capture index of this sub-expression itself, if group_type |
| 490 // is CAPTURE. | 494 // is CAPTURE. |
| 491 int capture_index() { return disjunction_capture_index_; } | 495 int capture_index() { return disjunction_capture_index_; } |
| 492 | 496 |
| 493 private: | 497 private: |
| 494 // Linked list implementation of stack of states. | 498 // Linked list implementation of stack of states. |
| 495 RegExpParserState* previous_state_; | 499 RegExpParserState* previous_state_; |
| 496 // Builder for the stored disjunction. | 500 // Builder for the stored disjunction. |
| 497 RegExpBuilder* builder_; | 501 RegExpBuilder* builder_; |
| 498 // Stored disjunction type (capture, look-ahead or grouping), if any. | 502 // Stored disjunction type (capture, look-ahead or grouping), if any. |
| 499 SubexpressionType group_type_; | 503 SubexpressionType group_type_; |
| 504 // Stored read direction. |
| 505 RegExpTree::ReadDirection read_direction_; |
| 500 // Stored disjunction's capture index (if any). | 506 // Stored disjunction's capture index (if any). |
| 501 int disjunction_capture_index_; | 507 int disjunction_capture_index_; |
| 502 }; | 508 }; |
| 503 | 509 |
| 510 // Returns the 1-indexed RegExpCapture object, allocating it if necessary. |
| 511 RegExpCapture* GetCapture(int index); |
| 512 |
| 504 Isolate* isolate() { return isolate_; } | 513 Isolate* isolate() { return isolate_; } |
| 505 Zone* zone() const { return zone_; } | 514 Zone* zone() const { return zone_; } |
| 506 | 515 |
| 507 uc32 current() { return current_; } | 516 uc32 current() { return current_; } |
| 508 bool has_more() { return has_more_; } | 517 bool has_more() { return has_more_; } |
| 509 bool has_next() { return next_pos_ < in()->length(); } | 518 bool has_next() { return next_pos_ < in()->length(); } |
| 510 uc32 Next(); | 519 uc32 Next(); |
| 511 FlatStringReader* in() { return in_; } | 520 FlatStringReader* in() { return in_; } |
| 512 void ScanForCaptures(); | 521 void ScanForCaptures(); |
| 513 | 522 |
| 514 Isolate* isolate_; | 523 Isolate* isolate_; |
| 515 Zone* zone_; | 524 Zone* zone_; |
| 516 Handle<String>* error_; | 525 Handle<String>* error_; |
| 517 ZoneList<RegExpCapture*>* captures_; | 526 ZoneList<RegExpCapture*>* captures_; |
| 518 FlatStringReader* in_; | 527 FlatStringReader* in_; |
| 519 uc32 current_; | 528 uc32 current_; |
| 520 int next_pos_; | 529 int next_pos_; |
| 530 int captures_started_; |
| 521 // The capture count is only valid after we have scanned for captures. | 531 // The capture count is only valid after we have scanned for captures. |
| 522 int capture_count_; | 532 int capture_count_; |
| 523 bool has_more_; | 533 bool has_more_; |
| 524 bool multiline_; | 534 bool multiline_; |
| 525 bool unicode_; | 535 bool unicode_; |
| 526 bool simple_; | 536 bool simple_; |
| 527 bool contains_anchor_; | 537 bool contains_anchor_; |
| 528 bool is_scanned_for_captures_; | 538 bool is_scanned_for_captures_; |
| 529 bool failed_; | 539 bool failed_; |
| 530 }; | 540 }; |
| (...skipping 856 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1387 | 1397 |
| 1388 DoExpression* ParserTraits::ParseDoExpression(bool* ok) { | 1398 DoExpression* ParserTraits::ParseDoExpression(bool* ok) { |
| 1389 return parser_->ParseDoExpression(ok); | 1399 return parser_->ParseDoExpression(ok); |
| 1390 } | 1400 } |
| 1391 | 1401 |
| 1392 | 1402 |
| 1393 } // namespace internal | 1403 } // namespace internal |
| 1394 } // namespace v8 | 1404 } // namespace v8 |
| 1395 | 1405 |
| 1396 #endif // V8_PARSER_H_ | 1406 #endif // V8_PARSER_H_ |
| OLD | NEW |