Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1915)

Side by Side Diff: src/parser.h

Issue 1418963009: Experimental support for RegExp lookbehind. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: fixed test cases Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_PARSER_H_ 5 #ifndef V8_PARSER_H_
6 #define V8_PARSER_H_ 6 #define V8_PARSER_H_
7 7
8 #include "src/allocation.h" 8 #include "src/allocation.h"
9 #include "src/ast.h" 9 #include "src/ast.h"
10 #include "src/compiler.h" // TODO(titzer): remove this include dependency 10 #include "src/compiler.h" // TODO(titzer): remove this include dependency
(...skipping 352 matching lines...) Expand 10 before | Expand all | Expand 10 after
363 363
364 private: 364 private:
365 ZoneList<T*>* list_; 365 ZoneList<T*>* list_;
366 T* last_; 366 T* last_;
367 }; 367 };
368 368
369 369
370 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. 370 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
371 class RegExpBuilder: public ZoneObject { 371 class RegExpBuilder: public ZoneObject {
372 public: 372 public:
373 explicit RegExpBuilder(Zone* zone); 373 RegExpBuilder(Zone* zone, RegExpTree::ReadDirection read_direction);
374 void AddCharacter(uc16 character); 374 void AddCharacter(uc16 character);
375 // "Adds" an empty expression. Does nothing except consume a 375 // "Adds" an empty expression. Does nothing except consume a
376 // following quantifier. 376 // following quantifier.
377 void AddEmpty(); 377 void AddEmpty();
378 void AddAtom(RegExpTree* tree); 378 void AddAtom(RegExpTree* tree);
379 void AddAssertion(RegExpTree* tree); 379 void AddAssertion(RegExpTree* tree);
380 void NewAlternative(); // '|' 380 void NewAlternative(); // '|'
381 void AddQuantifierToAtom( 381 void AddQuantifierToAtom(
382 int min, int max, RegExpQuantifier::QuantifierType type); 382 int min, int max, RegExpQuantifier::QuantifierType type);
383 RegExpTree* ToRegExp(); 383 RegExpTree* ToRegExp();
384 384
385 private: 385 private:
386 void FlushCharacters(); 386 void FlushCharacters();
387 void FlushText(); 387 void FlushText();
388 void FlushTerms(); 388 void FlushTerms();
389 Zone* zone() const { return zone_; } 389 Zone* zone() const { return zone_; }
390 390
391 Zone* zone_; 391 Zone* zone_;
392 bool pending_empty_; 392 bool pending_empty_;
393 ZoneList<uc16>* characters_; 393 ZoneList<uc16>* characters_;
394 BufferedZoneList<RegExpTree, 2> terms_; 394 BufferedZoneList<RegExpTree, 2> terms_;
395 BufferedZoneList<RegExpTree, 2> text_; 395 BufferedZoneList<RegExpTree, 2> text_;
396 BufferedZoneList<RegExpTree, 2> alternatives_; 396 BufferedZoneList<RegExpTree, 2> alternatives_;
397 RegExpTree::ReadDirection read_direction_;
397 #ifdef DEBUG 398 #ifdef DEBUG
398 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; 399 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;
399 #define LAST(x) last_added_ = x; 400 #define LAST(x) last_added_ = x;
400 #else 401 #else
401 #define LAST(x) 402 #define LAST(x)
402 #endif 403 #endif
403 }; 404 };
404 405
405 406
406 class RegExpParser BASE_EMBEDDED { 407 class RegExpParser BASE_EMBEDDED {
407 public: 408 public:
408 RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode, 409 RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode,
409 bool unicode, Isolate* isolate, Zone* zone); 410 bool unicode, Isolate* isolate, Zone* zone);
410 411
411 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, 412 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input,
412 bool multiline, bool unicode, 413 bool multiline, bool unicode,
413 RegExpCompileData* result); 414 RegExpCompileData* result);
414 415
415 RegExpTree* ParsePattern(); 416 RegExpTree* ParsePattern();
416 RegExpTree* ParseDisjunction(); 417 RegExpTree* ParseDisjunction();
417 RegExpTree* ParseGroup(); 418 RegExpTree* ParseGroup();
418 RegExpTree* ParseCharacterClass(); 419 RegExpTree* ParseCharacterClass(RegExpTree::ReadDirection read_direction);
419 420
420 // Parses a {...,...} quantifier and stores the range in the given 421 // Parses a {...,...} quantifier and stores the range in the given
421 // out parameters. 422 // out parameters.
422 bool ParseIntervalQuantifier(int* min_out, int* max_out); 423 bool ParseIntervalQuantifier(int* min_out, int* max_out);
423 424
424 // Parses and returns a single escaped character. The character 425 // Parses and returns a single escaped character. The character
425 // must not be 'b' or 'B' since they are usually handled specially. 426 // must not be 'b' or 'B' since they are usually handled specially.
426 uc32 ParseClassCharacterEscape(); 427 uc32 ParseClassCharacterEscape();
427 428
428 // Checks whether the following is a length-digit hexadecimal number, 429 // Checks whether the following is a length-digit hexadecimal number,
(...skipping 14 matching lines...) Expand all
443 RegExpTree* ReportError(Vector<const char> message); 444 RegExpTree* ReportError(Vector<const char> message);
444 void Advance(); 445 void Advance();
445 void Advance(int dist); 446 void Advance(int dist);
446 void Reset(int pos); 447 void Reset(int pos);
447 448
448 // Reports whether the pattern might be used as a literal search string. 449 // Reports whether the pattern might be used as a literal search string.
449 // Only use if the result of the parse is a single atom node. 450 // Only use if the result of the parse is a single atom node.
450 bool simple(); 451 bool simple();
451 bool contains_anchor() { return contains_anchor_; } 452 bool contains_anchor() { return contains_anchor_; }
452 void set_contains_anchor() { contains_anchor_ = true; } 453 void set_contains_anchor() { contains_anchor_ = true; }
453 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } 454 int captures_started() { return captures_started_; }
454 int position() { return next_pos_ - 1; } 455 int position() { return next_pos_ - 1; }
455 bool failed() { return failed_; } 456 bool failed() { return failed_; }
456 457
457 static bool IsSyntaxCharacter(uc32 c); 458 static bool IsSyntaxCharacter(uc32 c);
458 459
459 static const int kMaxCaptures = 1 << 16; 460 static const int kMaxCaptures = 1 << 16;
460 static const uc32 kEndMarker = (1 << 21); 461 static const uc32 kEndMarker = (1 << 21);
461 462
462 private: 463 private:
463 enum SubexpressionType { 464 enum SubexpressionType {
464 INITIAL, 465 INITIAL,
465 CAPTURE, // All positive values represent captures. 466 CAPTURE, // All positive values represent captures.
466 POSITIVE_LOOKAHEAD, 467 POSITIVE_LOOKAHEAD,
467 NEGATIVE_LOOKAHEAD, 468 NEGATIVE_LOOKAHEAD,
468 GROUPING 469 GROUPING
469 }; 470 };
470 471
471 class RegExpParserState : public ZoneObject { 472 class RegExpParserState : public ZoneObject {
472 public: 473 public:
473 RegExpParserState(RegExpParserState* previous_state, 474 RegExpParserState(RegExpParserState* previous_state,
474 SubexpressionType group_type, 475 SubexpressionType group_type,
475 int disjunction_capture_index, 476 RegExpTree::ReadDirection read_direction,
476 Zone* zone) 477 int disjunction_capture_index, Zone* zone)
477 : previous_state_(previous_state), 478 : previous_state_(previous_state),
478 builder_(new(zone) RegExpBuilder(zone)), 479 builder_(new (zone) RegExpBuilder(zone, read_direction)),
479 group_type_(group_type), 480 group_type_(group_type),
481 read_direction_(read_direction),
480 disjunction_capture_index_(disjunction_capture_index) {} 482 disjunction_capture_index_(disjunction_capture_index) {}
481 // Parser state of containing expression, if any. 483 // Parser state of containing expression, if any.
482 RegExpParserState* previous_state() { return previous_state_; } 484 RegExpParserState* previous_state() { return previous_state_; }
483 bool IsSubexpression() { return previous_state_ != NULL; } 485 bool IsSubexpression() { return previous_state_ != NULL; }
484 // RegExpBuilder building this regexp's AST. 486 // RegExpBuilder building this regexp's AST.
485 RegExpBuilder* builder() { return builder_; } 487 RegExpBuilder* builder() { return builder_; }
486 // Type of regexp being parsed (parenthesized group or entire regexp). 488 // Type of regexp being parsed (parenthesized group or entire regexp).
487 SubexpressionType group_type() { return group_type_; } 489 SubexpressionType group_type() { return group_type_; }
490 // Lookahead or Lookbehind.
491 RegExpTree::ReadDirection read_direction() { return read_direction_; }
488 // Index in captures array of first capture in this sub-expression, if any. 492 // Index in captures array of first capture in this sub-expression, if any.
489 // Also the capture index of this sub-expression itself, if group_type 493 // Also the capture index of this sub-expression itself, if group_type
490 // is CAPTURE. 494 // is CAPTURE.
491 int capture_index() { return disjunction_capture_index_; } 495 int capture_index() { return disjunction_capture_index_; }
492 496
493 private: 497 private:
494 // Linked list implementation of stack of states. 498 // Linked list implementation of stack of states.
495 RegExpParserState* previous_state_; 499 RegExpParserState* previous_state_;
496 // Builder for the stored disjunction. 500 // Builder for the stored disjunction.
497 RegExpBuilder* builder_; 501 RegExpBuilder* builder_;
498 // Stored disjunction type (capture, look-ahead or grouping), if any. 502 // Stored disjunction type (capture, look-ahead or grouping), if any.
499 SubexpressionType group_type_; 503 SubexpressionType group_type_;
504 // Stored read direction.
505 RegExpTree::ReadDirection read_direction_;
500 // Stored disjunction's capture index (if any). 506 // Stored disjunction's capture index (if any).
501 int disjunction_capture_index_; 507 int disjunction_capture_index_;
502 }; 508 };
503 509
510 // Return the 1-indexed RegExpCapture object, allocate if necessary.
511 RegExpCapture* GetCapture(int index);
512
504 Isolate* isolate() { return isolate_; } 513 Isolate* isolate() { return isolate_; }
505 Zone* zone() const { return zone_; } 514 Zone* zone() const { return zone_; }
506 515
507 uc32 current() { return current_; } 516 uc32 current() { return current_; }
508 bool has_more() { return has_more_; } 517 bool has_more() { return has_more_; }
509 bool has_next() { return next_pos_ < in()->length(); } 518 bool has_next() { return next_pos_ < in()->length(); }
510 uc32 Next(); 519 uc32 Next();
511 FlatStringReader* in() { return in_; } 520 FlatStringReader* in() { return in_; }
512 void ScanForCaptures(); 521 void ScanForCaptures();
513 522
514 Isolate* isolate_; 523 Isolate* isolate_;
515 Zone* zone_; 524 Zone* zone_;
516 Handle<String>* error_; 525 Handle<String>* error_;
517 ZoneList<RegExpCapture*>* captures_; 526 ZoneList<RegExpCapture*>* captures_;
518 FlatStringReader* in_; 527 FlatStringReader* in_;
519 uc32 current_; 528 uc32 current_;
520 int next_pos_; 529 int next_pos_;
530 int captures_started_;
521 // The capture count is only valid after we have scanned for captures. 531 // The capture count is only valid after we have scanned for captures.
522 int capture_count_; 532 int capture_count_;
523 bool has_more_; 533 bool has_more_;
524 bool multiline_; 534 bool multiline_;
525 bool unicode_; 535 bool unicode_;
526 bool simple_; 536 bool simple_;
527 bool contains_anchor_; 537 bool contains_anchor_;
528 bool is_scanned_for_captures_; 538 bool is_scanned_for_captures_;
529 bool failed_; 539 bool failed_;
530 }; 540 };
(...skipping 856 matching lines...) Expand 10 before | Expand all | Expand 10 after
1387 1397
1388 DoExpression* ParserTraits::ParseDoExpression(bool* ok) { 1398 DoExpression* ParserTraits::ParseDoExpression(bool* ok) {
1389 return parser_->ParseDoExpression(ok); 1399 return parser_->ParseDoExpression(ok);
1390 } 1400 }
1391 1401
1392 1402
1393 } // namespace internal 1403 } // namespace internal
1394 } // namespace v8 1404 } // namespace v8
1395 1405
1396 #endif // V8_PARSER_H_ 1406 #endif // V8_PARSER_H_
OLDNEW
« no previous file with comments | « src/flag-definitions.h ('k') | src/parser.cc » ('j') | src/regexp/jsregexp.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698