| OLD | NEW | 
|---|---|
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 #ifndef V8_PARSING_PARSER_H_ | 5 #ifndef V8_PARSING_PARSER_H_ | 
| 6 #define V8_PARSING_PARSER_H_ | 6 #define V8_PARSING_PARSER_H_ | 
| 7 | 7 | 
| 8 #include "src/allocation.h" | 8 #include "src/allocation.h" | 
| 9 #include "src/ast/ast.h" | 9 #include "src/ast/ast.h" | 
| 10 #include "src/ast/scopes.h" | 10 #include "src/ast/scopes.h" | 
| (...skipping 271 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 282     return script_data_->length() / sizeof(unsigned); | 282     return script_data_->length() / sizeof(unsigned); | 
| 283   } | 283   } | 
| 284 | 284 | 
| 285   ScriptData* script_data_; | 285   ScriptData* script_data_; | 
| 286   int function_index_; | 286   int function_index_; | 
| 287 | 287 | 
| 288   DISALLOW_COPY_AND_ASSIGN(ParseData); | 288   DISALLOW_COPY_AND_ASSIGN(ParseData); | 
| 289 }; | 289 }; | 
| 290 | 290 | 
| 291 // ---------------------------------------------------------------------------- | 291 // ---------------------------------------------------------------------------- | 
| 292 // REGEXP PARSING |  | 
| 293 |  | 
| 294 // A BufferedZoneList is an automatically growing list, just like (and backed |  | 
| 295 // by) a ZoneList, that is optimized for the case of adding and removing |  | 
| 296 // a single element. The last element added is stored outside the backing list, |  | 
| 297 // and if no more than one element is ever added, the ZoneList isn't even |  | 
| 298 // allocated. |  | 
| 299 // Elements must not be NULL pointers. |  | 
| 300 template <typename T, int initial_size> |  | 
| 301 class BufferedZoneList { |  | 
| 302  public: |  | 
| 303   BufferedZoneList() : list_(NULL), last_(NULL) {} |  | 
| 304 |  | 
| 305   // Adds element at end of list. This element is buffered and can |  | 
| 306   // be read using last() or removed using RemoveLast until a new Add or until |  | 
| 307   // RemoveLast or GetList has been called. |  | 
| 308   void Add(T* value, Zone* zone) { |  | 
| 309     if (last_ != NULL) { |  | 
| 310       if (list_ == NULL) { |  | 
| 311         list_ = new(zone) ZoneList<T*>(initial_size, zone); |  | 
| 312       } |  | 
| 313       list_->Add(last_, zone); |  | 
| 314     } |  | 
| 315     last_ = value; |  | 
| 316   } |  | 
| 317 |  | 
| 318   T* last() { |  | 
| 319     DCHECK(last_ != NULL); |  | 
| 320     return last_; |  | 
| 321   } |  | 
| 322 |  | 
| 323   T* RemoveLast() { |  | 
| 324     DCHECK(last_ != NULL); |  | 
| 325     T* result = last_; |  | 
| 326     if ((list_ != NULL) && (list_->length() > 0)) |  | 
| 327       last_ = list_->RemoveLast(); |  | 
| 328     else |  | 
| 329       last_ = NULL; |  | 
| 330     return result; |  | 
| 331   } |  | 
| 332 |  | 
| 333   T* Get(int i) { |  | 
| 334     DCHECK((0 <= i) && (i < length())); |  | 
| 335     if (list_ == NULL) { |  | 
| 336       DCHECK_EQ(0, i); |  | 
| 337       return last_; |  | 
| 338     } else { |  | 
| 339       if (i == list_->length()) { |  | 
| 340         DCHECK(last_ != NULL); |  | 
| 341         return last_; |  | 
| 342       } else { |  | 
| 343         return list_->at(i); |  | 
| 344       } |  | 
| 345     } |  | 
| 346   } |  | 
| 347 |  | 
| 348   void Clear() { |  | 
| 349     list_ = NULL; |  | 
| 350     last_ = NULL; |  | 
| 351   } |  | 
| 352 |  | 
| 353   int length() { |  | 
| 354     int length = (list_ == NULL) ? 0 : list_->length(); |  | 
| 355     return length + ((last_ == NULL) ? 0 : 1); |  | 
| 356   } |  | 
| 357 |  | 
| 358   ZoneList<T*>* GetList(Zone* zone) { |  | 
| 359     if (list_ == NULL) { |  | 
| 360       list_ = new(zone) ZoneList<T*>(initial_size, zone); |  | 
| 361     } |  | 
| 362     if (last_ != NULL) { |  | 
| 363       list_->Add(last_, zone); |  | 
| 364       last_ = NULL; |  | 
| 365     } |  | 
| 366     return list_; |  | 
| 367   } |  | 
| 368 |  | 
| 369  private: |  | 
| 370   ZoneList<T*>* list_; |  | 
| 371   T* last_; |  | 
| 372 }; |  | 
| 373 |  | 
| 374 |  | 
| 375 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |  | 
| 376 class RegExpBuilder: public ZoneObject { |  | 
| 377  public: |  | 
| 378   explicit RegExpBuilder(Zone* zone); |  | 
| 379   void AddCharacter(uc16 character); |  | 
| 380   // "Adds" an empty expression. Does nothing except consume a |  | 
| 381   // following quantifier. |  | 
| 382   void AddEmpty(); |  | 
| 383   void AddAtom(RegExpTree* tree); |  | 
| 384   void AddAssertion(RegExpTree* tree); |  | 
| 385   void NewAlternative();  // '|' |  | 
| 386   void AddQuantifierToAtom( |  | 
| 387       int min, int max, RegExpQuantifier::QuantifierType type); |  | 
| 388   RegExpTree* ToRegExp(); |  | 
| 389 |  | 
| 390  private: |  | 
| 391   void FlushCharacters(); |  | 
| 392   void FlushText(); |  | 
| 393   void FlushTerms(); |  | 
| 394   Zone* zone() const { return zone_; } |  | 
| 395 |  | 
| 396   Zone* zone_; |  | 
| 397   bool pending_empty_; |  | 
| 398   ZoneList<uc16>* characters_; |  | 
| 399   BufferedZoneList<RegExpTree, 2> terms_; |  | 
| 400   BufferedZoneList<RegExpTree, 2> text_; |  | 
| 401   BufferedZoneList<RegExpTree, 2> alternatives_; |  | 
| 402 #ifdef DEBUG |  | 
| 403   enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; |  | 
| 404 #define LAST(x) last_added_ = x; |  | 
| 405 #else |  | 
| 406 #define LAST(x) |  | 
| 407 #endif |  | 
| 408 }; |  | 
| 409 |  | 
| 410 |  | 
| 411 class RegExpParser BASE_EMBEDDED { |  | 
| 412  public: |  | 
| 413   RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode, |  | 
| 414                bool unicode, Isolate* isolate, Zone* zone); |  | 
| 415 |  | 
| 416   static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, |  | 
| 417                           bool multiline, bool unicode, |  | 
| 418                           RegExpCompileData* result); |  | 
| 419 |  | 
| 420   RegExpTree* ParsePattern(); |  | 
| 421   RegExpTree* ParseDisjunction(); |  | 
| 422   RegExpTree* ParseGroup(); |  | 
| 423   RegExpTree* ParseCharacterClass(); |  | 
| 424 |  | 
| 425   // Parses a {...,...} quantifier and stores the range in the given |  | 
| 426   // out parameters. |  | 
| 427   bool ParseIntervalQuantifier(int* min_out, int* max_out); |  | 
| 428 |  | 
| 429   // Parses and returns a single escaped character.  The character |  | 
| 430   // must not be 'b' or 'B' since they are usually handled specially. |  | 
| 431   uc32 ParseClassCharacterEscape(); |  | 
| 432 |  | 
| 433   // Checks whether the following is a length-digit hexadecimal number, |  | 
| 434   // and sets the value if it is. |  | 
| 435   bool ParseHexEscape(int length, uc32* value); |  | 
| 436   bool ParseUnicodeEscape(uc32* value); |  | 
| 437   bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value); |  | 
| 438 |  | 
| 439   uc32 ParseOctalLiteral(); |  | 
| 440 |  | 
| 441   // Tries to parse the input as a back reference.  If successful it |  | 
| 442   // stores the result in the output parameter and returns true.  If |  | 
| 443   // it fails it will push back the characters read so the same characters |  | 
| 444   // can be reparsed. |  | 
| 445   bool ParseBackReferenceIndex(int* index_out); |  | 
| 446 |  | 
| 447   CharacterRange ParseClassAtom(uc16* char_class); |  | 
| 448   RegExpTree* ReportError(Vector<const char> message); |  | 
| 449   void Advance(); |  | 
| 450   void Advance(int dist); |  | 
| 451   void Reset(int pos); |  | 
| 452 |  | 
| 453   // Reports whether the pattern might be used as a literal search string. |  | 
| 454   // Only use if the result of the parse is a single atom node. |  | 
| 455   bool simple(); |  | 
| 456   bool contains_anchor() { return contains_anchor_; } |  | 
| 457   void set_contains_anchor() { contains_anchor_ = true; } |  | 
| 458   int captures_started() { return captures_started_; } |  | 
| 459   int position() { return next_pos_ - 1; } |  | 
| 460   bool failed() { return failed_; } |  | 
| 461 |  | 
| 462   static bool IsSyntaxCharacter(uc32 c); |  | 
| 463 |  | 
| 464   static const int kMaxCaptures = 1 << 16; |  | 
| 465   static const uc32 kEndMarker = (1 << 21); |  | 
| 466 |  | 
| 467  private: |  | 
| 468   enum SubexpressionType { |  | 
| 469     INITIAL, |  | 
| 470     CAPTURE,  // All positive values represent captures. |  | 
| 471     POSITIVE_LOOKAROUND, |  | 
| 472     NEGATIVE_LOOKAROUND, |  | 
| 473     GROUPING |  | 
| 474   }; |  | 
| 475 |  | 
| 476   class RegExpParserState : public ZoneObject { |  | 
| 477    public: |  | 
| 478     RegExpParserState(RegExpParserState* previous_state, |  | 
| 479                       SubexpressionType group_type, |  | 
| 480                       RegExpLookaround::Type lookaround_type, |  | 
| 481                       int disjunction_capture_index, Zone* zone) |  | 
| 482         : previous_state_(previous_state), |  | 
| 483           builder_(new (zone) RegExpBuilder(zone)), |  | 
| 484           group_type_(group_type), |  | 
| 485           lookaround_type_(lookaround_type), |  | 
| 486           disjunction_capture_index_(disjunction_capture_index) {} |  | 
| 487     // Parser state of containing expression, if any. |  | 
| 488     RegExpParserState* previous_state() { return previous_state_; } |  | 
| 489     bool IsSubexpression() { return previous_state_ != NULL; } |  | 
| 490     // RegExpBuilder building this regexp's AST. |  | 
| 491     RegExpBuilder* builder() { return builder_; } |  | 
| 492     // Type of regexp being parsed (parenthesized group or entire regexp). |  | 
| 493     SubexpressionType group_type() { return group_type_; } |  | 
| 494     // Lookahead or Lookbehind. |  | 
| 495     RegExpLookaround::Type lookaround_type() { return lookaround_type_; } |  | 
| 496     // Index in captures array of first capture in this sub-expression, if any. |  | 
| 497     // Also the capture index of this sub-expression itself, if group_type |  | 
| 498     // is CAPTURE. |  | 
| 499     int capture_index() { return disjunction_capture_index_; } |  | 
| 500 |  | 
| 501     // Check whether the parser is inside a capture group with the given index. |  | 
| 502     bool IsInsideCaptureGroup(int index); |  | 
| 503 |  | 
| 504    private: |  | 
| 505     // Linked list implementation of stack of states. |  | 
| 506     RegExpParserState* previous_state_; |  | 
| 507     // Builder for the stored disjunction. |  | 
| 508     RegExpBuilder* builder_; |  | 
| 509     // Stored disjunction type (capture, lookaround or grouping), if any. |  | 
| 510     SubexpressionType group_type_; |  | 
| 511     // Stored read direction. |  | 
| 512     RegExpLookaround::Type lookaround_type_; |  | 
| 513     // Stored disjunction's capture index (if any). |  | 
| 514     int disjunction_capture_index_; |  | 
| 515   }; |  | 
| 516 |  | 
| 517   // Return the 1-indexed RegExpCapture object, allocate if necessary. |  | 
| 518   RegExpCapture* GetCapture(int index); |  | 
| 519 |  | 
| 520   Isolate* isolate() { return isolate_; } |  | 
| 521   Zone* zone() const { return zone_; } |  | 
| 522 |  | 
| 523   uc32 current() { return current_; } |  | 
| 524   bool has_more() { return has_more_; } |  | 
| 525   bool has_next() { return next_pos_ < in()->length(); } |  | 
| 526   uc32 Next(); |  | 
| 527   FlatStringReader* in() { return in_; } |  | 
| 528   void ScanForCaptures(); |  | 
| 529 |  | 
| 530   Isolate* isolate_; |  | 
| 531   Zone* zone_; |  | 
| 532   Handle<String>* error_; |  | 
| 533   ZoneList<RegExpCapture*>* captures_; |  | 
| 534   FlatStringReader* in_; |  | 
| 535   uc32 current_; |  | 
| 536   int next_pos_; |  | 
| 537   int captures_started_; |  | 
| 538   // The capture count is only valid after we have scanned for captures. |  | 
| 539   int capture_count_; |  | 
| 540   bool has_more_; |  | 
| 541   bool multiline_; |  | 
| 542   bool unicode_; |  | 
| 543   bool simple_; |  | 
| 544   bool contains_anchor_; |  | 
| 545   bool is_scanned_for_captures_; |  | 
| 546   bool failed_; |  | 
| 547 }; |  | 
| 548 |  | 
| 549 // ---------------------------------------------------------------------------- |  | 
| 550 // JAVASCRIPT PARSING | 292 // JAVASCRIPT PARSING | 
| 551 | 293 | 
| 552 class Parser; | 294 class Parser; | 
| 553 class SingletonLogger; | 295 class SingletonLogger; | 
| 554 | 296 | 
| 555 | 297 | 
| 556 struct ParserFormalParameters : FormalParametersBase { | 298 struct ParserFormalParameters : FormalParametersBase { | 
| 557   struct Parameter { | 299   struct Parameter { | 
| 558     Parameter(const AstRawString* name, Expression* pattern, | 300     Parameter(const AstRawString* name, Expression* pattern, | 
| 559               Expression* initializer, int initializer_end_position, | 301               Expression* initializer, int initializer_end_position, | 
| (...skipping 890 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1450 | 1192 | 
| 1451 DoExpression* ParserTraits::ParseDoExpression(bool* ok) { | 1193 DoExpression* ParserTraits::ParseDoExpression(bool* ok) { | 
| 1452   return parser_->ParseDoExpression(ok); | 1194   return parser_->ParseDoExpression(ok); | 
| 1453 } | 1195 } | 
| 1454 | 1196 | 
| 1455 | 1197 | 
| 1456 }  // namespace internal | 1198 }  // namespace internal | 
| 1457 }  // namespace v8 | 1199 }  // namespace v8 | 
| 1458 | 1200 | 
| 1459 #endif  // V8_PARSING_PARSER_H_ | 1201 #endif  // V8_PARSING_PARSER_H_ | 
| OLD | NEW | 
|---|