| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_PARSING_PARSER_H_ | 5 #ifndef V8_PARSING_PARSER_H_ |
| 6 #define V8_PARSING_PARSER_H_ | 6 #define V8_PARSING_PARSER_H_ |
| 7 | 7 |
| 8 #include "src/allocation.h" | 8 #include "src/allocation.h" |
| 9 #include "src/ast/ast.h" | 9 #include "src/ast/ast.h" |
| 10 #include "src/ast/scopes.h" | 10 #include "src/ast/scopes.h" |
| (...skipping 271 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 282 return script_data_->length() / sizeof(unsigned); | 282 return script_data_->length() / sizeof(unsigned); |
| 283 } | 283 } |
| 284 | 284 |
| 285 ScriptData* script_data_; | 285 ScriptData* script_data_; |
| 286 int function_index_; | 286 int function_index_; |
| 287 | 287 |
| 288 DISALLOW_COPY_AND_ASSIGN(ParseData); | 288 DISALLOW_COPY_AND_ASSIGN(ParseData); |
| 289 }; | 289 }; |
| 290 | 290 |
| 291 // ---------------------------------------------------------------------------- | 291 // ---------------------------------------------------------------------------- |
| 292 // REGEXP PARSING | |
| 293 | |
| 294 // A BufferedZoneList is an automatically growing list, just like (and backed | |
| 295 // by) a ZoneList, that is optimized for the case of adding and removing | |
| 296 // a single element. The last element added is stored outside the backing list, | |
| 297 // and if no more than one element is ever added, the ZoneList isn't even | |
| 298 // allocated by Add (GetList always creates it so that it can return a list). | |
| 299 // Elements must not be NULL pointers: a NULL last_ means nothing is buffered. | |
| 300 template <typename T, int initial_size> | |
| 301 class BufferedZoneList { | |
| 302 public: | |
| 303 BufferedZoneList() : list_(NULL), last_(NULL) {} | |
| 304 | |
| 305 // Adds element at end of list. This element is buffered and can | |
| 306 // be read using last() or removed using RemoveLast until a new Add or until | |
| 307 // RemoveLast or GetList has been called. A previously buffered element is first moved into the backing list, which is allocated from zone if it does not exist yet. | |
| 308 void Add(T* value, Zone* zone) { | |
| 309 if (last_ != NULL) { | |
| 310 if (list_ == NULL) { | |
| 311 list_ = new(zone) ZoneList<T*>(initial_size, zone); | |
| 312 } | |
| 313 list_->Add(last_, zone); | |
| 314 } | |
| 315 last_ = value; | |
| 316 } | |
| 317 /* Returns the buffered end element; DCHECKs that there is one. */ | |
| 318 T* last() { | |
| 319 DCHECK(last_ != NULL); | |
| 320 return last_; | |
| 321 } | |
| 322 /* Removes and returns the end element. The new end element, if the backing list holds one, is popped from it and becomes the buffered element. */ | |
| 323 T* RemoveLast() { | |
| 324 DCHECK(last_ != NULL); | |
| 325 T* result = last_; | |
| 326 if ((list_ != NULL) && (list_->length() > 0)) | |
| 327 last_ = list_->RemoveLast(); | |
| 328 else | |
| 329 last_ = NULL; | |
| 330 return result; | |
| 331 } | |
| 332 /* Returns element i, which must be in [0, length()). Index list_->length() denotes the buffered element. */ | |
| 333 T* Get(int i) { | |
| 334 DCHECK((0 <= i) && (i < length())); | |
| 335 if (list_ == NULL) { | |
| 336 DCHECK_EQ(0, i); | |
| 337 return last_; | |
| 338 } else { | |
| 339 if (i == list_->length()) { | |
| 340 DCHECK(last_ != NULL); | |
| 341 return last_; | |
| 342 } else { | |
| 343 return list_->at(i); | |
| 344 } | |
| 345 } | |
| 346 } | |
| 347 /* Empties the list by dropping both pointers; nothing is deallocated here. */ | |
| 348 void Clear() { | |
| 349 list_ = NULL; | |
| 350 last_ = NULL; | |
| 351 } | |
| 352 /* Number of elements, counting the buffered one if present. */ | |
| 353 int length() { | |
| 354 int length = (list_ == NULL) ? 0 : list_->length(); | |
| 355 return length + ((last_ == NULL) ? 0 : 1); | |
| 356 } | |
| 357 /* Moves the buffered element (if any) into the backing list, creating the list from zone when needed, and returns it. Afterwards nothing is buffered, so last() and RemoveLast() must not be called until the next Add. */ | |
| 358 ZoneList<T*>* GetList(Zone* zone) { | |
| 359 if (list_ == NULL) { | |
| 360 list_ = new(zone) ZoneList<T*>(initial_size, zone); | |
| 361 } | |
| 362 if (last_ != NULL) { | |
| 363 list_->Add(last_, zone); | |
| 364 last_ = NULL; | |
| 365 } | |
| 366 return list_; | |
| 367 } | |
| 368 | |
| 369 private: | |
| 370 ZoneList<T*>* list_; | |
| 371 T* last_; | |
| 372 }; | |
| 373 | |
| 374 | |
| 375 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. Apart from zone(), the member functions are only declared here. | |
| 376 class RegExpBuilder: public ZoneObject { | |
| 377 public: | |
| 378 explicit RegExpBuilder(Zone* zone); | |
| 379 void AddCharacter(uc16 character); | |
| 380 // "Adds" an empty expression. Does nothing except consume a | |
| 381 // following quantifier. | |
| 382 void AddEmpty(); | |
| 383 void AddAtom(RegExpTree* tree); | |
| 384 void AddAssertion(RegExpTree* tree); | |
| 385 void NewAlternative(); // '|' | |
| 386 void AddQuantifierToAtom( | |
| 387 int min, int max, RegExpQuantifier::QuantifierType type); | |
| 388 RegExpTree* ToRegExp(); | |
| 389 | |
| 390 private: | |
| 391 void FlushCharacters(); | |
| 392 void FlushText(); | |
| 393 void FlushTerms(); | |
| 394 Zone* zone() const { return zone_; } | |
| 395 | |
| 396 Zone* zone_; | |
| 397 bool pending_empty_; | |
| 398 ZoneList<uc16>* characters_; | |
| 399 BufferedZoneList<RegExpTree, 2> terms_; | |
| 400 BufferedZoneList<RegExpTree, 2> text_; | |
| 401 BufferedZoneList<RegExpTree, 2> alternatives_; | |
| 402 #ifdef DEBUG | |
| 403 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; /* Debug-only field, assigned through LAST(x); in non-DEBUG builds LAST(x) expands to nothing. LAST is a preprocessor macro, so it is not scoped to this class, and no #undef is visible here. */ | |
| 404 #define LAST(x) last_added_ = x; | |
| 405 #else | |
| 406 #define LAST(x) | |
| 407 #endif | |
| 408 }; | |
| 409 | |
| 410 /* Parser for regular expression patterns. Input is read through a FlatStringReader; ParseRegExp is the static helper that takes a RegExpCompileData* for its result. Except for small inline accessors, the member functions are only declared here. */ | |
| 411 class RegExpParser BASE_EMBEDDED { | |
| 412 public: | |
| 413 RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode, | |
| 414 bool unicode, Isolate* isolate, Zone* zone); | |
| 415 | |
| 416 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, | |
| 417 bool multiline, bool unicode, | |
| 418 RegExpCompileData* result); | |
| 419 | |
| 420 RegExpTree* ParsePattern(); | |
| 421 RegExpTree* ParseDisjunction(); | |
| 422 RegExpTree* ParseGroup(); | |
| 423 RegExpTree* ParseCharacterClass(); | |
| 424 | |
| 425 // Parses a {...,...} quantifier and stores the range in the given | |
| 426 // out parameters. | |
| 427 bool ParseIntervalQuantifier(int* min_out, int* max_out); | |
| 428 | |
| 429 // Parses and returns a single escaped character. The character | |
| 430 // must not be 'b' or 'B' since they are usually handled specially. | |
| 431 uc32 ParseClassCharacterEscape(); | |
| 432 | |
| 433 // Checks whether the following is a length-digit hexadecimal number, | |
| 434 // and sets the value if it is. | |
| 435 bool ParseHexEscape(int length, uc32* value); | |
| 436 bool ParseUnicodeEscape(uc32* value); | |
| 437 bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value); | |
| 438 | |
| 439 uc32 ParseOctalLiteral(); | |
| 440 | |
| 441 // Tries to parse the input as a back reference. If successful it | |
| 442 // stores the result in the output parameter and returns true. If | |
| 443 // it fails it will push back the characters read so the same characters | |
| 444 // can be reparsed. | |
| 445 bool ParseBackReferenceIndex(int* index_out); | |
| 446 | |
| 447 CharacterRange ParseClassAtom(uc16* char_class); | |
| 448 RegExpTree* ReportError(Vector<const char> message); | |
| 449 void Advance(); | |
| 450 void Advance(int dist); | |
| 451 void Reset(int pos); | |
| 452 | |
| 453 // Reports whether the pattern might be used as a literal search string. | |
| 454 // Only use if the result of the parse is a single atom node. | |
| 455 bool simple(); | |
| 456 bool contains_anchor() { return contains_anchor_; } | |
| 457 void set_contains_anchor() { contains_anchor_ = true; } | |
| 458 int captures_started() { return captures_started_; } | |
| 459 int position() { return next_pos_ - 1; } | |
| 460 bool failed() { return failed_; } | |
| 461 | |
| 462 static bool IsSyntaxCharacter(uc32 c); | |
| 463 | |
| 464 static const int kMaxCaptures = 1 << 16; | |
| 465 static const uc32 kEndMarker = (1 << 21); /* 0x200000, which is larger than the highest Unicode code point 0x10FFFF. */ | |
| 466 | |
| 467 private: | |
| 468 enum SubexpressionType { | |
| 469 INITIAL, | |
| 470 CAPTURE, // All positive values represent captures. NOTE(review): the enumerators that follow are positive as well; confirm this remark still holds. | |
| 471 POSITIVE_LOOKAROUND, | |
| 472 NEGATIVE_LOOKAROUND, | |
| 473 GROUPING | |
| 474 }; | |
| 475 /* One entry of the stack of parser states. Entries are linked through previous_state_, and each one holds a pointer to a RegExpBuilder allocated from the zone passed to the constructor. */ | |
| 476 class RegExpParserState : public ZoneObject { | |
| 477 public: | |
| 478 RegExpParserState(RegExpParserState* previous_state, | |
| 479 SubexpressionType group_type, | |
| 480 RegExpLookaround::Type lookaround_type, | |
| 481 int disjunction_capture_index, Zone* zone) | |
| 482 : previous_state_(previous_state), | |
| 483 builder_(new (zone) RegExpBuilder(zone)), | |
| 484 group_type_(group_type), | |
| 485 lookaround_type_(lookaround_type), | |
| 486 disjunction_capture_index_(disjunction_capture_index) {} | |
| 487 // Parser state of containing expression, if any. | |
| 488 RegExpParserState* previous_state() { return previous_state_; } | |
| 489 bool IsSubexpression() { return previous_state_ != NULL; } | |
| 490 // RegExpBuilder building this regexp's AST. | |
| 491 RegExpBuilder* builder() { return builder_; } | |
| 492 // Type of regexp being parsed (parenthesized group or entire regexp). | |
| 493 SubexpressionType group_type() { return group_type_; } | |
| 494 // Lookahead or Lookbehind. | |
| 495 RegExpLookaround::Type lookaround_type() { return lookaround_type_; } | |
| 496 // Index in captures array of first capture in this sub-expression, if any. | |
| 497 // Also the capture index of this sub-expression itself, if group_type | |
| 498 // is CAPTURE. | |
| 499 int capture_index() { return disjunction_capture_index_; } | |
| 500 | |
| 501 // Check whether the parser is inside a capture group with the given index. | |
| 502 bool IsInsideCaptureGroup(int index); | |
| 503 | |
| 504 private: | |
| 505 // Linked list implementation of stack of states. | |
| 506 RegExpParserState* previous_state_; | |
| 507 // Builder for the stored disjunction. | |
| 508 RegExpBuilder* builder_; | |
| 509 // Stored disjunction type (capture, lookaround or grouping), if any. | |
| 510 SubexpressionType group_type_; | |
| 511 // Stored read direction. | |
| 512 RegExpLookaround::Type lookaround_type_; | |
| 513 // Stored disjunction's capture index (if any). | |
| 514 int disjunction_capture_index_; | |
| 515 }; | |
| 516 | |
| 517 // Return the 1-indexed RegExpCapture object, allocate if necessary. | |
| 518 RegExpCapture* GetCapture(int index); | |
| 519 | |
| 520 Isolate* isolate() { return isolate_; } | |
| 521 Zone* zone() const { return zone_; } | |
| 522 | |
| 523 uc32 current() { return current_; } | |
| 524 bool has_more() { return has_more_; } | |
| 525 bool has_next() { return next_pos_ < in()->length(); } | |
| 526 uc32 Next(); | |
| 527 FlatStringReader* in() { return in_; } | |
| 528 void ScanForCaptures(); | |
| 529 | |
| 530 Isolate* isolate_; | |
| 531 Zone* zone_; | |
| 532 Handle<String>* error_; | |
| 533 ZoneList<RegExpCapture*>* captures_; | |
| 534 FlatStringReader* in_; | |
| 535 uc32 current_; | |
| 536 int next_pos_; | |
| 537 int captures_started_; | |
| 538 // The capture count is only valid after we have scanned for captures. | |
| 539 int capture_count_; | |
| 540 bool has_more_; | |
| 541 bool multiline_; | |
| 542 bool unicode_; | |
| 543 bool simple_; | |
| 544 bool contains_anchor_; | |
| 545 bool is_scanned_for_captures_; | |
| 546 bool failed_; | |
| 547 }; | |
| 548 | |
| 549 // ---------------------------------------------------------------------------- | |
| 550 // JAVASCRIPT PARSING | 292 // JAVASCRIPT PARSING |
| 551 | 293 |
| 552 class Parser; | 294 class Parser; |
| 553 class SingletonLogger; | 295 class SingletonLogger; |
| 554 | 296 |
| 555 | 297 |
| 556 struct ParserFormalParameters : FormalParametersBase { | 298 struct ParserFormalParameters : FormalParametersBase { |
| 557 struct Parameter { | 299 struct Parameter { |
| 558 Parameter(const AstRawString* name, Expression* pattern, | 300 Parameter(const AstRawString* name, Expression* pattern, |
| 559 Expression* initializer, int initializer_end_position, | 301 Expression* initializer, int initializer_end_position, |
| (...skipping 890 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1450 /* Forwards to parser_->ParseDoExpression, passing ok through unchanged. */ | 1192 /* Forwards to parser_->ParseDoExpression, passing ok through unchanged. */ |
| 1451 DoExpression* ParserTraits::ParseDoExpression(bool* ok) { | 1193 DoExpression* ParserTraits::ParseDoExpression(bool* ok) { |
| 1452 return parser_->ParseDoExpression(ok); | 1194 return parser_->ParseDoExpression(ok); |
| 1453 } | 1195 } |
| 1454 | 1196 |
| 1455 | 1197 |
| 1456 } // namespace internal | 1198 } // namespace internal |
| 1457 } // namespace v8 | 1199 } // namespace v8 |
| 1458 | 1200 |
| 1459 #endif // V8_PARSING_PARSER_H_ | 1201 #endif // V8_PARSING_PARSER_H_ |
| OLD | NEW |