 Chromium Code Reviews
 Chromium Code Reviews Issue 549207:
  Added validating JSON parser mode to parser.  (Closed)
    
  
    Issue 549207:
  Added validating JSON parser mode to parser.  (Closed) 
  | OLD | NEW | 
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without | 
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are | 
| 4 // met: | 4 // met: | 
| 5 // | 5 // | 
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright | 
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. | 
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above | 
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following | 
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided | 
| (...skipping 305 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 316 UNREACHABLE(); | 316 UNREACHABLE(); | 
| 317 } | 317 } | 
| 318 // On fallthrough, it's a failure. | 318 // On fallthrough, it's a failure. | 
| 319 state_ = UNMATCHABLE; | 319 state_ = UNMATCHABLE; | 
| 320 } | 320 } | 
| 321 | 321 | 
| 322 | 322 | 
| 323 // ---------------------------------------------------------------------------- | 323 // ---------------------------------------------------------------------------- | 
| 324 // Scanner | 324 // Scanner | 
| 325 | 325 | 
| 326 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { } | 326 Scanner::Scanner(ParserMode pre) | 
| 327 : stack_overflow_(false), is_pre_parsing_(pre == PREPARSE) { } | |
| 327 | 328 | 
| 328 | 329 | 
| 329 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, | 330 void Scanner::Init(Handle<String> source, | 
| 330 int position) { | 331 unibrow::CharacterStream* stream, | 
| 332 int position, | |
| 333 ParserLanguage language) { | |
| 331 // Initialize the source buffer. | 334 // Initialize the source buffer. | 
| 332 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { | 335 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { | 
| 333 two_byte_string_buffer_.Initialize( | 336 two_byte_string_buffer_.Initialize( | 
| 334 Handle<ExternalTwoByteString>::cast(source)); | 337 Handle<ExternalTwoByteString>::cast(source)); | 
| 335 source_ = &two_byte_string_buffer_; | 338 source_ = &two_byte_string_buffer_; | 
| 336 } else { | 339 } else { | 
| 337 char_stream_buffer_.Initialize(source, stream); | 340 char_stream_buffer_.Initialize(source, stream); | 
| 338 source_ = &char_stream_buffer_; | 341 source_ = &char_stream_buffer_; | 
| 339 } | 342 } | 
| 340 | 343 | 
| 341 position_ = position; | 344 position_ = position; | 
| 345 is_parsing_json_ = (language == JSON); | |
| 342 | 346 | 
| 343 // Set c0_ (one character ahead) | 347 // Set c0_ (one character ahead) | 
| 344 ASSERT(kCharacterLookaheadBufferSize == 1); | 348 ASSERT(kCharacterLookaheadBufferSize == 1); | 
| 345 Advance(); | 349 Advance(); | 
| 346 // Initialize current_ to not refer to a literal buffer. | 350 // Initialize current_ to not refer to a literal buffer. | 
| 347 current_.literal_buffer = NULL; | 351 current_.literal_buffer = NULL; | 
| 348 | 352 | 
| 349 // Skip initial whitespace allowing HTML comment ends just like | 353 // Skip initial whitespace allowing HTML comment ends just like | 
| 350 // after a newline and scan first token. | 354 // after a newline and scan first token. | 
| 351 has_line_terminator_before_next_ = true; | 355 has_line_terminator_before_next_ = true; | 
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 409 // Unicode character; this implies that in a Unicode context the | 413 // Unicode character; this implies that in a Unicode context the | 
| 410 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 414 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 
| 411 // character expressed in little-endian byte order (since it could | 415 // character expressed in little-endian byte order (since it could | 
| 412 // not be a U+FFFE character expressed in big-endian byte | 416 // not be a U+FFFE character expressed in big-endian byte | 
| 413 // order). Nevertheless, we check for it to be compatible with | 417 // order). Nevertheless, we check for it to be compatible with | 
| 414 // Spidermonkey. | 418 // Spidermonkey. | 
| 415 return c == 0xFEFF || c == 0xFFFE; | 419 return c == 0xFEFF || c == 0xFFFE; | 
| 416 } | 420 } | 
| 417 | 421 | 
| 418 | 422 | 
| 419 bool Scanner::SkipWhiteSpace() { | 423 bool Scanner::SkipJsonWhiteSpace() { | 
| 424 int start_position = source_pos(); | |
| 425 // JSON WhiteSpace is tab, carriage-return, newline and space. | |
| 426 while (c0_ == ' ' || c0_ == '\n' || c0_ == '\r' || c0_ == '\t') { | |
| 427 Advance(); | |
| 428 } | |
| 429 return source_pos() != start_position; | |
| 430 } | |
| 431 | |
| 432 | |
| 433 bool Scanner::SkipJavaScriptWhiteSpace() { | |
| 420 int start_position = source_pos(); | 434 int start_position = source_pos(); | 
| 421 | 435 | 
| 422 while (true) { | 436 while (true) { | 
| 423 // We treat byte-order marks (BOMs) as whitespace for better | 437 // We treat byte-order marks (BOMs) as whitespace for better | 
| 424 // compatibility with Spidermonkey and other JavaScript engines. | 438 // compatibility with Spidermonkey and other JavaScript engines. | 
| 425 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | 439 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | 
| 426 // IsWhiteSpace() includes line terminators! | 440 // IsWhiteSpace() includes line terminators! | 
| 427 if (kIsLineTerminator.get(c0_)) { | 441 if (kIsLineTerminator.get(c0_)) { | 
| 428 // Ignore line terminators, but remember them. This is necessary | 442 // Ignore line terminators, but remember them. This is necessary | 
| 429 // for automatic semicolon insertion. | 443 // for automatic semicolon insertion. | 
| (...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 505 Advance(); | 519 Advance(); | 
| 506 if (c0_ == '-') return SkipSingleLineComment(); | 520 if (c0_ == '-') return SkipSingleLineComment(); | 
| 507 PushBack('-'); // undo Advance() | 521 PushBack('-'); // undo Advance() | 
| 508 } | 522 } | 
| 509 PushBack('!'); // undo Advance() | 523 PushBack('!'); // undo Advance() | 
| 510 ASSERT(c0_ == '!'); | 524 ASSERT(c0_ == '!'); | 
| 511 return Token::LT; | 525 return Token::LT; | 
| 512 } | 526 } | 
| 513 | 527 | 
| 514 | 528 | 
| 515 void Scanner::Scan() { | 529 | 
| 530 void Scanner::ScanJson() { | |
| 516 next_.literal_buffer = NULL; | 531 next_.literal_buffer = NULL; | 
| 517 Token::Value token; | 532 Token::Value token; | 
| 518 has_line_terminator_before_next_ = false; | 533 has_line_terminator_before_next_ = false; | 
| 534 do { | |
| 535 // Remember the position of the next token | |
| 536 next_.location.beg_pos = source_pos(); | |
| 537 switch (c0_) { | |
| 538 case '\t': | |
| 539 case '\r': | |
| 540 case '\n': | |
| 541 case ' ': | |
| 542 Advance(); | |
| 543 token = Token::WHITESPACE; | |
| 544 break; | |
| 545 case '{': | |
| 546 Advance(); | |
| 547 token = Token::LBRACE; | |
| 548 break; | |
| 549 case '}': | |
| 550 Advance(); | |
| 551 token = Token::RBRACE; | |
| 552 break; | |
| 553 case '[': | |
| 554 Advance(); | |
| 555 token = Token::LBRACK; | |
| 556 break; | |
| 557 case ']': | |
| 558 Advance(); | |
| 559 token = Token::RBRACK; | |
| 560 break; | |
| 561 case ':': | |
| 562 Advance(); | |
| 563 token = Token::COLON; | |
| 564 break; | |
| 565 case ',': | |
| 566 Advance(); | |
| 567 token = Token::COMMA; | |
| 568 break; | |
| 569 case '"': | |
| 570 token = ScanJsonString(); | |
| 571 break; | |
| 572 case '-': | |
| 573 case '0': | |
| 574 case '1': | |
| 575 case '2': | |
| 576 case '3': | |
| 577 case '4': | |
| 578 case '5': | |
| 579 case '6': | |
| 580 case '7': | |
| 581 case '8': | |
| 582 case '9': | |
| 583 token = ScanJsonNumber(); | |
| 584 break; | |
| 585 case 't': | |
| 586 token = ScanJsonIdentifier("true", Token::TRUE_LITERAL); | |
| 587 break; | |
| 588 case 'f': | |
| 589 token = ScanJsonIdentifier("false", Token::FALSE_LITERAL); | |
| 590 break; | |
| 591 case 'n': | |
| 592 token = ScanJsonIdentifier("null", Token::NULL_LITERAL); | |
| 593 break; | |
| 594 default: | |
| 595 if (c0_ < 0) { | |
| 596 Advance(); | |
| 597 token = Token::EOS; | |
| 598 } else { | |
| 599 Advance(); | |
| 600 token = Select(Token::ILLEGAL); | |
| 601 } | |
| 602 } | |
| 603 } while (token == Token::WHITESPACE); | |
| 604 | |
| 605 next_.location.end_pos = source_pos(); | |
| 606 next_.token = token; | |
| 607 } | |
| 608 | |
| 609 | |
| 610 Token::Value Scanner::ScanJsonString() { | |
| 611 ASSERT_EQ('"', c0_); | |
| 612 Advance(); | |
| 613 StartLiteral(); | |
| 614 while (c0_ != '"' && c0_ > 0) { | |
| 615 // Check for control character (0x00-0x1f) or unterminated string (<0). | |
| 616 if (c0_ < 0x20) return Token::ILLEGAL; | |
| 617 if (c0_ != '\\') { | |
| 618 AddCharAdvance(); | |
| 619 } else { | |
| 620 Advance(); | |
| 621 switch (c0_) { | |
| 622 case '"': | |
| 623 case '\\': | |
| 624 case '/': | |
| 625 AddChar(c0_); | |
| 626 break; | |
| 627 case 'b': | |
| 628 AddChar('\x08'); | |
| 629 break; | |
| 630 case 'f': | |
| 631 AddChar('\x0c'); | |
| 632 break; | |
| 633 case 'n': | |
| 634 AddChar('\x0a'); | |
| 635 break; | |
| 636 case 'r': | |
| 637 AddChar('\x0d'); | |
| 638 break; | |
| 639 case 't': | |
| 640 AddChar('\x09'); | |
| 641 break; | |
| 642 case 'u': { | |
| 643 uc32 value = 0; | |
| 644 for (int i = 0; i < 4; i++) { | |
| 645 Advance(); | |
| 646 int digit = HexValue(c0_); | |
| 647 if (digit < 0) return Token::ILLEGAL; | |
| 648 value = value * 16 + digit; | |
| 649 } | |
| 650 AddChar(value); | |
| 651 break; | |
| 652 } | |
| 653 default: | |
| 654 return Token::ILLEGAL; | |
| 655 } | |
| 656 Advance(); | |
| 657 } | |
| 658 } | |
| 659 if (c0_ != '"') { | |
| 660 return Token::ILLEGAL; | |
| 661 } | |
| 662 TerminateLiteral(); | |
| 663 Advance(); | |
| 664 return Token::STRING; | |
| 665 } | |
| 666 | |
| 667 | |
| 668 Token::Value Scanner::ScanJsonNumber() { | |
| 669 StartLiteral(); | |
| 670 if (c0_ == '-') AddCharAdvance(); | |
| 671 if (c0_ == '0') { | |
| 672 AddCharAdvance(); | |
| 673 // Prefix zero is only allowed if it's the only digit before | |
| 674 // a decimal point or exponent. | |
| 675 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; | |
| 676 } else { | |
| 677 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; | |
| 678 do { | |
| 679 AddCharAdvance(); | |
| 680 } while (c0_ >= '0' && c0_ <= '9'); | |
| 681 } | |
| 682 if (c0_ == '.') { | |
| 683 AddCharAdvance(); | |
| 684 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; | |
| 685 do { | |
| 686 AddCharAdvance(); | |
| 687 } while (c0_ >= '0' && c0_ <= '9'); | |
| 688 } | |
| 689 if ((c0_ | 0x20) == 'e') { | |
| 690 AddCharAdvance(); | |
| 691 if (c0_ == '-' || c0_ == '+') AddCharAdvance(); | |
| 692 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; | |
| 
Rico
2010/02/01 10:40:58
According to spec this can actually be 0 (Exponent
 
Lasse Reichstein
2010/02/01 12:17:30
This code does allow a zero digit after the "e".
A
 | |
| 693 do { | |
| 694 AddCharAdvance(); | |
| 695 } while (c0_ >= '0' && c0_ <= '9'); | |
| 696 } | |
| 697 TerminateLiteral(); | |
| 698 return Token::NUMBER; | |
| 699 } | |
| 700 | |
| 701 | |
| 702 Token::Value Scanner::ScanJsonIdentifier(const char* text, | |
| 703 Token::Value token) { | |
| 704 StartLiteral(); | |
| 705 while (*text != '\0') { | |
| 706 if (c0_ != *text) return Token::ILLEGAL; | |
| 707 Advance(); | |
| 708 text++; | |
| 709 } | |
| 710 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; | |
| 711 TerminateLiteral(); | |
| 712 return token; | |
| 713 } | |
| 714 | |
| 715 | |
| 716 void Scanner::ScanJavaScript() { | |
| 717 next_.literal_buffer = NULL; | |
| 718 Token::Value token; | |
| 719 has_line_terminator_before_next_ = false; | |
| 519 do { | 720 do { | 
| 520 // Remember the position of the next token | 721 // Remember the position of the next token | 
| 521 next_.location.beg_pos = source_pos(); | 722 next_.location.beg_pos = source_pos(); | 
| 522 | 723 | 
| 523 switch (c0_) { | 724 switch (c0_) { | 
| 524 case ' ': | 725 case ' ': | 
| 525 case '\t': | 726 case '\t': | 
| 526 Advance(); | 727 Advance(); | 
| 527 token = Token::WHITESPACE; | 728 token = Token::WHITESPACE; | 
| 528 break; | 729 break; | 
| (...skipping 560 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1089 } | 1290 } | 
| 1090 AddCharAdvance(); | 1291 AddCharAdvance(); | 
| 1091 } | 1292 } | 
| 1092 TerminateLiteral(); | 1293 TerminateLiteral(); | 
| 1093 | 1294 | 
| 1094 next_.location.end_pos = source_pos() - 1; | 1295 next_.location.end_pos = source_pos() - 1; | 
| 1095 return true; | 1296 return true; | 
| 1096 } | 1297 } | 
| 1097 | 1298 | 
| 1098 } } // namespace v8::internal | 1299 } } // namespace v8::internal | 
| OLD | NEW |