OLD | NEW |
---|---|
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 305 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
316 UNREACHABLE(); | 316 UNREACHABLE(); |
317 } | 317 } |
318 // On fallthrough, it's a failure. | 318 // On fallthrough, it's a failure. |
319 state_ = UNMATCHABLE; | 319 state_ = UNMATCHABLE; |
320 } | 320 } |
321 | 321 |
322 | 322 |
323 // ---------------------------------------------------------------------------- | 323 // ---------------------------------------------------------------------------- |
324 // Scanner | 324 // Scanner |
325 | 325 |
326 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { } | 326 Scanner::Scanner(ParserMode pre) |
327 : stack_overflow_(false), is_pre_parsing_(pre == PREPARSE) { } | |
327 | 328 |
328 | 329 |
329 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, | 330 void Scanner::Init(Handle<String> source, |
330 int position) { | 331 unibrow::CharacterStream* stream, |
332 int position, | |
333 ParserLanguage language) { | |
331 // Initialize the source buffer. | 334 // Initialize the source buffer. |
332 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { | 335 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { |
333 two_byte_string_buffer_.Initialize( | 336 two_byte_string_buffer_.Initialize( |
334 Handle<ExternalTwoByteString>::cast(source)); | 337 Handle<ExternalTwoByteString>::cast(source)); |
335 source_ = &two_byte_string_buffer_; | 338 source_ = &two_byte_string_buffer_; |
336 } else { | 339 } else { |
337 char_stream_buffer_.Initialize(source, stream); | 340 char_stream_buffer_.Initialize(source, stream); |
338 source_ = &char_stream_buffer_; | 341 source_ = &char_stream_buffer_; |
339 } | 342 } |
340 | 343 |
341 position_ = position; | 344 position_ = position; |
345 is_parsing_json_ = (language == JSON); | |
342 | 346 |
343 // Set c0_ (one character ahead) | 347 // Set c0_ (one character ahead) |
344 ASSERT(kCharacterLookaheadBufferSize == 1); | 348 ASSERT(kCharacterLookaheadBufferSize == 1); |
345 Advance(); | 349 Advance(); |
346 // Initialize current_ to not refer to a literal buffer. | 350 // Initialize current_ to not refer to a literal buffer. |
347 current_.literal_buffer = NULL; | 351 current_.literal_buffer = NULL; |
348 | 352 |
349 // Skip initial whitespace allowing HTML comment ends just like | 353 // Skip initial whitespace allowing HTML comment ends just like |
350 // after a newline and scan first token. | 354 // after a newline and scan first token. |
351 has_line_terminator_before_next_ = true; | 355 has_line_terminator_before_next_ = true; |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
409 // Unicode character; this implies that in a Unicode context the | 413 // Unicode character; this implies that in a Unicode context the |
410 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 414 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
411 // character expressed in little-endian byte order (since it could | 415 // character expressed in little-endian byte order (since it could |
412 // not be a U+FFFE character expressed in big-endian byte | 416 // not be a U+FFFE character expressed in big-endian byte |
413 // order). Nevertheless, we check for it to be compatible with | 417 // order). Nevertheless, we check for it to be compatible with |
414 // Spidermonkey. | 418 // Spidermonkey. |
415 return c == 0xFEFF || c == 0xFFFE; | 419 return c == 0xFEFF || c == 0xFFFE; |
416 } | 420 } |
417 | 421 |
418 | 422 |
419 bool Scanner::SkipWhiteSpace() { | 423 bool Scanner::SkipJsonWhiteSpace() { |
424 int start_position = source_pos(); | |
425 // JSON WhiteSpace is tab, carrige-return, newline and space. | |
426 while (c0_ == ' ' || c0_ == '\n' || c0_ == '\r' || c0_ == '\t') { | |
427 Advance(); | |
428 } | |
429 return source_pos() != start_position; | |
430 } | |
431 | |
432 | |
433 bool Scanner::SkipJavaScriptWhiteSpace() { | |
420 int start_position = source_pos(); | 434 int start_position = source_pos(); |
421 | 435 |
422 while (true) { | 436 while (true) { |
423 // We treat byte-order marks (BOMs) as whitespace for better | 437 // We treat byte-order marks (BOMs) as whitespace for better |
424 // compatibility with Spidermonkey and other JavaScript engines. | 438 // compatibility with Spidermonkey and other JavaScript engines. |
425 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | 439 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { |
426 // IsWhiteSpace() includes line terminators! | 440 // IsWhiteSpace() includes line terminators! |
427 if (kIsLineTerminator.get(c0_)) { | 441 if (kIsLineTerminator.get(c0_)) { |
428 // Ignore line terminators, but remember them. This is necessary | 442 // Ignore line terminators, but remember them. This is necessary |
429 // for automatic semicolon insertion. | 443 // for automatic semicolon insertion. |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
505 Advance(); | 519 Advance(); |
506 if (c0_ == '-') return SkipSingleLineComment(); | 520 if (c0_ == '-') return SkipSingleLineComment(); |
507 PushBack('-'); // undo Advance() | 521 PushBack('-'); // undo Advance() |
508 } | 522 } |
509 PushBack('!'); // undo Advance() | 523 PushBack('!'); // undo Advance() |
510 ASSERT(c0_ == '!'); | 524 ASSERT(c0_ == '!'); |
511 return Token::LT; | 525 return Token::LT; |
512 } | 526 } |
513 | 527 |
514 | 528 |
515 void Scanner::Scan() { | 529 |
530 void Scanner::ScanJson() { | |
516 next_.literal_buffer = NULL; | 531 next_.literal_buffer = NULL; |
517 Token::Value token; | 532 Token::Value token; |
518 has_line_terminator_before_next_ = false; | 533 has_line_terminator_before_next_ = false; |
534 do { | |
535 // Remember the position of the next token | |
536 next_.location.beg_pos = source_pos(); | |
537 switch (c0_) { | |
538 case '\t': | |
539 case '\r': | |
540 case '\n': | |
541 case ' ': | |
542 Advance(); | |
543 token = Token::WHITESPACE; | |
544 break; | |
545 case '{': | |
546 Advance(); | |
547 token = Token::LBRACE; | |
548 break; | |
549 case '}': | |
550 Advance(); | |
551 token = Token::RBRACE; | |
552 break; | |
553 case '[': | |
554 Advance(); | |
555 token = Token::LBRACK; | |
556 break; | |
557 case ']': | |
558 Advance(); | |
559 token = Token::RBRACK; | |
560 break; | |
561 case ':': | |
562 Advance(); | |
563 token = Token::COLON; | |
564 break; | |
565 case ',': | |
566 Advance(); | |
567 token = Token::COMMA; | |
568 break; | |
569 case '"': | |
570 token = ScanJsonString(); | |
571 break; | |
572 case '-': | |
573 case '0': | |
574 case '1': | |
575 case '2': | |
576 case '3': | |
577 case '4': | |
578 case '5': | |
579 case '6': | |
580 case '7': | |
581 case '8': | |
582 case '9': | |
583 token = ScanJsonNumber(); | |
584 break; | |
585 case 't': | |
586 token = ScanJsonIdentifier("true", Token::TRUE_LITERAL); | |
587 break; | |
588 case 'f': | |
589 token = ScanJsonIdentifier("false", Token::FALSE_LITERAL); | |
590 break; | |
591 case 'n': | |
592 token = ScanJsonIdentifier("null", Token::NULL_LITERAL); | |
593 break; | |
594 default: | |
595 if (c0_ < 0) { | |
596 Advance(); | |
597 token = Token::EOS; | |
598 } else { | |
599 Advance(); | |
600 token = Select(Token::ILLEGAL); | |
601 } | |
602 } | |
603 } while (token == Token::WHITESPACE); | |
604 | |
605 next_.location.end_pos = source_pos(); | |
606 next_.token = token; | |
607 } | |
608 | |
609 | |
610 Token::Value Scanner::ScanJsonString() { | |
611 ASSERT_EQ('"', c0_); | |
612 Advance(); | |
613 StartLiteral(); | |
614 while (c0_ != '"' && c0_ > 0) { | |
615 // Check for control character (0x00-0x1f) or unterminated string (<0). | |
616 if (c0_ < 0x20) return Token::ILLEGAL; | |
617 if (c0_ != '\\') { | |
618 AddCharAdvance(); | |
619 } else { | |
620 Advance(); | |
621 switch (c0_) { | |
622 case '"': | |
623 case '\\': | |
624 case '/': | |
625 AddChar(c0_); | |
626 break; | |
627 case 'b': | |
628 AddChar('\x08'); | |
629 break; | |
630 case 'f': | |
631 AddChar('\x0c'); | |
632 break; | |
633 case 'n': | |
634 AddChar('\x0a'); | |
635 break; | |
636 case 'r': | |
637 AddChar('\x0d'); | |
638 break; | |
639 case 't': | |
640 AddChar('\x09'); | |
641 break; | |
642 case 'u': { | |
643 uc32 value = 0; | |
644 for (int i = 0; i < 4; i++) { | |
645 Advance(); | |
646 int digit = HexValue(c0_); | |
647 if (digit < 0) return Token::ILLEGAL; | |
648 value = value * 16 + digit; | |
649 } | |
650 AddChar(value); | |
651 break; | |
652 } | |
653 default: | |
654 return Token::ILLEGAL; | |
655 } | |
656 Advance(); | |
657 } | |
658 } | |
659 if (c0_ != '"') { | |
660 return Token::ILLEGAL; | |
661 } | |
662 TerminateLiteral(); | |
663 Advance(); | |
664 return Token::STRING; | |
665 } | |
666 | |
667 | |
668 Token::Value Scanner::ScanJsonNumber() { | |
669 StartLiteral(); | |
670 if (c0_ == '-') AddCharAdvance(); | |
671 if (c0_ == '0') { | |
672 AddCharAdvance(); | |
673 // Prefix zero is only allowed if it's the only digit before | |
674 // a decimal point or exponent. | |
675 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; | |
676 } else { | |
677 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; | |
678 do { | |
679 AddCharAdvance(); | |
680 } while (c0_ >= '0' && c0_ <= '9'); | |
681 } | |
682 if (c0_ == '.') { | |
683 AddCharAdvance(); | |
684 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; | |
685 do { | |
686 AddCharAdvance(); | |
687 } while (c0_ >= '0' && c0_ <= '9'); | |
688 } | |
689 if ((c0_ | 0x20) == 'e') { | |
690 AddCharAdvance(); | |
691 if (c0_ == '-' || c0_ == '+') AddCharAdvance(); | |
692 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; | |
Rico
2010/02/01 10:40:58
According to spec this can actually be 0 (Exponent
Lasse Reichstein
2010/02/01 12:17:30
This code does allow a zero digit after the "e".
A
| |
693 do { | |
694 AddCharAdvance(); | |
695 } while (c0_ >= '0' && c0_ <= '9'); | |
696 } | |
697 TerminateLiteral(); | |
698 return Token::NUMBER; | |
699 } | |
700 | |
701 | |
702 Token::Value Scanner::ScanJsonIdentifier(const char* text, | |
703 Token::Value token) { | |
704 StartLiteral(); | |
705 while (*text != '\0') { | |
706 if (c0_ != *text) return Token::ILLEGAL; | |
707 Advance(); | |
708 text++; | |
709 } | |
710 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; | |
711 TerminateLiteral(); | |
712 return token; | |
713 } | |
714 | |
715 | |
716 void Scanner::ScanJavaScript() { | |
717 next_.literal_buffer = NULL; | |
718 Token::Value token; | |
719 has_line_terminator_before_next_ = false; | |
519 do { | 720 do { |
520 // Remember the position of the next token | 721 // Remember the position of the next token |
521 next_.location.beg_pos = source_pos(); | 722 next_.location.beg_pos = source_pos(); |
522 | 723 |
523 switch (c0_) { | 724 switch (c0_) { |
524 case ' ': | 725 case ' ': |
525 case '\t': | 726 case '\t': |
526 Advance(); | 727 Advance(); |
527 token = Token::WHITESPACE; | 728 token = Token::WHITESPACE; |
528 break; | 729 break; |
(...skipping 560 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1089 } | 1290 } |
1090 AddCharAdvance(); | 1291 AddCharAdvance(); |
1091 } | 1292 } |
1092 TerminateLiteral(); | 1293 TerminateLiteral(); |
1093 | 1294 |
1094 next_.location.end_pos = source_pos() - 1; | 1295 next_.location.end_pos = source_pos() - 1; |
1095 return true; | 1296 return true; |
1096 } | 1297 } |
1097 | 1298 |
1098 } } // namespace v8::internal | 1299 } } // namespace v8::internal |
OLD | NEW |