Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/scanner.cc

Issue 549207: Added validating JSON parser mode to parser. (Closed)
Patch Set: Created 10 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 305 matching lines...) Expand 10 before | Expand all | Expand 10 after
316 UNREACHABLE(); 316 UNREACHABLE();
317 } 317 }
318 // On fallthrough, it's a failure. 318 // On fallthrough, it's a failure.
319 state_ = UNMATCHABLE; 319 state_ = UNMATCHABLE;
320 } 320 }
321 321
322 322
323 // ---------------------------------------------------------------------------- 323 // ----------------------------------------------------------------------------
324 // Scanner 324 // Scanner
325 325
326 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { } 326 Scanner::Scanner(ParserMode pre)
327 : stack_overflow_(false), is_pre_parsing_(pre == PREPARSE) { }
327 328
328 329
329 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, 330 void Scanner::Init(Handle<String> source,
330 int position) { 331 unibrow::CharacterStream* stream,
332 int position,
333 ParserLanguage language) {
331 // Initialize the source buffer. 334 // Initialize the source buffer.
332 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { 335 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
333 two_byte_string_buffer_.Initialize( 336 two_byte_string_buffer_.Initialize(
334 Handle<ExternalTwoByteString>::cast(source)); 337 Handle<ExternalTwoByteString>::cast(source));
335 source_ = &two_byte_string_buffer_; 338 source_ = &two_byte_string_buffer_;
336 } else { 339 } else {
337 char_stream_buffer_.Initialize(source, stream); 340 char_stream_buffer_.Initialize(source, stream);
338 source_ = &char_stream_buffer_; 341 source_ = &char_stream_buffer_;
339 } 342 }
340 343
341 position_ = position; 344 position_ = position;
345 is_parsing_json_ = (language == JSON);
342 346
343 // Set c0_ (one character ahead) 347 // Set c0_ (one character ahead)
344 ASSERT(kCharacterLookaheadBufferSize == 1); 348 ASSERT(kCharacterLookaheadBufferSize == 1);
345 Advance(); 349 Advance();
346 // Initialize current_ to not refer to a literal buffer. 350 // Initialize current_ to not refer to a literal buffer.
347 current_.literal_buffer = NULL; 351 current_.literal_buffer = NULL;
348 352
349 // Skip initial whitespace allowing HTML comment ends just like 353 // Skip initial whitespace allowing HTML comment ends just like
350 // after a newline and scan first token. 354 // after a newline and scan first token.
351 has_line_terminator_before_next_ = true; 355 has_line_terminator_before_next_ = true;
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
409 // Unicode character; this implies that in a Unicode context the 413 // Unicode character; this implies that in a Unicode context the
410 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF 414 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
411 // character expressed in little-endian byte order (since it could 415 // character expressed in little-endian byte order (since it could
412 // not be a U+FFFE character expressed in big-endian byte 416 // not be a U+FFFE character expressed in big-endian byte
413 // order). Nevertheless, we check for it to be compatible with 417 // order). Nevertheless, we check for it to be compatible with
414 // Spidermonkey. 418 // Spidermonkey.
415 return c == 0xFEFF || c == 0xFFFE; 419 return c == 0xFEFF || c == 0xFFFE;
416 } 420 }
417 421
418 422
419 bool Scanner::SkipWhiteSpace() { 423 bool Scanner::SkipJsonWhiteSpace() {
424 int start_position = source_pos();
425 // JSON WhiteSpace is tab, carriage-return, newline and space.
426 while (c0_ == ' ' || c0_ == '\n' || c0_ == '\r' || c0_ == '\t') {
427 Advance();
428 }
429 return source_pos() != start_position;
430 }
431
432
433 bool Scanner::SkipJavaScriptWhiteSpace() {
420 int start_position = source_pos(); 434 int start_position = source_pos();
421 435
422 while (true) { 436 while (true) {
423 // We treat byte-order marks (BOMs) as whitespace for better 437 // We treat byte-order marks (BOMs) as whitespace for better
424 // compatibility with Spidermonkey and other JavaScript engines. 438 // compatibility with Spidermonkey and other JavaScript engines.
425 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { 439 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
426 // IsWhiteSpace() includes line terminators! 440 // IsWhiteSpace() includes line terminators!
427 if (kIsLineTerminator.get(c0_)) { 441 if (kIsLineTerminator.get(c0_)) {
428 // Ignore line terminators, but remember them. This is necessary 442 // Ignore line terminators, but remember them. This is necessary
429 // for automatic semicolon insertion. 443 // for automatic semicolon insertion.
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
505 Advance(); 519 Advance();
506 if (c0_ == '-') return SkipSingleLineComment(); 520 if (c0_ == '-') return SkipSingleLineComment();
507 PushBack('-'); // undo Advance() 521 PushBack('-'); // undo Advance()
508 } 522 }
509 PushBack('!'); // undo Advance() 523 PushBack('!'); // undo Advance()
510 ASSERT(c0_ == '!'); 524 ASSERT(c0_ == '!');
511 return Token::LT; 525 return Token::LT;
512 } 526 }
513 527
514 528
515 void Scanner::Scan() { 529
530 void Scanner::ScanJson() {
516 next_.literal_buffer = NULL; 531 next_.literal_buffer = NULL;
517 Token::Value token; 532 Token::Value token;
518 has_line_terminator_before_next_ = false; 533 has_line_terminator_before_next_ = false;
534 do {
535 // Remember the position of the next token
536 next_.location.beg_pos = source_pos();
537 switch (c0_) {
538 case '\t':
539 case '\r':
540 case '\n':
541 case ' ':
542 Advance();
543 token = Token::WHITESPACE;
544 break;
545 case '{':
546 Advance();
547 token = Token::LBRACE;
548 break;
549 case '}':
550 Advance();
551 token = Token::RBRACE;
552 break;
553 case '[':
554 Advance();
555 token = Token::LBRACK;
556 break;
557 case ']':
558 Advance();
559 token = Token::RBRACK;
560 break;
561 case ':':
562 Advance();
563 token = Token::COLON;
564 break;
565 case ',':
566 Advance();
567 token = Token::COMMA;
568 break;
569 case '"':
570 token = ScanJsonString();
571 break;
572 case '-':
573 case '0':
574 case '1':
575 case '2':
576 case '3':
577 case '4':
578 case '5':
579 case '6':
580 case '7':
581 case '8':
582 case '9':
583 token = ScanJsonNumber();
584 break;
585 case 't':
586 token = ScanJsonIdentifier("true", Token::TRUE_LITERAL);
587 break;
588 case 'f':
589 token = ScanJsonIdentifier("false", Token::FALSE_LITERAL);
590 break;
591 case 'n':
592 token = ScanJsonIdentifier("null", Token::NULL_LITERAL);
593 break;
594 default:
595 if (c0_ < 0) {
596 Advance();
597 token = Token::EOS;
598 } else {
599 Advance();
600 token = Select(Token::ILLEGAL);
601 }
602 }
603 } while (token == Token::WHITESPACE);
604
605 next_.location.end_pos = source_pos();
606 next_.token = token;
607 }
608
609
610 Token::Value Scanner::ScanJsonString() {
611 ASSERT_EQ('"', c0_);
612 Advance();
613 StartLiteral();
614 while (c0_ != '"' && c0_ > 0) {
615 // Check for control character (0x00-0x1f) or unterminated string (<0).
616 if (c0_ < 0x20) return Token::ILLEGAL;
617 if (c0_ != '\\') {
618 AddCharAdvance();
619 } else {
620 Advance();
621 switch (c0_) {
622 case '"':
623 case '\\':
624 case '/':
625 AddChar(c0_);
626 break;
627 case 'b':
628 AddChar('\x08');
629 break;
630 case 'f':
631 AddChar('\x0c');
632 break;
633 case 'n':
634 AddChar('\x0a');
635 break;
636 case 'r':
637 AddChar('\x0d');
638 break;
639 case 't':
640 AddChar('\x09');
641 break;
642 case 'u': {
643 uc32 value = 0;
644 for (int i = 0; i < 4; i++) {
645 Advance();
646 int digit = HexValue(c0_);
647 if (digit < 0) return Token::ILLEGAL;
648 value = value * 16 + digit;
649 }
650 AddChar(value);
651 break;
652 }
653 default:
654 return Token::ILLEGAL;
655 }
656 Advance();
657 }
658 }
659 if (c0_ != '"') {
660 return Token::ILLEGAL;
661 }
662 TerminateLiteral();
663 Advance();
664 return Token::STRING;
665 }
666
667
668 Token::Value Scanner::ScanJsonNumber() {
669 StartLiteral();
670 if (c0_ == '-') AddCharAdvance();
671 if (c0_ == '0') {
672 AddCharAdvance();
673 // Prefix zero is only allowed if it's the only digit before
674 // a decimal point or exponent.
675 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
676 } else {
677 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;
678 do {
679 AddCharAdvance();
680 } while (c0_ >= '0' && c0_ <= '9');
681 }
682 if (c0_ == '.') {
683 AddCharAdvance();
684 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
685 do {
686 AddCharAdvance();
687 } while (c0_ >= '0' && c0_ <= '9');
688 }
689 if ((c0_ | 0x20) == 'e') {
690 AddCharAdvance();
691 if (c0_ == '-' || c0_ == '+') AddCharAdvance();
692 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
Rico 2010/02/01 10:40:58 According to spec this can actually be 0 (Exponent
Lasse Reichstein 2010/02/01 12:17:30 This code does allow a zero digit after the "e". A
693 do {
694 AddCharAdvance();
695 } while (c0_ >= '0' && c0_ <= '9');
696 }
697 TerminateLiteral();
698 return Token::NUMBER;
699 }
700
701
702 Token::Value Scanner::ScanJsonIdentifier(const char* text,
703 Token::Value token) {
704 StartLiteral();
705 while (*text != '\0') {
706 if (c0_ != *text) return Token::ILLEGAL;
707 Advance();
708 text++;
709 }
710 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
711 TerminateLiteral();
712 return token;
713 }
714
715
716 void Scanner::ScanJavaScript() {
717 next_.literal_buffer = NULL;
718 Token::Value token;
719 has_line_terminator_before_next_ = false;
519 do { 720 do {
520 // Remember the position of the next token 721 // Remember the position of the next token
521 next_.location.beg_pos = source_pos(); 722 next_.location.beg_pos = source_pos();
522 723
523 switch (c0_) { 724 switch (c0_) {
524 case ' ': 725 case ' ':
525 case '\t': 726 case '\t':
526 Advance(); 727 Advance();
527 token = Token::WHITESPACE; 728 token = Token::WHITESPACE;
528 break; 729 break;
(...skipping 560 matching lines...) Expand 10 before | Expand all | Expand 10 after
1089 } 1290 }
1090 AddCharAdvance(); 1291 AddCharAdvance();
1091 } 1292 }
1092 TerminateLiteral(); 1293 TerminateLiteral();
1093 1294
1094 next_.location.end_pos = source_pos() - 1; 1295 next_.location.end_pos = source_pos() - 1;
1095 return true; 1296 return true;
1096 } 1297 }
1097 1298
1098 } } // namespace v8::internal 1299 } } // namespace v8::internal
OLDNEW
« src/scanner.h ('K') | « src/scanner.h ('k') | test/mjsunit/debug-compile-event.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698