src/scanner.cc - Issue 549207: Added validating JSON parser mode to parser.

Side by Side Diff: src/scanner.cc

Issue 549207: Added validating JSON parser mode to parser. (Closed)

Patch Set: Created 10 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 305 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
316 UNREACHABLE();	316 UNREACHABLE();

317 }	317 }

318 // On fallthrough, it's a failure.	318 // On fallthrough, it's a failure.

319 state_ = UNMATCHABLE;	319 state_ = UNMATCHABLE;

320 }	320 }

321	321

322	322

323 // ----------------------------------------------------------------------------	323 // ----------------------------------------------------------------------------

324 // Scanner	324 // Scanner

325	325

326 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { }	326 Scanner::Scanner(ParserMode pre)

	327 : stack_overflow_(false), is_pre_parsing_(pre == PREPARSE) { }

327	328

328	329

329 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,	330 void Scanner::Init(Handle<String> source,

330 int position) {	331 unibrow::CharacterStream* stream,

	332 int position,

	333 ParserLanguage language) {

331 // Initialize the source buffer.	334 // Initialize the source buffer.

332 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {	335 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {

333 two_byte_string_buffer_.Initialize(	336 two_byte_string_buffer_.Initialize(

334 Handle<ExternalTwoByteString>::cast(source));	337 Handle<ExternalTwoByteString>::cast(source));

335 source_ = &two_byte_string_buffer_;	338 source_ = &two_byte_string_buffer_;

336 } else {	339 } else {

337 char_stream_buffer_.Initialize(source, stream);	340 char_stream_buffer_.Initialize(source, stream);

338 source_ = &char_stream_buffer_;	341 source_ = &char_stream_buffer_;

339 }	342 }

340	343

341 position_ = position;	344 position_ = position;

	345 is_parsing_json_ = (language == JSON);

342	346

343 // Set c0_ (one character ahead)	347 // Set c0_ (one character ahead)

344 ASSERT(kCharacterLookaheadBufferSize == 1);	348 ASSERT(kCharacterLookaheadBufferSize == 1);

345 Advance();	349 Advance();

346 // Initializer current_ to not refer to a literal buffer.	350 // Initializer current_ to not refer to a literal buffer.

347 current_.literal_buffer = NULL;	351 current_.literal_buffer = NULL;

348	352

349 // Skip initial whitespace allowing HTML comment ends just like	353 // Skip initial whitespace allowing HTML comment ends just like

350 // after a newline and scan first token.	354 // after a newline and scan first token.

351 has_line_terminator_before_next_ = true;	355 has_line_terminator_before_next_ = true;

(...skipping 57 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
409 // Unicode character; this implies that in a Unicode context the	413 // Unicode character; this implies that in a Unicode context the

410 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	414 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

411 // character expressed in little-endian byte order (since it could	415 // character expressed in little-endian byte order (since it could

412 // not be a U+FFFE character expressed in big-endian byte	416 // not be a U+FFFE character expressed in big-endian byte

413 // order). Nevertheless, we check for it to be compatible with	417 // order). Nevertheless, we check for it to be compatible with

414 // Spidermonkey.	418 // Spidermonkey.

415 return c == 0xFEFF \|\| c == 0xFFFE;	419 return c == 0xFEFF \|\| c == 0xFFFE;

416 }	420 }

417	421

418	422

419 bool Scanner::SkipWhiteSpace() {	423 bool Scanner::SkipJsonWhiteSpace() {

	424 int start_position = source_pos();

	425 // JSON WhiteSpace is tab, carrige-return, newline and space.

	426 while (c0_ == ' ' \|\| c0_ == '\n' \|\| c0_ == '\r' \|\| c0_ == '\t') {

	427 Advance();

	428 }

	429 return source_pos() != start_position;

	430 }

	431

	432

	433 bool Scanner::SkipJavaScriptWhiteSpace() {

420 int start_position = source_pos();	434 int start_position = source_pos();

421	435

422 while (true) {	436 while (true) {

423 // We treat byte-order marks (BOMs) as whitespace for better	437 // We treat byte-order marks (BOMs) as whitespace for better

424 // compatibility with Spidermonkey and other JavaScript engines.	438 // compatibility with Spidermonkey and other JavaScript engines.

425 while (kIsWhiteSpace.get(c0_) \|\| IsByteOrderMark(c0_)) {	439 while (kIsWhiteSpace.get(c0_) \|\| IsByteOrderMark(c0_)) {

426 // IsWhiteSpace() includes line terminators!	440 // IsWhiteSpace() includes line terminators!

427 if (kIsLineTerminator.get(c0_)) {	441 if (kIsLineTerminator.get(c0_)) {

428 // Ignore line terminators, but remember them. This is necessary	442 // Ignore line terminators, but remember them. This is necessary

429 // for automatic semicolon insertion.	443 // for automatic semicolon insertion.

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
505 Advance();	519 Advance();

506 if (c0_ == '-') return SkipSingleLineComment();	520 if (c0_ == '-') return SkipSingleLineComment();

507 PushBack('-'); // undo Advance()	521 PushBack('-'); // undo Advance()

508 }	522 }

509 PushBack('!'); // undo Advance()	523 PushBack('!'); // undo Advance()

510 ASSERT(c0_ == '!');	524 ASSERT(c0_ == '!');

511 return Token::LT;	525 return Token::LT;

512 }	526 }

513	527

514	528

515 void Scanner::Scan() {	529

	530 void Scanner::ScanJson() {

516 next_.literal_buffer = NULL;	531 next_.literal_buffer = NULL;

517 Token::Value token;	532 Token::Value token;

518 has_line_terminator_before_next_ = false;	533 has_line_terminator_before_next_ = false;

	534 do {

	535 // Remember the position of the next token

	536 next_.location.beg_pos = source_pos();

	537 switch (c0_) {

	538 case '\t':

	539 case '\r':

	540 case '\n':

	541 case ' ':

	542 Advance();

	543 token = Token::WHITESPACE;

	544 break;

	545 case '{':

	546 Advance();

	547 token = Token::LBRACE;

	548 break;

	549 case '}':

	550 Advance();

	551 token = Token::RBRACE;

	552 break;

	553 case '[':

	554 Advance();

	555 token = Token::LBRACK;

	556 break;

	557 case ']':

	558 Advance();

	559 token = Token::RBRACK;

	560 break;

	561 case ':':

	562 Advance();

	563 token = Token::COLON;

	564 break;

	565 case ',':

	566 Advance();

	567 token = Token::COMMA;

	568 break;

	569 case '"':

	570 token = ScanJsonString();

	571 break;

	572 case '-':

	573 case '0':

	574 case '1':

	575 case '2':

	576 case '3':

	577 case '4':

	578 case '5':

	579 case '6':

	580 case '7':

	581 case '8':

	582 case '9':

	583 token = ScanJsonNumber();

	584 break;

	585 case 't':

	586 token = ScanJsonIdentifier("true", Token::TRUE_LITERAL);

	587 break;

	588 case 'f':

	589 token = ScanJsonIdentifier("false", Token::FALSE_LITERAL);

	590 break;

	591 case 'n':

	592 token = ScanJsonIdentifier("null", Token::NULL_LITERAL);

	593 break;

	594 default:

	595 if (c0_ < 0) {

	596 Advance();

	597 token = Token::EOS;

	598 } else {

	599 Advance();

	600 token = Select(Token::ILLEGAL);

	601 }

	602 }

	603 } while (token == Token::WHITESPACE);

	604

	605 next_.location.end_pos = source_pos();

	606 next_.token = token;

	607 }

	608

	609

	610 Token::Value Scanner::ScanJsonString() {

	611 ASSERT_EQ('"', c0_);

	612 Advance();

	613 StartLiteral();

	614 while (c0_ != '"' && c0_ > 0) {

	615 // Check for control character (0x00-0x1f) or unterminated string (<0).

	616 if (c0_ < 0x20) return Token::ILLEGAL;

	617 if (c0_ != '\\') {

	618 AddCharAdvance();

	619 } else {

	620 Advance();

	621 switch (c0_) {

	622 case '"':

	623 case '\\':

	624 case '/':

	625 AddChar(c0_);

	626 break;

	627 case 'b':

	628 AddChar('\x08');

	629 break;

	630 case 'f':

	631 AddChar('\x0c');

	632 break;

	633 case 'n':

	634 AddChar('\x0a');

	635 break;

	636 case 'r':

	637 AddChar('\x0d');

	638 break;

	639 case 't':

	640 AddChar('\x09');

	641 break;

	642 case 'u': {

	643 uc32 value = 0;

	644 for (int i = 0; i < 4; i++) {

	645 Advance();

	646 int digit = HexValue(c0_);

	647 if (digit < 0) return Token::ILLEGAL;

	648 value = value * 16 + digit;

	649 }

	650 AddChar(value);

	651 break;

	652 }

	653 default:

	654 return Token::ILLEGAL;

	655 }

	656 Advance();

	657 }

	658 }

	659 if (c0_ != '"') {

	660 return Token::ILLEGAL;

	661 }

	662 TerminateLiteral();

	663 Advance();

	664 return Token::STRING;

	665 }

	666

	667

	668 Token::Value Scanner::ScanJsonNumber() {

	669 StartLiteral();

	670 if (c0_ == '-') AddCharAdvance();

	671 if (c0_ == '0') {

	672 AddCharAdvance();

	673 // Prefix zero is only allowed if it's the only digit before

	674 // a decimal point or exponent.

	675 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;

	676 } else {

	677 if (c0_ < '1' \|\| c0_ > '9') return Token::ILLEGAL;

	678 do {

	679 AddCharAdvance();

	680 } while (c0_ >= '0' && c0_ <= '9');

	681 }

	682 if (c0_ == '.') {

	683 AddCharAdvance();

	684 if (c0_ < '0' \|\| c0_ > '9') return Token::ILLEGAL;

	685 do {

	686 AddCharAdvance();

	687 } while (c0_ >= '0' && c0_ <= '9');

	688 }

	689 if ((c0_ \| 0x20) == 'e') {

	690 AddCharAdvance();

	691 if (c0_ == '-' \|\| c0_ == '+') AddCharAdvance();

	692 if (c0_ < '0' \|\| c0_ > '9') return Token::ILLEGAL;
	Rico 2010/02/01 10:40:58: According to spec this can actually be 0 (ExponentPart is used here and allows arbitrary DecimalDigits) - although I agree that this does not make much sense.
	Lasse Reichstein 2010/02/01 12:17:30: This code does allow a zero digit after the "e". As for sense ... I guess you can't rule it out in automatically generated JSON. In retrospect, I should probably have used an IsDigit predicate for readability.
	693 do {

	694 AddCharAdvance();

	695 } while (c0_ >= '0' && c0_ <= '9');

	696 }

	697 TerminateLiteral();

	698 return Token::NUMBER;

	699 }

	700

	701

	702 Token::Value Scanner::ScanJsonIdentifier(const char* text,

	703 Token::Value token) {

	704 StartLiteral();

	705 while (*text != '\0') {

	706 if (c0_ != *text) return Token::ILLEGAL;

	707 Advance();

	708 text++;

	709 }

	710 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;

	711 TerminateLiteral();

	712 return token;

	713 }

	714

	715

	716 void Scanner::ScanJavaScript() {

	717 next_.literal_buffer = NULL;

	718 Token::Value token;

	719 has_line_terminator_before_next_ = false;

519 do {	720 do {

520 // Remember the position of the next token	721 // Remember the position of the next token

521 next_.location.beg_pos = source_pos();	722 next_.location.beg_pos = source_pos();

522	723

523 switch (c0_) {	724 switch (c0_) {

524 case ' ':	725 case ' ':

525 case '\t':	726 case '\t':

526 Advance();	727 Advance();

527 token = Token::WHITESPACE;	728 token = Token::WHITESPACE;

528 break;	729 break;

(...skipping 560 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1089 }	1290 }

1090 AddCharAdvance();	1291 AddCharAdvance();

1091 }	1292 }

1092 TerminateLiteral();	1293 TerminateLiteral();

1093	1294

1094 next_.location.end_pos = source_pos() - 1;	1295 next_.location.end_pos = source_pos() - 1;

1095 return true;	1296 return true;

1096 }	1297 }

1097	1298

1098 } } // namespace v8::internal	1299 } } // namespace v8::internal

OLD	NEW

« src/scanner.h ('K') | « src/scanner.h ('k') | test/mjsunit/debug-compile-event.js » ('j') | no next file with comments »