src/scanner.cc - Issue 7039037: Create stand-alone json parser (including scanner).

Side by Side Diff: src/scanner.cc

Issue 7039037: Create stand-alone json parser (including scanner). (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: '' Created 9 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 324 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
335 // in object literals.	335 // in object literals.

336 Init();	336 Init();

337 // Skip initial whitespace allowing HTML comment ends just like	337 // Skip initial whitespace allowing HTML comment ends just like

338 // after a newline and scan first token.	338 // after a newline and scan first token.

339 has_line_terminator_before_next_ = true;	339 has_line_terminator_before_next_ = true;

340 SkipWhiteSpace();	340 SkipWhiteSpace();

341 Scan();	341 Scan();

342 }	342 }

343	343

344	344

345 // ----------------------------------------------------------------------------

346 // JsonScanner

347

348 JsonScanner::JsonScanner(UnicodeCache* unicode_cache)

349 : Scanner(unicode_cache) { }

350

351

352 void JsonScanner::Initialize(UC16CharacterStream* source) {

353 source_ = source;

354 Init();

355 // Skip initial whitespace.

356 SkipJsonWhiteSpace();

357 // Preload first token as look-ahead.

358 ScanJson();

359 }

360

361

362 Token::Value JsonScanner::Next() {

363 // BUG 1215673: Find a thread safe way to set a stack limit in

364 // pre-parse mode. Otherwise, we cannot safely pre-parse from other

365 // threads.

366 current_ = next_;

367 // Check for stack-overflow before returning any tokens.

368 ScanJson();

369 return current_.token;

370 }

371

372

373 bool JsonScanner::SkipJsonWhiteSpace() {

374 int start_position = source_pos();

375 // JSON WhiteSpace is tab, carrige-return, newline and space.

376 while (c0_ == ' ' \|\| c0_ == '\n' \|\| c0_ == '\r' \|\| c0_ == '\t') {

377 Advance();

378 }

379 return source_pos() != start_position;

380 }

381

382

383 void JsonScanner::ScanJson() {

384 next_.literal_chars = NULL;

385 Token::Value token;

386 do {

387 // Remember the position of the next token

388 next_.location.beg_pos = source_pos();

389 switch (c0_) {

390 case '\t':

391 case '\r':

392 case '\n':

393 case ' ':

394 Advance();

395 token = Token::WHITESPACE;

396 break;

397 case '{':

398 Advance();

399 token = Token::LBRACE;

400 break;

401 case '}':

402 Advance();

403 token = Token::RBRACE;

404 break;

405 case '[':

406 Advance();

407 token = Token::LBRACK;

408 break;

409 case ']':

410 Advance();

411 token = Token::RBRACK;

412 break;

413 case ':':

414 Advance();

415 token = Token::COLON;

416 break;

417 case ',':

418 Advance();

419 token = Token::COMMA;

420 break;

421 case '"':

422 token = ScanJsonString();

423 break;

424 case '-':

425 case '0':

426 case '1':

427 case '2':

428 case '3':

429 case '4':

430 case '5':

431 case '6':

432 case '7':

433 case '8':

434 case '9':

435 token = ScanJsonNumber();

436 break;

437 case 't':

438 token = ScanJsonIdentifier("true", Token::TRUE_LITERAL);

439 break;

440 case 'f':

441 token = ScanJsonIdentifier("false", Token::FALSE_LITERAL);

442 break;

443 case 'n':

444 token = ScanJsonIdentifier("null", Token::NULL_LITERAL);

445 break;

446 default:

447 if (c0_ < 0) {

448 Advance();

449 token = Token::EOS;

450 } else {

451 Advance();

452 token = Select(Token::ILLEGAL);

453 }

454 }

455 } while (token == Token::WHITESPACE);

456

457 next_.location.end_pos = source_pos();

458 next_.token = token;

459 }

460

461

462 Token::Value JsonScanner::ScanJsonString() {

463 ASSERT_EQ('"', c0_);

464 Advance();

465 LiteralScope literal(this);

466 while (c0_ != '"') {

467 // Check for control character (0x00-0x1f) or unterminated string (<0).

468 if (c0_ < 0x20) return Token::ILLEGAL;

469 if (c0_ != '\\') {

470 AddLiteralCharAdvance();

471 } else {

472 Advance();

473 switch (c0_) {

474 case '"':

475 case '\\':

476 case '/':

477 AddLiteralChar(c0_);

478 break;

479 case 'b':

480 AddLiteralChar('\x08');

481 break;

482 case 'f':

483 AddLiteralChar('\x0c');

484 break;

485 case 'n':

486 AddLiteralChar('\x0a');

487 break;

488 case 'r':

489 AddLiteralChar('\x0d');

490 break;

491 case 't':

492 AddLiteralChar('\x09');

493 break;

494 case 'u': {

495 uc32 value = 0;

496 for (int i = 0; i < 4; i++) {

497 Advance();

498 int digit = HexValue(c0_);

499 if (digit < 0) {

500 return Token::ILLEGAL;

501 }

502 value = value * 16 + digit;

503 }

504 AddLiteralChar(value);

505 break;

506 }

507 default:

508 return Token::ILLEGAL;

509 }

510 Advance();

511 }

512 }

513 literal.Complete();

514 Advance();

515 return Token::STRING;

516 }

517

518

519 Token::Value JsonScanner::ScanJsonNumber() {

520 LiteralScope literal(this);

521 bool negative = false;

522

523 if (c0_ == '-') {

524 AddLiteralCharAdvance();

525 negative = true;

526 }

527 if (c0_ == '0') {

528 AddLiteralCharAdvance();

529 // Prefix zero is only allowed if it's the only digit before

530 // a decimal point or exponent.

531 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;

532 } else {

533 int i = 0;

534 int digits = 0;

535 if (c0_ < '1' \|\| c0_ > '9') return Token::ILLEGAL;

536 do {

537 i = i * 10 + c0_ - '0';

538 digits++;

539 AddLiteralCharAdvance();

540 } while (c0_ >= '0' && c0_ <= '9');

541 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {

542 number_ = (negative ? -i : i);

543 return Token::NUMBER;

544 }

545 }

546 if (c0_ == '.') {

547 AddLiteralCharAdvance();

548 if (c0_ < '0' \|\| c0_ > '9') return Token::ILLEGAL;

549 do {

550 AddLiteralCharAdvance();

551 } while (c0_ >= '0' && c0_ <= '9');

552 }

553 if (AsciiAlphaToLower(c0_) == 'e') {

554 AddLiteralCharAdvance();

555 if (c0_ == '-' \|\| c0_ == '+') AddLiteralCharAdvance();

556 if (c0_ < '0' \|\| c0_ > '9') return Token::ILLEGAL;

557 do {

558 AddLiteralCharAdvance();

559 } while (c0_ >= '0' && c0_ <= '9');

560 }

561 literal.Complete();

562 ASSERT_NOT_NULL(next_.literal_chars);

563 number_ = StringToDouble(unicode_cache_,

564 next_.literal_chars->ascii_literal(),

565 NO_FLAGS, // Hex, octal or trailing junk.

566 OS::nan_value());

567 return Token::NUMBER;

568 }

569

570

571 Token::Value JsonScanner::ScanJsonIdentifier(const char* text,

572 Token::Value token) {

573 LiteralScope literal(this);

574 while (*text != '\0') {

575 if (c0_ != *text) return Token::ILLEGAL;

576 Advance();

577 text++;

578 }

579 if (unicode_cache_->IsIdentifierPart(c0_)) return Token::ILLEGAL;

580 literal.Complete();

581 return token;

582 }

583

584

585 } } // namespace v8::internal	345 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner.h ('k') | no next file » | no next file with comments »