src/parsing/scanner.cc - Issue 2366573002: [parser] Use Back2() where appropriate.

Side by Side Diff: src/parsing/scanner.cc

Issue 2366573002: [parser] Use Back2() where appropriate. (Closed)

Patch Set: Add comments. Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/parsing/scanner.h"	7 #include "src/parsing/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

(...skipping 322 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
333 // The Unicode value U+FFFE is guaranteed never to be assigned as a	333 // The Unicode value U+FFFE is guaranteed never to be assigned as a

334 // Unicode character; this implies that in a Unicode context the	334 // Unicode character; this implies that in a Unicode context the

335 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	335 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

336 // character expressed in little-endian byte order (since it could	336 // character expressed in little-endian byte order (since it could

337 // not be a U+FFFE character expressed in big-endian byte	337 // not be a U+FFFE character expressed in big-endian byte

338 // order). Nevertheless, we check for it to be compatible with	338 // order). Nevertheless, we check for it to be compatible with

339 // Spidermonkey.	339 // Spidermonkey.

340 return c == 0xFFFE;	340 return c == 0xFFFE;

341 }	341 }

342	342

343

344 bool Scanner::SkipWhiteSpace() {	343 bool Scanner::SkipWhiteSpace() {

345 int start_position = source_pos();	344 int start_position = source_pos();

346	345

347 while (true) {	346 while (true) {

348 while (true) {	347 while (true) {

349 // The unicode cache accepts unsigned inputs.	348 // Don't skip behind the end of input.

350 if (c0_ == kEndOfInput) break;	349 if (c0_ == kEndOfInput) break;

	350

351 // Advance as long as character is a WhiteSpace or LineTerminator.	351 // Advance as long as character is a WhiteSpace or LineTerminator.

352 // Remember if the latter is the case.	352 // Remember if the latter is the case.

353 if (unicode_cache_->IsLineTerminator(c0_)) {	353 if (unicode_cache_->IsLineTerminator(c0_)) {

354 has_line_terminator_before_next_ = true;	354 has_line_terminator_before_next_ = true;

355 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&	355 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&

356 !IsLittleEndianByteOrderMark(c0_)) {	356 !IsLittleEndianByteOrderMark(c0_)) {

357 break;	357 break;

358 }	358 }

359 Advance();	359 Advance();

360 }	360 }

361	361

362 // If there is an HTML comment end '-->' at the beginning of a	362 // If there is an HTML comment end '-->' at the beginning of a

363 // line (with only whitespace in front of it), we treat the rest	363 // line (with only whitespace in front of it), we treat the rest

364 // of the line as a comment. This is in line with the way	364 // of the line as a comment. This is in line with the way

365 // SpiderMonkey handles it.	365 // SpiderMonkey handles it.

366 if (c0_ == '-' && has_line_terminator_before_next_) {	366 if (c0_ != '-' || !has_line_terminator_before_next_) break;

367 Advance();	367

368 if (c0_ == '-') {	368 Advance();

369 Advance();	369 if (c0_ != '-') {

370 if (c0_ == '>') {

371 // Treat the rest of the line as a comment.

372 SkipSingleLineComment();

373 // Continue skipping white space after the comment.

374 continue;

375 }

376 PushBack('-'); // undo Advance()

377 }

378 PushBack('-'); // undo Advance()	370 PushBack('-'); // undo Advance()

	371 break;

379 }	372 }

380 // Return whether or not we skipped any characters.	373

381 return source_pos() != start_position;	374 Advance();

	375 if (c0_ != '>') {

	376 PushBack2('-', '-'); // undo 2x Advance();

	377 break;

	378 }

	379

	380 // Treat the rest of the line as a comment.

	381 SkipSingleLineComment();

382 }	382 }

	383

	384 // Return whether or not we skipped any characters.

	385 return source_pos() != start_position;

383 }	386 }

384	387

385

386 Token::Value Scanner::SkipSingleLineComment() {	388 Token::Value Scanner::SkipSingleLineComment() {

387 Advance();	389 Advance();

388	390

389 // The line terminator at the end of the line is not considered	391 // The line terminator at the end of the line is not considered

390 // to be part of the single-line comment; it is recognized	392 // to be part of the single-line comment; it is recognized

391 // separately by the lexical grammar and becomes part of the	393 // separately by the lexical grammar and becomes part of the

392 // stream of input elements for the syntactic grammar (see	394 // stream of input elements for the syntactic grammar (see

393 // ECMA-262, section 7.4).	395 // ECMA-262, section 7.4).

394 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {	396 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {

395 Advance();	397 Advance();

(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
478 if (ch == '*' && c0_ == '/') {	480 if (ch == '*' && c0_ == '/') {

479 c0_ = ' ';	481 c0_ = ' ';

480 return Token::WHITESPACE;	482 return Token::WHITESPACE;

481 }	483 }

482 }	484 }

483	485

484 // Unterminated multi-line comment.	486 // Unterminated multi-line comment.

485 return Token::ILLEGAL;	487 return Token::ILLEGAL;

486 }	488 }

487	489

488

489 Token::Value Scanner::ScanHtmlComment() {	490 Token::Value Scanner::ScanHtmlComment() {

490 // Check for <!-- comments.	491 // Check for <!-- comments.

491 DCHECK(c0_ == '!');	492 DCHECK(c0_ == '!');

492 Advance();	493 Advance();

493 if (c0_ == '-') {	494 if (c0_ != '-') {

494 Advance();	495 PushBack('!'); // undo Advance()

495 if (c0_ == '-') {	496 return Token::LT;

496 found_html_comment_ = true;

497 return SkipSingleLineComment();

498 }

499 PushBack('-'); // undo Advance()

500 }	497 }

501 PushBack('!'); // undo Advance()	498

502 DCHECK(c0_ == '!');	499 Advance();

503 return Token::LT;	500 if (c0_ != '-') {

	501 PushBack2('-', '!'); // undo 2x Advance()

	502 return Token::LT;

	503 }

	504

	505 found_html_comment_ = true;

	506 return SkipSingleLineComment();

504 }	507 }

505	508

506

507 void Scanner::Scan() {	509 void Scanner::Scan() {

508 next_.literal_chars = NULL;	510 next_.literal_chars = NULL;

509 next_.raw_literal_chars = NULL;	511 next_.raw_literal_chars = NULL;

510 Token::Value token;	512 Token::Value token;

511 do {	513 do {

512 // Remember the position of the next token	514 // Remember the position of the next token

513 next_.location.beg_pos = source_pos();	515 next_.location.beg_pos = source_pos();

514	516

515 switch (c0_) {	517 switch (c0_) {

516 case ' ':	518 case ' ':

(...skipping 1117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1634 // 2, reset the source to the desired position,	1636 // 2, reset the source to the desired position,

1635 source_->Seek(position);	1637 source_->Seek(position);

1636 // 3, re-scan, by scanning the look-ahead char + 1 token (next_).	1638 // 3, re-scan, by scanning the look-ahead char + 1 token (next_).

1637 c0_ = source_->Advance();	1639 c0_ = source_->Advance();

1638 Next();	1640 Next();

1639 DCHECK_EQ(next_.location.beg_pos, position);	1641 DCHECK_EQ(next_.location.beg_pos, position);

1640 }	1642 }

1641	1643

1642 } // namespace internal	1644 } // namespace internal

1643 } // namespace v8	1645 } // namespace v8

OLD	NEW

« no previous file with comments | « src/parsing/scanner.h ('k') | test/cctest/parsing/test-scanner.cc » ('j') | no next file with comments »