src/scanner.h - Issue 8384003: Merged Scanner and JavaScriptScanner.

Side by Side Diff: src/scanner.h

Issue 8384003: Merged Scanner and JavaScriptScanner. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 9 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 242 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
253	253

254 bool is_ascii_;	254 bool is_ascii_;

255 int position_;	255 int position_;

256 Vector<byte> backing_store_;	256 Vector<byte> backing_store_;

257	257

258 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);	258 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);

259 };	259 };

260	260

261	261

262 // ----------------------------------------------------------------------------	262 // ----------------------------------------------------------------------------

263 // Scanner base-class.	263 // JavaScript Scanner.

264	264

265 // Generic functionality used by both JSON and JavaScript scanners.

266 class Scanner {	265 class Scanner {

267 public:	266 public:

268 // -1 is outside of the range of any real source code.	267 // Scoped helper for literal recording. Automatically drops the literal

269 static const int kNoOctalLocation = -1;	268 // if aborting the scanning before it's complete.

270

271 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;

272

273 class LiteralScope {	269 class LiteralScope {

274 public:	270 public:

275 explicit LiteralScope(Scanner* self);	271 explicit LiteralScope(Scanner* self)

276 ~LiteralScope();	272 : scanner_(self), complete_(false) {

277 void Complete();	273 scanner_->StartLiteral();

	274 }

	275 ~LiteralScope() {

	276 if (!complete_) scanner_->DropLiteral();

	277 }

	278 void Complete() {

	279 scanner_->TerminateLiteral();

	280 complete_ = true;

	281 }

278	282

279 private:	283 private:

280 Scanner* scanner_;	284 Scanner* scanner_;

281 bool complete_;	285 bool complete_;

282 };	286 };

283	287

284 explicit Scanner(UnicodeCache* scanner_contants);	288 // Representation of an interval of source positions.

285

286 // Returns the current token again.

287 Token::Value current_token() { return current_.token; }

288

289 // One token look-ahead (past the token returned by Next()).

290 Token::Value peek() const { return next_.token; }

291

292 struct Location {	289 struct Location {

293 Location(int b, int e) : beg_pos(b), end_pos(e) { }	290 Location(int b, int e) : beg_pos(b), end_pos(e) { }

294 Location() : beg_pos(0), end_pos(0) { }	291 Location() : beg_pos(0), end_pos(0) { }

295	292

296 bool IsValid() const {	293 bool IsValid() const {

297 return beg_pos >= 0 && end_pos >= beg_pos;	294 return beg_pos >= 0 && end_pos >= beg_pos;

298 }	295 }

299	296

300 static Location invalid() { return Location(-1, -1); }	297 static Location invalid() { return Location(-1, -1); }

301	298

302 int beg_pos;	299 int beg_pos;

303 int end_pos;	300 int end_pos;

304 };	301 };

305	302

	303 // -1 is outside of the range of any real source code.

	304 static const int kNoOctalLocation = -1;

	305

	306 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;

	307

	308 explicit Scanner(UnicodeCache* scanner_contants);

	309

	310 void Initialize(UC16CharacterStream* source);

	311

	312 // Returns the next token and advances input.

	313 Token::Value Next();

	314 // Returns the current token again.

	315 Token::Value current_token() { return current_.token; }

306 // Returns the location information for the current token	316 // Returns the location information for the current token

307 // (the token returned by Next()).	317 // (the token last returned by Next()).

308 Location location() const { return current_.location; }	318 Location location() const { return current_.location; }

309 Location peek_location() const { return next_.location; }

310

311 // Returns the literal string, if any, for the current token (the	319 // Returns the literal string, if any, for the current token (the

312 // token returned by Next()). The string is 0-terminated and in	320 // token last returned by Next()). The string is 0-terminated.

313 // UTF-8 format; they may contain 0-characters. Literal strings are	321 // Literal strings are collected for identifiers, strings, and

314 // collected for identifiers, strings, and numbers.	322 // numbers.

315 // These functions only give the correct result if the literal	323 // These functions only give the correct result if the literal

316 // was scanned between calls to StartLiteral() and TerminateLiteral().	324 // was scanned between calls to StartLiteral() and TerminateLiteral().

317 bool is_literal_ascii() {

318 ASSERT_NOT_NULL(current_.literal_chars);

319 return current_.literal_chars->is_ascii();

320 }

321 Vector<const char> literal_ascii_string() {	325 Vector<const char> literal_ascii_string() {

322 ASSERT_NOT_NULL(current_.literal_chars);	326 ASSERT_NOT_NULL(current_.literal_chars);

323 return current_.literal_chars->ascii_literal();	327 return current_.literal_chars->ascii_literal();

324 }	328 }

325 Vector<const uc16> literal_uc16_string() {	329 Vector<const uc16> literal_uc16_string() {

326 ASSERT_NOT_NULL(current_.literal_chars);	330 ASSERT_NOT_NULL(current_.literal_chars);

327 return current_.literal_chars->uc16_literal();	331 return current_.literal_chars->uc16_literal();

328 }	332 }

	333 bool is_literal_ascii() {

	334 ASSERT_NOT_NULL(current_.literal_chars);

	335 return current_.literal_chars->is_ascii();

	336 }

329 int literal_length() const {	337 int literal_length() const {

330 ASSERT_NOT_NULL(current_.literal_chars);	338 ASSERT_NOT_NULL(current_.literal_chars);

331 return current_.literal_chars->length();	339 return current_.literal_chars->length();

332 }	340 }

333	341

334 bool literal_contains_escapes() const {	342 bool literal_contains_escapes() const {

335 Location location = current_.location;	343 Location location = current_.location;

336 int source_length = (location.end_pos - location.beg_pos);	344 int source_length = (location.end_pos - location.beg_pos);

337 if (current_.token == Token::STRING) {	345 if (current_.token == Token::STRING) {

338 // Subtract delimiters.	346 // Subtract delimiters.

339 source_length -= 2;	347 source_length -= 2;

340 }	348 }

341 return current_.literal_chars->length() != source_length;	349 return current_.literal_chars->length() != source_length;

342 }	350 }

343	351

	352 // Similar functions for the upcoming token.

	353

	354 // One token look-ahead (past the token returned by Next()).

	355 Token::Value peek() const { return next_.token; }

	356

	357 Location peek_location() const { return next_.location; }

	358

344 // Returns the literal string for the next token (the token that	359 // Returns the literal string for the next token (the token that

345 // would be returned if Next() were called).	360 // would be returned if Next() were called).

346 bool is_next_literal_ascii() {

347 ASSERT_NOT_NULL(next_.literal_chars);

348 return next_.literal_chars->is_ascii();

349 }

350 Vector<const char> next_literal_ascii_string() {	361 Vector<const char> next_literal_ascii_string() {

351 ASSERT_NOT_NULL(next_.literal_chars);	362 ASSERT_NOT_NULL(next_.literal_chars);

352 return next_.literal_chars->ascii_literal();	363 return next_.literal_chars->ascii_literal();

353 }	364 }

354 Vector<const uc16> next_literal_uc16_string() {	365 Vector<const uc16> next_literal_uc16_string() {

355 ASSERT_NOT_NULL(next_.literal_chars);	366 ASSERT_NOT_NULL(next_.literal_chars);

356 return next_.literal_chars->uc16_literal();	367 return next_.literal_chars->uc16_literal();

357 }	368 }

	369 bool is_next_literal_ascii() {

	370 ASSERT_NOT_NULL(next_.literal_chars);

	371 return next_.literal_chars->is_ascii();

	372 }

358 int next_literal_length() const {	373 int next_literal_length() const {

359 ASSERT_NOT_NULL(next_.literal_chars);	374 ASSERT_NOT_NULL(next_.literal_chars);

360 return next_.literal_chars->length();	375 return next_.literal_chars->length();

361 }	376 }

362	377

363 UnicodeCache* unicode_cache() { return unicode_cache_; }	378 UnicodeCache* unicode_cache() { return unicode_cache_; }

364	379

365 static const int kCharacterLookaheadBufferSize = 1;	380 static const int kCharacterLookaheadBufferSize = 1;

366	381

367 protected:	382 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.

	383 uc32 ScanOctalEscape(uc32 c, int length);

	384

	385 // Returns the location of the last seen octal literal.

	386 Location octal_position() const { return octal_pos_; }

	387 void clear_octal_position() { octal_pos_ = Location::invalid(); }

	388

	389 // Seek forward to the given position. This operation does not

	390 // work in general, for instance when there are pushed back

	391 // characters, but works for seeking forward until simple delimiter

	392 // tokens, which is what it is used for.

	393 void SeekForward(int pos);

	394

	395 bool HarmonyScoping() const {

	396 return harmony_scoping_;

	397 }

	398 void SetHarmonyScoping(bool block_scoping) {

	399 harmony_scoping_ = block_scoping;

	400 }

	401

	402

	403 // Returns true if there was a line terminator before the peek'ed token,

	404 // possibly inside a multi-line comment.

	405 bool HasAnyLineTerminatorBeforeNext() const {

	406 return has_line_terminator_before_next_ ||

	407 has_multiline_comment_before_next_;

	408 }

	409

	410 // Scans the input as a regular expression pattern, previous

	411 // character(s) must be /(=). Returns true if a pattern is scanned.

	412 bool ScanRegExpPattern(bool seen_equal);

	413 // Returns true if regexp flags are scanned (always since flags can

	414 // be empty).

	415 bool ScanRegExpFlags();

	416

	417 // Tells whether the buffer contains an identifier (no escapes).

	418 // Used for checking if a property name is an identifier.

	419 static bool IsIdentifier(unibrow::CharacterStream* buffer);

	420

	421 private:

368 // The current and look-ahead token.	422 // The current and look-ahead token.

369 struct TokenDesc {	423 struct TokenDesc {

370 Token::Value token;	424 Token::Value token;

371 Location location;	425 Location location;

372 LiteralBuffer* literal_chars;	426 LiteralBuffer* literal_chars;

373 };	427 };

374	428

375 // Call this after setting source_ to the input.	429 // Call this after setting source_ to the input.

376 void Init() {	430 void Init() {

377 // Set c0_ (one character ahead)	431 // Set c0_ (one character ahead)

(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
427 if (c0_ == next) {	481 if (c0_ == next) {

428 Advance();	482 Advance();

429 return then;	483 return then;

430 } else {	484 } else {

431 return else_;	485 return else_;

432 }	486 }

433 }	487 }

434	488

435 uc32 ScanHexNumber(int expected_length);	489 uc32 ScanHexNumber(int expected_length);

436	490

437 // Return the current source position.

438 int source_pos() {

439 return source_->pos() - kCharacterLookaheadBufferSize;

440 }

441

442 UnicodeCache* unicode_cache_;

443

444 // Buffers collecting literal strings, numbers, etc.

445 LiteralBuffer literal_buffer1_;

446 LiteralBuffer literal_buffer2_;

447

448 TokenDesc current_; // desc for current token (as returned by Next())

449 TokenDesc next_; // desc for next token (one token look-ahead)

450

451 // Input stream. Must be initialized to an UC16CharacterStream.

452 UC16CharacterStream* source_;

453

454 // One Unicode character look-ahead; c0_ < 0 at the end of the input.

455 uc32 c0_;

456 };

457

458 // ----------------------------------------------------------------------------

459 // JavaScriptScanner - base logic for JavaScript scanning.

460

461 class JavaScriptScanner : public Scanner {

462 public:

463 // A LiteralScope that disables recording of some types of JavaScript

464 // literals. If the scanner is configured to not record the specific

465 // type of literal, the scope will not call StartLiteral.

466 class LiteralScope {

467 public:

468 explicit LiteralScope(JavaScriptScanner* self)

469 : scanner_(self), complete_(false) {

470 scanner_->StartLiteral();

471 }

472 ~LiteralScope() {

473 if (!complete_) scanner_->DropLiteral();

474 }

475 void Complete() {

476 scanner_->TerminateLiteral();

477 complete_ = true;

478 }

479

480 private:

481 JavaScriptScanner* scanner_;

482 bool complete_;

483 };

484

485 explicit JavaScriptScanner(UnicodeCache* scanner_contants);

486

487 void Initialize(UC16CharacterStream* source);

488

489 // Returns the next token.

490 Token::Value Next();

491

492 // Returns true if there was a line terminator before the peek'ed token,

493 // possibly inside a multi-line comment.

494 bool HasAnyLineTerminatorBeforeNext() const {

495 return has_line_terminator_before_next_ ||

496 has_multiline_comment_before_next_;

497 }

498

499 // Scans the input as a regular expression pattern, previous

500 // character(s) must be /(=). Returns true if a pattern is scanned.

501 bool ScanRegExpPattern(bool seen_equal);

502 // Returns true if regexp flags are scanned (always since flags can

503 // be empty).

504 bool ScanRegExpFlags();

505

506 // Tells whether the buffer contains an identifier (no escapes).

507 // Used for checking if a property name is an identifier.

508 static bool IsIdentifier(unibrow::CharacterStream* buffer);

509

510 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.

511 uc32 ScanOctalEscape(uc32 c, int length);

512

513 // Returns the location of the last seen octal literal

514 Location octal_position() const { return octal_pos_; }

515 void clear_octal_position() { octal_pos_ = Location::invalid(); }

516

517 // Seek forward to the given position. This operation does not

518 // work in general, for instance when there are pushed back

519 // characters, but works for seeking forward until simple delimiter

520 // tokens, which is what it is used for.

521 void SeekForward(int pos);

522

523 bool HarmonyScoping() const {

524 return harmony_scoping_;

525 }

526 void SetHarmonyScoping(bool block_scoping) {

527 harmony_scoping_ = block_scoping;

528 }

529

530

531 protected:

532 bool SkipWhiteSpace();

533 Token::Value SkipSingleLineComment();

534 Token::Value SkipMultiLineComment();

535

536 // Scans a single JavaScript token.	491 // Scans a single JavaScript token.

537 void Scan();	492 void Scan();

538	493

	494 bool SkipWhiteSpace();

	495 Token::Value SkipSingleLineComment();

	496 Token::Value SkipMultiLineComment();

	497 // Scans a possible HTML comment -- begins with '<!'.

	498 Token::Value ScanHtmlComment();

	499

539 void ScanDecimalDigits();	500 void ScanDecimalDigits();

540 Token::Value ScanNumber(bool seen_period);	501 Token::Value ScanNumber(bool seen_period);

541 Token::Value ScanIdentifierOrKeyword();	502 Token::Value ScanIdentifierOrKeyword();

542 Token::Value ScanIdentifierSuffix(LiteralScope* literal);	503 Token::Value ScanIdentifierSuffix(LiteralScope* literal);

543	504

544 void ScanEscape();	505 void ScanEscape();

545 Token::Value ScanString();	506 Token::Value ScanString();

546	507

547 // Scans a possible HTML comment -- begins with '<!'.

548 Token::Value ScanHtmlComment();

549

550 // Decodes a unicode escape-sequence which is part of an identifier.	508 // Decodes a unicode escape-sequence which is part of an identifier.

551 // If the escape sequence cannot be decoded the result is kBadChar.	509 // If the escape sequence cannot be decoded the result is kBadChar.

552 uc32 ScanIdentifierUnicodeEscape();	510 uc32 ScanIdentifierUnicodeEscape();

553 // Recognizes a unicode escape-sequence and adds its characters,	511 // Recognizes a unicode escape-sequence and adds its characters,

554 // uninterpreted, to the current literal. Used for parsing RegExp	512 // uninterpreted, to the current literal. Used for parsing RegExp

555 // flags.	513 // flags.

556 bool ScanLiteralUnicodeEscape();	514 bool ScanLiteralUnicodeEscape();

557	515

	516 // Return the current source position.

	517 int source_pos() {

	518 return source_->pos() - kCharacterLookaheadBufferSize;

	519 }

	520

	521 UnicodeCache* unicode_cache_;

	522

	523 // Buffers collecting literal strings, numbers, etc.

	524 LiteralBuffer literal_buffer1_;

	525 LiteralBuffer literal_buffer2_;

	526

	527 TokenDesc current_; // desc for current token (as returned by Next())

	528 TokenDesc next_; // desc for next token (one token look-ahead)

	529

	530 // Input stream. Must be initialized to an UC16CharacterStream.

	531 UC16CharacterStream* source_;

	532

	533

558 // Start position of the octal literal last scanned.	534 // Start position of the octal literal last scanned.

559 Location octal_pos_;	535 Location octal_pos_;

560	536

	537 // One Unicode character look-ahead; c0_ < 0 at the end of the input.

	538 uc32 c0_;

	539

561 // Whether there is a line terminator whitespace character after	540 // Whether there is a line terminator whitespace character after

562 // the current token, and before the next. Does not count newlines	541 // the current token, and before the next. Does not count newlines

563 // inside multiline comments.	542 // inside multiline comments.

564 bool has_line_terminator_before_next_;	543 bool has_line_terminator_before_next_;

565 // Whether there is a multi-line comment that contains a	544 // Whether there is a multi-line comment that contains a

566 // line-terminator after the current token, and before the next.	545 // line-terminator after the current token, and before the next.

567 bool has_multiline_comment_before_next_;	546 bool has_multiline_comment_before_next_;

568 // Whether we scan 'let' as a keyword for harmony block scoped	547 // Whether we scan 'let' as a keyword for harmony block scoped

569 // let bindings.	548 // let bindings.

570 bool harmony_scoping_;	549 bool harmony_scoping_;

571 };	550 };

572	551

573 } } // namespace v8::internal	552 } } // namespace v8::internal

574	553

575 #endif // V8_SCANNER_H_	554 #endif // V8_SCANNER_H_

OLD	NEW

« no previous file with comments | « src/preparser-api.cc ('k') | src/scanner.cc » ('j') | no next file with comments »