| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 26 matching lines...) Expand all Loading... |
| 37 #include "hashmap.h" | 37 #include "hashmap.h" |
| 38 #include "list.h" | 38 #include "list.h" |
| 39 #include "token.h" | 39 #include "token.h" |
| 40 #include "unicode-inl.h" | 40 #include "unicode-inl.h" |
| 41 #include "utils.h" | 41 #include "utils.h" |
| 42 | 42 |
| 43 namespace v8 { | 43 namespace v8 { |
| 44 namespace internal { | 44 namespace internal { |
| 45 | 45 |
| 46 | 46 |
| 47 class ParserRecorder; |
| 48 |
| 49 |
| 47 // Returns the value (0 .. 15) of a hexadecimal character c. | 50 // Returns the value (0 .. 15) of a hexadecimal character c. |
| 48 // If c is not a legal hexadecimal character, returns a value < 0. | 51 // If c is not a legal hexadecimal character, returns a value < 0. |
| 49 inline int HexValue(uc32 c) { | 52 inline int HexValue(uc32 c) { |
| 50 c -= '0'; | 53 c -= '0'; |
| 51 if (static_cast<unsigned>(c) <= 9) return c; | 54 if (static_cast<unsigned>(c) <= 9) return c; |
| 52 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. | 55 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. |
| 53 if (static_cast<unsigned>(c) <= 5) return c + 10; | 56 if (static_cast<unsigned>(c) <= 5) return c + 10; |
| 54 return -1; | 57 return -1; |
| 55 } | 58 } |
| 56 | 59 |
| (...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 363 | 366 |
| 364 void Initialize(Utf16CharacterStream* source); | 367 void Initialize(Utf16CharacterStream* source); |
| 365 | 368 |
| 366 // Returns the next token and advances input. | 369 // Returns the next token and advances input. |
| 367 Token::Value Next(); | 370 Token::Value Next(); |
| 368 // Returns the current token again. | 371 // Returns the current token again. |
| 369 Token::Value current_token() { return current_.token; } | 372 Token::Value current_token() { return current_.token; } |
| 370 // Returns the location information for the current token | 373 // Returns the location information for the current token |
| 371 // (the token last returned by Next()). | 374 // (the token last returned by Next()). |
| 372 Location location() const { return current_.location; } | 375 Location location() const { return current_.location; } |
| 373 // Returns the literal string, if any, for the current token (the | 376 |
| 374 // token last returned by Next()). The string is 0-terminated. | 377 // Similar functions for the upcoming token. |
| 375 // Literal strings are collected for identifiers, strings, and | 378 |
| 376 // numbers. | 379 // One token look-ahead (past the token returned by Next()). |
| 377 // These functions only give the correct result if the literal | 380 Token::Value peek() const { return next_.token; } |
| 378 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 381 |
| 379 Vector<const char> literal_one_byte_string() { | 382 Location peek_location() const { return next_.location; } |
| 380 ASSERT_NOT_NULL(current_.literal_chars); | |
| 381 return current_.literal_chars->one_byte_literal(); | |
| 382 } | |
| 383 Vector<const uc16> literal_utf16_string() { | |
| 384 ASSERT_NOT_NULL(current_.literal_chars); | |
| 385 return current_.literal_chars->utf16_literal(); | |
| 386 } | |
| 387 bool is_literal_one_byte() { | |
| 388 ASSERT_NOT_NULL(current_.literal_chars); | |
| 389 return current_.literal_chars->is_one_byte(); | |
| 390 } | |
| 391 bool is_literal_contextual_keyword(Vector<const char> keyword) { | |
| 392 ASSERT_NOT_NULL(current_.literal_chars); | |
| 393 return current_.literal_chars->is_contextual_keyword(keyword); | |
| 394 } | |
| 395 int literal_length() const { | |
| 396 ASSERT_NOT_NULL(current_.literal_chars); | |
| 397 return current_.literal_chars->length(); | |
| 398 } | |
| 399 | 383 |
| 400 bool literal_contains_escapes() const { | 384 bool literal_contains_escapes() const { |
| 401 Location location = current_.location; | 385 Location location = current_.location; |
| 402 int source_length = (location.end_pos - location.beg_pos); | 386 int source_length = (location.end_pos - location.beg_pos); |
| 403 if (current_.token == Token::STRING) { | 387 if (current_.token == Token::STRING) { |
| 404 // Subtract delimiters. | 388 // Subtract delimiters. |
| 405 source_length -= 2; | 389 source_length -= 2; |
| 406 } | 390 } |
| 407 return current_.literal_chars->length() != source_length; | 391 return current_.literal_chars->length() != source_length; |
| 408 } | 392 } |
| 409 | 393 bool is_literal_contextual_keyword(Vector<const char> keyword) { |
| 410 // Similar functions for the upcoming token. | 394 ASSERT_NOT_NULL(current_.literal_chars); |
| 411 | 395 return current_.literal_chars->is_contextual_keyword(keyword); |
| 412 // One token look-ahead (past the token returned by Next()). | |
| 413 Token::Value peek() const { return next_.token; } | |
| 414 | |
| 415 Location peek_location() const { return next_.location; } | |
| 416 | |
| 417 // Returns the literal string for the next token (the token that | |
| 418 // would be returned if Next() were called). | |
| 419 Vector<const char> next_literal_one_byte_string() { | |
| 420 ASSERT_NOT_NULL(next_.literal_chars); | |
| 421 return next_.literal_chars->one_byte_literal(); | |
| 422 } | |
| 423 Vector<const uc16> next_literal_utf16_string() { | |
| 424 ASSERT_NOT_NULL(next_.literal_chars); | |
| 425 return next_.literal_chars->utf16_literal(); | |
| 426 } | |
| 427 bool is_next_literal_one_byte() { | |
| 428 ASSERT_NOT_NULL(next_.literal_chars); | |
| 429 return next_.literal_chars->is_one_byte(); | |
| 430 } | 396 } |
| 431 bool is_next_contextual_keyword(Vector<const char> keyword) { | 397 bool is_next_contextual_keyword(Vector<const char> keyword) { |
| 432 ASSERT_NOT_NULL(next_.literal_chars); | 398 ASSERT_NOT_NULL(next_.literal_chars); |
| 433 return next_.literal_chars->is_contextual_keyword(keyword); | 399 return next_.literal_chars->is_contextual_keyword(keyword); |
| 434 } | 400 } |
| 435 int next_literal_length() const { | |
| 436 ASSERT_NOT_NULL(next_.literal_chars); | |
| 437 return next_.literal_chars->length(); | |
| 438 } | |
| 439 | 401 |
| 440 Handle<String> AllocateLiteralString(Isolate* isolate, PretenureFlag tenured); | |
| 441 Handle<String> AllocateNextLiteralString(Isolate* isolate, | 402 Handle<String> AllocateNextLiteralString(Isolate* isolate, |
| 442 PretenureFlag tenured); | 403 PretenureFlag tenured); |
| 443 Handle<String> AllocateInternalizedString(Isolate* isolate); | 404 Handle<String> AllocateInternalizedString(Isolate* isolate); |
| 444 | 405 |
| 445 double DoubleValue(); | 406 double DoubleValue(); |
| 446 bool UnescapedLiteralMatches(const char* data, int length) { | 407 bool UnescapedLiteralMatches(const char* data, int length) { |
| 447 if (is_literal_one_byte() && | 408 if (is_literal_one_byte() && |
| 448 literal_length() == length && | 409 literal_length() == length && |
| 449 !literal_contains_escapes()) { | 410 !literal_contains_escapes()) { |
| 450 return !strncmp(literal_one_byte_string().start(), data, length); | 411 return !strncmp(literal_one_byte_string().start(), data, length); |
| 451 } | 412 } |
| 452 return false; | 413 return false; |
| 453 } | 414 } |
| 454 void IsGetOrSet(bool* is_get, bool* is_set) { | 415 void IsGetOrSet(bool* is_get, bool* is_set) { |
| 455 if (is_literal_one_byte() && | 416 if (is_literal_one_byte() && |
| 456 literal_length() == 3 && | 417 literal_length() == 3 && |
| 457 !literal_contains_escapes()) { | 418 !literal_contains_escapes()) { |
| 458 const char* token = literal_one_byte_string().start(); | 419 const char* token = literal_one_byte_string().start(); |
| 459 *is_get = strncmp(token, "get", 3) == 0; | 420 *is_get = strncmp(token, "get", 3) == 0; |
| 460 *is_set = !*is_get && strncmp(token, "set", 3) == 0; | 421 *is_set = !*is_get && strncmp(token, "set", 3) == 0; |
| 461 } | 422 } |
| 462 } | 423 } |
| 463 | 424 |
| 425 int FindNumber(DuplicateFinder* finder, int value); |
| 426 int FindSymbol(DuplicateFinder* finder, int value); |
| 427 |
| 428 void LogSymbol(ParserRecorder* log, int position); |
| 429 |
| 464 UnicodeCache* unicode_cache() { return unicode_cache_; } | 430 UnicodeCache* unicode_cache() { return unicode_cache_; } |
| 465 | 431 |
| 466 static const int kCharacterLookaheadBufferSize = 1; | |
| 467 | |
| 468 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | |
| 469 uc32 ScanOctalEscape(uc32 c, int length); | |
| 470 | |
| 471 // Returns the location of the last seen octal literal. | 432 // Returns the location of the last seen octal literal. |
| 472 Location octal_position() const { return octal_pos_; } | 433 Location octal_position() const { return octal_pos_; } |
| 473 void clear_octal_position() { octal_pos_ = Location::invalid(); } | 434 void clear_octal_position() { octal_pos_ = Location::invalid(); } |
| 474 | 435 |
| 475 // Seek forward to the given position. This operation does not | 436 // Seek forward to the given position. This operation does not |
| 476 // work in general, for instance when there are pushed back | 437 // work in general, for instance when there are pushed back |
| 477 // characters, but works for seeking forward until simple delimiter | 438 // characters, but works for seeking forward until simple delimiter |
| 478 // tokens, which is what it is used for. | 439 // tokens, which is what it is used for. |
| 479 void SeekForward(int pos); | 440 void SeekForward(int pos); |
| 480 | 441 |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 512 bool ScanRegExpFlags(); | 473 bool ScanRegExpFlags(); |
| 513 | 474 |
| 514 private: | 475 private: |
| 515 // The current and look-ahead token. | 476 // The current and look-ahead token. |
| 516 struct TokenDesc { | 477 struct TokenDesc { |
| 517 Token::Value token; | 478 Token::Value token; |
| 518 Location location; | 479 Location location; |
| 519 LiteralBuffer* literal_chars; | 480 LiteralBuffer* literal_chars; |
| 520 }; | 481 }; |
| 521 | 482 |
| 483 static const int kCharacterLookaheadBufferSize = 1; |
| 484 |
| 485 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
| 486 uc32 ScanOctalEscape(uc32 c, int length); |
| 487 |
| 522 // Call this after setting source_ to the input. | 488 // Call this after setting source_ to the input. |
| 523 void Init() { | 489 void Init() { |
| 524 // Set c0_ (one character ahead) | 490 // Set c0_ (one character ahead) |
| 525 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); | 491 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); |
| 526 Advance(); | 492 Advance(); |
| 527 // Initialize current_ to not refer to a literal. | 493 // Initialize current_ to not refer to a literal. |
| 528 current_.literal_chars = NULL; | 494 current_.literal_chars = NULL; |
| 529 } | 495 } |
| 530 | 496 |
| 531 // Literal buffer support | 497 // Literal buffer support |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 572 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | 538 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
| 573 Advance(); | 539 Advance(); |
| 574 if (c0_ == next) { | 540 if (c0_ == next) { |
| 575 Advance(); | 541 Advance(); |
| 576 return then; | 542 return then; |
| 577 } else { | 543 } else { |
| 578 return else_; | 544 return else_; |
| 579 } | 545 } |
| 580 } | 546 } |
| 581 | 547 |
| 548 // Returns the literal string, if any, for the current token (the |
| 549 // token last returned by Next()). The string is 0-terminated. |
| 550 // Literal strings are collected for identifiers, strings, and |
| 551 // numbers. |
| 552 // These functions only give the correct result if the literal |
| 553 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
| 554 Vector<const char> literal_one_byte_string() { |
| 555 ASSERT_NOT_NULL(current_.literal_chars); |
| 556 return current_.literal_chars->one_byte_literal(); |
| 557 } |
| 558 Vector<const uc16> literal_utf16_string() { |
| 559 ASSERT_NOT_NULL(current_.literal_chars); |
| 560 return current_.literal_chars->utf16_literal(); |
| 561 } |
| 562 bool is_literal_one_byte() { |
| 563 ASSERT_NOT_NULL(current_.literal_chars); |
| 564 return current_.literal_chars->is_one_byte(); |
| 565 } |
| 566 int literal_length() const { |
| 567 ASSERT_NOT_NULL(current_.literal_chars); |
| 568 return current_.literal_chars->length(); |
| 569 } |
| 570 // Returns the literal string for the next token (the token that |
| 571 // would be returned if Next() were called). |
| 572 Vector<const char> next_literal_one_byte_string() { |
| 573 ASSERT_NOT_NULL(next_.literal_chars); |
| 574 return next_.literal_chars->one_byte_literal(); |
| 575 } |
| 576 Vector<const uc16> next_literal_utf16_string() { |
| 577 ASSERT_NOT_NULL(next_.literal_chars); |
| 578 return next_.literal_chars->utf16_literal(); |
| 579 } |
| 580 bool is_next_literal_one_byte() { |
| 581 ASSERT_NOT_NULL(next_.literal_chars); |
| 582 return next_.literal_chars->is_one_byte(); |
| 583 } |
| 584 int next_literal_length() const { |
| 585 ASSERT_NOT_NULL(next_.literal_chars); |
| 586 return next_.literal_chars->length(); |
| 587 } |
| 588 |
| 582 uc32 ScanHexNumber(int expected_length); | 589 uc32 ScanHexNumber(int expected_length); |
| 583 | 590 |
| 584 // Scans a single JavaScript token. | 591 // Scans a single JavaScript token. |
| 585 void Scan(); | 592 void Scan(); |
| 586 | 593 |
| 587 bool SkipWhiteSpace(); | 594 bool SkipWhiteSpace(); |
| 588 Token::Value SkipSingleLineComment(); | 595 Token::Value SkipSingleLineComment(); |
| 589 Token::Value SkipMultiLineComment(); | 596 Token::Value SkipMultiLineComment(); |
| 590 // Scans a possible HTML comment -- begins with '<!'. | 597 // Scans a possible HTML comment -- begins with '<!'. |
| 591 Token::Value ScanHtmlComment(); | 598 Token::Value ScanHtmlComment(); |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 644 bool harmony_scoping_; | 651 bool harmony_scoping_; |
| 645 // Whether we scan 'module', 'import', 'export' as keywords. | 652 // Whether we scan 'module', 'import', 'export' as keywords. |
| 646 bool harmony_modules_; | 653 bool harmony_modules_; |
| 647 // Whether we scan 0o777 and 0b111 as numbers. | 654 // Whether we scan 0o777 and 0b111 as numbers. |
| 648 bool harmony_numeric_literals_; | 655 bool harmony_numeric_literals_; |
| 649 }; | 656 }; |
| 650 | 657 |
| 651 } } // namespace v8::internal | 658 } } // namespace v8::internal |
| 652 | 659 |
| 653 #endif // V8_SCANNER_H_ | 660 #endif // V8_SCANNER_H_ |
| OLD | NEW |