| OLD | NEW |
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| 11 // with the distribution. | 11 // with the distribution. |
| 12 // * Neither the name of Google Inc. nor the names of its | 12 // * Neither the name of Google Inc. nor the names of its |
| 13 // contributors may be used to endorse or promote products derived | 13 // contributors may be used to endorse or promote products derived |
| 14 // from this software without specific prior written permission. | 14 // from this software without specific prior written permission. |
| 15 // | 15 // |
| 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 // Features shared by parsing and pre-parsing scanners. | 28 // Features shared by parsing and pre-parsing scanners. |
| 29 | 29 |
| 30 #include "v8.h" |
| 31 |
| 32 /* |
| 33 TODO(isolates): I include v8.h instead of these because we need Isolate and |
| 34 some classes (NativeAllocationChecker) are moved into isolate.h |
| 30 #include "../include/v8stdint.h" | 35 #include "../include/v8stdint.h" |
| 36 */ |
| 31 #include "scanner-base.h" | 37 #include "scanner-base.h" |
| 32 #include "char-predicates-inl.h" | 38 #include "char-predicates-inl.h" |
| 33 | 39 |
| 34 namespace v8 { | 40 namespace v8 { |
| 35 namespace internal { | 41 namespace internal { |
| 36 | 42 |
| 37 // ---------------------------------------------------------------------------- | 43 // ---------------------------------------------------------------------------- |
| 38 // Character predicates | |
| 39 | |
| 40 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart; | |
| 41 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart; | |
| 42 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace; | |
| 43 unibrow::Predicate<unibrow::LineTerminator, 128> | |
| 44 ScannerConstants::kIsLineTerminator; | |
| 45 | |
| 46 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_; | |
| 47 | |
| 48 // Compound predicates. | 44 // Compound predicates. |
| 49 | 45 |
| 50 bool ScannerConstants::IsIdentifier(unibrow::CharacterStream* buffer) { | 46 bool ScannerConstants::IsIdentifier(unibrow::CharacterStream* buffer) { |
| 51 // Checks whether the buffer contains an identifier (no escape). | 47 // Checks whether the buffer contains an identifier (no escape). |
| 52 if (!buffer->has_more()) return false; | 48 if (!buffer->has_more()) return false; |
| 53 if (!kIsIdentifierStart.get(buffer->GetNext())) { | 49 if (!kIsIdentifierStart.get(buffer->GetNext())) { |
| 54 return false; | 50 return false; |
| 55 } | 51 } |
| 56 while (buffer->has_more()) { | 52 while (buffer->has_more()) { |
| 57 if (!kIsIdentifierPart.get(buffer->GetNext())) { | 53 if (!kIsIdentifierPart.get(buffer->GetNext())) { |
| 58 return false; | 54 return false; |
| 59 } | 55 } |
| 60 } | 56 } |
| 61 return true; | 57 return true; |
| 62 } | 58 } |
| 63 | 59 |
| 64 // ---------------------------------------------------------------------------- | 60 // ---------------------------------------------------------------------------- |
| 65 // Scanner | 61 // Scanner |
| 66 | 62 |
| 67 Scanner::Scanner() | 63 Scanner::Scanner(Isolate* isolate) |
| 68 : octal_pos_(kNoOctalLocation) { } | 64 : scanner_constants_(isolate->scanner_constants()), |
| 65 octal_pos_(kNoOctalLocation) { |
| 66 } |
| 69 | 67 |
| 70 | 68 |
| 71 uc32 Scanner::ScanHexEscape(uc32 c, int length) { | 69 uc32 Scanner::ScanHexEscape(uc32 c, int length) { |
| 72 ASSERT(length <= 4); // prevent overflow | 70 ASSERT(length <= 4); // prevent overflow |
| 73 | 71 |
| 74 uc32 digits[4]; | 72 uc32 digits[4]; |
| 75 uc32 x = 0; | 73 uc32 x = 0; |
| 76 for (int i = 0; i < length; i++) { | 74 for (int i = 0; i < length; i++) { |
| 77 digits[i] = c0_; | 75 digits[i] = c0_; |
| 78 int d = HexValue(c0_); | 76 int d = HexValue(c0_); |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 115 if (c != '0' || i > 0) { | 113 if (c != '0' || i > 0) { |
| 116 octal_pos_ = source_pos() - i - 1; // Already advanced | 114 octal_pos_ = source_pos() - i - 1; // Already advanced |
| 117 } | 115 } |
| 118 return x; | 116 return x; |
| 119 } | 117 } |
| 120 | 118 |
| 121 | 119 |
| 122 // ---------------------------------------------------------------------------- | 120 // ---------------------------------------------------------------------------- |
| 123 // JavaScriptScanner | 121 // JavaScriptScanner |
| 124 | 122 |
| 125 JavaScriptScanner::JavaScriptScanner() : Scanner() {} | 123 JavaScriptScanner::JavaScriptScanner(Isolate* isolate) : Scanner(isolate) {} |
| 126 | 124 |
| 127 | 125 |
| 128 Token::Value JavaScriptScanner::Next() { | 126 Token::Value JavaScriptScanner::Next() { |
| 129 current_ = next_; | 127 current_ = next_; |
| 130 has_line_terminator_before_next_ = false; | 128 has_line_terminator_before_next_ = false; |
| 131 Scan(); | 129 Scan(); |
| 132 return current_.token; | 130 return current_.token; |
| 133 } | 131 } |
| 134 | 132 |
| 135 | 133 |
| 136 static inline bool IsByteOrderMark(uc32 c) { | 134 static inline bool IsByteOrderMark(uc32 c) { |
| 137 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 135 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
| 138 // Unicode character; this implies that in a Unicode context the | 136 // Unicode character; this implies that in a Unicode context the |
| 139 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 137 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
| 140 // character expressed in little-endian byte order (since it could | 138 // character expressed in little-endian byte order (since it could |
| 141 // not be a U+FFFE character expressed in big-endian byte | 139 // not be a U+FFFE character expressed in big-endian byte |
| 142 // order). Nevertheless, we check for it to be compatible with | 140 // order). Nevertheless, we check for it to be compatible with |
| 143 // Spidermonkey. | 141 // Spidermonkey. |
| 144 return c == 0xFEFF || c == 0xFFFE; | 142 return c == 0xFEFF || c == 0xFFFE; |
| 145 } | 143 } |
| 146 | 144 |
| 147 | 145 |
| 148 bool JavaScriptScanner::SkipWhiteSpace() { | 146 bool JavaScriptScanner::SkipWhiteSpace() { |
| 149 int start_position = source_pos(); | 147 int start_position = source_pos(); |
| 150 | 148 |
| 151 while (true) { | 149 while (true) { |
| 152 // We treat byte-order marks (BOMs) as whitespace for better | 150 // We treat byte-order marks (BOMs) as whitespace for better |
| 153 // compatibility with Spidermonkey and other JavaScript engines. | 151 // compatibility with Spidermonkey and other JavaScript engines. |
| 154 while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | 152 while (scanner_constants_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) { |
| 155 // IsWhiteSpace() includes line terminators! | 153 // IsWhiteSpace() includes line terminators! |
| 156 if (ScannerConstants::kIsLineTerminator.get(c0_)) { | 154 if (scanner_constants_->IsLineTerminator(c0_)) { |
| 157 // Ignore line terminators, but remember them. This is necessary | 155 // Ignore line terminators, but remember them. This is necessary |
| 158 // for automatic semicolon insertion. | 156 // for automatic semicolon insertion. |
| 159 has_line_terminator_before_next_ = true; | 157 has_line_terminator_before_next_ = true; |
| 160 } | 158 } |
| 161 Advance(); | 159 Advance(); |
| 162 } | 160 } |
| 163 | 161 |
| 164 // If there is an HTML comment end '-->' at the beginning of a | 162 // If there is an HTML comment end '-->' at the beginning of a |
| 165 // line (with only whitespace in front of it), we treat the rest | 163 // line (with only whitespace in front of it), we treat the rest |
| 166 // of the line as a comment. This is in line with the way | 164 // of the line as a comment. This is in line with the way |
| (...skipping 19 matching lines...) Expand all Loading... |
| 186 | 184 |
| 187 | 185 |
| 188 Token::Value JavaScriptScanner::SkipSingleLineComment() { | 186 Token::Value JavaScriptScanner::SkipSingleLineComment() { |
| 189 Advance(); | 187 Advance(); |
| 190 | 188 |
| 191 // The line terminator at the end of the line is not considered | 189 // The line terminator at the end of the line is not considered |
| 192 // to be part of the single-line comment; it is recognized | 190 // to be part of the single-line comment; it is recognized |
| 193 // separately by the lexical grammar and becomes part of the | 191 // separately by the lexical grammar and becomes part of the |
| 194 // stream of input elements for the syntactic grammar (see | 192 // stream of input elements for the syntactic grammar (see |
| 195 // ECMA-262, section 7.4, page 12). | 193 // ECMA-262, section 7.4, page 12). |
| 196 while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) { | 194 while (c0_ >= 0 && !scanner_constants_->IsLineTerminator(c0_)) { |
| 197 Advance(); | 195 Advance(); |
| 198 } | 196 } |
| 199 | 197 |
| 200 return Token::WHITESPACE; | 198 return Token::WHITESPACE; |
| 201 } | 199 } |
| 202 | 200 |
| 203 | 201 |
| 204 Token::Value JavaScriptScanner::SkipMultiLineComment() { | 202 Token::Value JavaScriptScanner::SkipMultiLineComment() { |
| 205 ASSERT(c0_ == '*'); | 203 ASSERT(c0_ == '*'); |
| 206 Advance(); | 204 Advance(); |
| (...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 451 | 449 |
| 452 case '?': | 450 case '?': |
| 453 token = Select(Token::CONDITIONAL); | 451 token = Select(Token::CONDITIONAL); |
| 454 break; | 452 break; |
| 455 | 453 |
| 456 case '~': | 454 case '~': |
| 457 token = Select(Token::BIT_NOT); | 455 token = Select(Token::BIT_NOT); |
| 458 break; | 456 break; |
| 459 | 457 |
| 460 default: | 458 default: |
| 461 if (ScannerConstants::kIsIdentifierStart.get(c0_)) { | 459 if (scanner_constants_->IsIdentifierStart(c0_)) { |
| 462 token = ScanIdentifierOrKeyword(); | 460 token = ScanIdentifierOrKeyword(); |
| 463 } else if (IsDecimalDigit(c0_)) { | 461 } else if (IsDecimalDigit(c0_)) { |
| 464 token = ScanNumber(false); | 462 token = ScanNumber(false); |
| 465 } else if (SkipWhiteSpace()) { | 463 } else if (SkipWhiteSpace()) { |
| 466 token = Token::WHITESPACE; | 464 token = Token::WHITESPACE; |
| 467 } else if (c0_ < 0) { | 465 } else if (c0_ < 0) { |
| 468 token = Token::EOS; | 466 token = Token::EOS; |
| 469 } else { | 467 } else { |
| 470 token = Select(Token::ILLEGAL); | 468 token = Select(Token::ILLEGAL); |
| 471 } | 469 } |
| (...skipping 27 matching lines...) Expand all Loading... |
| 499 } | 497 } |
| 500 Scan(); | 498 Scan(); |
| 501 } | 499 } |
| 502 | 500 |
| 503 | 501 |
| 504 void JavaScriptScanner::ScanEscape() { | 502 void JavaScriptScanner::ScanEscape() { |
| 505 uc32 c = c0_; | 503 uc32 c = c0_; |
| 506 Advance(); | 504 Advance(); |
| 507 | 505 |
| 508 // Skip escaped newlines. | 506 // Skip escaped newlines. |
| 509 if (ScannerConstants::kIsLineTerminator.get(c)) { | 507 if (scanner_constants_->IsLineTerminator(c)) { |
| 510 // Allow CR+LF newlines in multiline string literals. | 508 // Allow CR+LF newlines in multiline string literals. |
| 511 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 509 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
| 512 // Allow LF+CR newlines in multiline string literals. | 510 // Allow LF+CR newlines in multiline string literals. |
| 513 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 511 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); |
| 514 return; | 512 return; |
| 515 } | 513 } |
| 516 | 514 |
| 517 switch (c) { | 515 switch (c) { |
| 518 case '\'': // fall through | 516 case '\'': // fall through |
| 519 case '"' : // fall through | 517 case '"' : // fall through |
| (...skipping 22 matching lines...) Expand all Loading... |
| 542 AddLiteralChar(c); | 540 AddLiteralChar(c); |
| 543 } | 541 } |
| 544 | 542 |
| 545 | 543 |
| 546 Token::Value JavaScriptScanner::ScanString() { | 544 Token::Value JavaScriptScanner::ScanString() { |
| 547 uc32 quote = c0_; | 545 uc32 quote = c0_; |
| 548 Advance(); // consume quote | 546 Advance(); // consume quote |
| 549 | 547 |
| 550 LiteralScope literal(this); | 548 LiteralScope literal(this); |
| 551 while (c0_ != quote && c0_ >= 0 | 549 while (c0_ != quote && c0_ >= 0 |
| 552 && !ScannerConstants::kIsLineTerminator.get(c0_)) { | 550 && !scanner_constants_->IsLineTerminator(c0_)) { |
| 553 uc32 c = c0_; | 551 uc32 c = c0_; |
| 554 Advance(); | 552 Advance(); |
| 555 if (c == '\\') { | 553 if (c == '\\') { |
| 556 if (c0_ < 0) return Token::ILLEGAL; | 554 if (c0_ < 0) return Token::ILLEGAL; |
| 557 ScanEscape(); | 555 ScanEscape(); |
| 558 } else { | 556 } else { |
| 559 AddLiteralChar(c); | 557 AddLiteralChar(c); |
| 560 } | 558 } |
| 561 } | 559 } |
| 562 if (c0_ != quote) return Token::ILLEGAL; | 560 if (c0_ != quote) return Token::ILLEGAL; |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 641 // we must have at least one decimal digit after 'e'/'E' | 639 // we must have at least one decimal digit after 'e'/'E' |
| 642 return Token::ILLEGAL; | 640 return Token::ILLEGAL; |
| 643 } | 641 } |
| 644 ScanDecimalDigits(); | 642 ScanDecimalDigits(); |
| 645 } | 643 } |
| 646 | 644 |
| 647 // The source character immediately following a numeric literal must | 645 // The source character immediately following a numeric literal must |
| 648 // not be an identifier start or a decimal digit; see ECMA-262 | 646 // not be an identifier start or a decimal digit; see ECMA-262 |
| 649 // section 7.8.3, page 17 (note that we read only one decimal digit | 647 // section 7.8.3, page 17 (note that we read only one decimal digit |
| 650 // if the value is 0). | 648 // if the value is 0). |
| 651 if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_)) | 649 if (IsDecimalDigit(c0_) || scanner_constants_->IsIdentifierStart(c0_)) |
| 652 return Token::ILLEGAL; | 650 return Token::ILLEGAL; |
| 653 | 651 |
| 654 literal.Complete(); | 652 literal.Complete(); |
| 655 | 653 |
| 656 return Token::NUMBER; | 654 return Token::NUMBER; |
| 657 } | 655 } |
| 658 | 656 |
| 659 | 657 |
| 660 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { | 658 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { |
| 661 Advance(); | 659 Advance(); |
| 662 if (c0_ != 'u') return unibrow::Utf8::kBadChar; | 660 if (c0_ != 'u') return unibrow::Utf8::kBadChar; |
| 663 Advance(); | 661 Advance(); |
| 664 uc32 c = ScanHexEscape('u', 4); | 662 uc32 c = ScanHexEscape('u', 4); |
| 665 // We do not allow a unicode escape sequence to start another | 663 // We do not allow a unicode escape sequence to start another |
| 666 // unicode escape sequence. | 664 // unicode escape sequence. |
| 667 if (c == '\\') return unibrow::Utf8::kBadChar; | 665 if (c == '\\') return unibrow::Utf8::kBadChar; |
| 668 return c; | 666 return c; |
| 669 } | 667 } |
| 670 | 668 |
| 671 | 669 |
| 672 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { | 670 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { |
| 673 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); | 671 ASSERT(scanner_constants_->IsIdentifierStart(c0_)); |
| 674 LiteralScope literal(this); | 672 LiteralScope literal(this); |
| 675 KeywordMatcher keyword_match; | 673 KeywordMatcher keyword_match; |
| 676 // Scan identifier start character. | 674 // Scan identifier start character. |
| 677 if (c0_ == '\\') { | 675 if (c0_ == '\\') { |
| 678 uc32 c = ScanIdentifierUnicodeEscape(); | 676 uc32 c = ScanIdentifierUnicodeEscape(); |
| 679 // Only allow legal identifier start characters. | 677 // Only allow legal identifier start characters. |
| 680 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL; | 678 if (!scanner_constants_->IsIdentifierStart(c)) return Token::ILLEGAL; |
| 681 AddLiteralChar(c); | 679 AddLiteralChar(c); |
| 682 return ScanIdentifierSuffix(&literal); | 680 return ScanIdentifierSuffix(&literal); |
| 683 } | 681 } |
| 684 | 682 |
| 685 uc32 first_char = c0_; | 683 uc32 first_char = c0_; |
| 686 Advance(); | 684 Advance(); |
| 687 AddLiteralChar(first_char); | 685 AddLiteralChar(first_char); |
| 688 if (!keyword_match.AddChar(first_char)) { | 686 if (!keyword_match.AddChar(first_char)) { |
| 689 return ScanIdentifierSuffix(&literal); | 687 return ScanIdentifierSuffix(&literal); |
| 690 } | 688 } |
| 691 | 689 |
| 692 // Scan the rest of the identifier characters. | 690 // Scan the rest of the identifier characters. |
| 693 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { | 691 while (scanner_constants_->IsIdentifierPart(c0_)) { |
| 694 if (c0_ != '\\') { | 692 if (c0_ != '\\') { |
| 695 uc32 next_char = c0_; | 693 uc32 next_char = c0_; |
| 696 Advance(); | 694 Advance(); |
| 697 AddLiteralChar(next_char); | 695 AddLiteralChar(next_char); |
| 698 if (keyword_match.AddChar(next_char)) continue; | 696 if (keyword_match.AddChar(next_char)) continue; |
| 699 } | 697 } |
| 700 // Fallthrough if no longer able to complete keyword. | 698 // Fallthrough if no longer able to complete keyword. |
| 701 return ScanIdentifierSuffix(&literal); | 699 return ScanIdentifierSuffix(&literal); |
| 702 } | 700 } |
| 703 literal.Complete(); | 701 literal.Complete(); |
| 704 | 702 |
| 705 return keyword_match.token(); | 703 return keyword_match.token(); |
| 706 } | 704 } |
| 707 | 705 |
| 708 | 706 |
| 709 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { | 707 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { |
| 710 // Scan the rest of the identifier characters. | 708 // Scan the rest of the identifier characters. |
| 711 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { | 709 while (scanner_constants_->IsIdentifierPart(c0_)) { |
| 712 if (c0_ == '\\') { | 710 if (c0_ == '\\') { |
| 713 uc32 c = ScanIdentifierUnicodeEscape(); | 711 uc32 c = ScanIdentifierUnicodeEscape(); |
| 714 // Only allow legal identifier part characters. | 712 // Only allow legal identifier part characters. |
| 715 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL; | 713 if (!scanner_constants_->IsIdentifierPart(c)) return Token::ILLEGAL; |
| 716 AddLiteralChar(c); | 714 AddLiteralChar(c); |
| 717 } else { | 715 } else { |
| 718 AddLiteralChar(c0_); | 716 AddLiteralChar(c0_); |
| 719 Advance(); | 717 Advance(); |
| 720 } | 718 } |
| 721 } | 719 } |
| 722 literal->Complete(); | 720 literal->Complete(); |
| 723 | 721 |
| 724 return Token::IDENTIFIER; | 722 return Token::IDENTIFIER; |
| 725 } | 723 } |
| 726 | 724 |
| 727 | 725 |
| 728 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { | 726 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { |
| 729 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 727 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
| 730 bool in_character_class = false; | 728 bool in_character_class = false; |
| 731 | 729 |
| 732 // Previous token is either '/' or '/=', in the second case, the | 730 // Previous token is either '/' or '/=', in the second case, the |
| 733 // pattern starts at =. | 731 // pattern starts at =. |
| 734 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 732 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
| 735 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 733 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
| 736 | 734 |
| 737 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 735 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| 738 // the scanner should pass uninterpreted bodies to the RegExp | 736 // the scanner should pass uninterpreted bodies to the RegExp |
| 739 // constructor. | 737 // constructor. |
| 740 LiteralScope literal(this); | 738 LiteralScope literal(this); |
| 741 if (seen_equal) | 739 if (seen_equal) |
| 742 AddLiteralChar('='); | 740 AddLiteralChar('='); |
| 743 | 741 |
| 744 while (c0_ != '/' || in_character_class) { | 742 while (c0_ != '/' || in_character_class) { |
| 745 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; | 743 if (scanner_constants_->IsLineTerminator(c0_) || c0_ < 0) return false; |
| 746 if (c0_ == '\\') { // Escape sequence. | 744 if (c0_ == '\\') { // Escape sequence. |
| 747 AddLiteralCharAdvance(); | 745 AddLiteralCharAdvance(); |
| 748 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; | 746 if (scanner_constants_->IsLineTerminator(c0_) || c0_ < 0) return false; |
| 749 AddLiteralCharAdvance(); | 747 AddLiteralCharAdvance(); |
| 750 // If the escape allows more characters, i.e., \x??, \u????, or \c?, | 748 // If the escape allows more characters, i.e., \x??, \u????, or \c?, |
| 751 // only "safe" characters are allowed (letters, digits, underscore), | 749 // only "safe" characters are allowed (letters, digits, underscore), |
| 752 // otherwise the escape isn't valid and the invalid character has | 750 // otherwise the escape isn't valid and the invalid character has |
| 753 // its normal meaning. I.e., we can just continue scanning without | 751 // its normal meaning. I.e., we can just continue scanning without |
| 754 // worrying whether the following characters are part of the escape | 752 // worrying whether the following characters are part of the escape |
| 755 // or not, since any '/', '\\' or '[' is guaranteed to not be part | 753 // or not, since any '/', '\\' or '[' is guaranteed to not be part |
| 756 // of the escape sequence. | 754 // of the escape sequence. |
| 757 } else { // Unescaped character. | 755 } else { // Unescaped character. |
| 758 if (c0_ == '[') in_character_class = true; | 756 if (c0_ == '[') in_character_class = true; |
| 759 if (c0_ == ']') in_character_class = false; | 757 if (c0_ == ']') in_character_class = false; |
| 760 AddLiteralCharAdvance(); | 758 AddLiteralCharAdvance(); |
| 761 } | 759 } |
| 762 } | 760 } |
| 763 Advance(); // consume '/' | 761 Advance(); // consume '/' |
| 764 | 762 |
| 765 literal.Complete(); | 763 literal.Complete(); |
| 766 | 764 |
| 767 return true; | 765 return true; |
| 768 } | 766 } |
| 769 | 767 |
| 770 | 768 |
| 771 bool JavaScriptScanner::ScanRegExpFlags() { | 769 bool JavaScriptScanner::ScanRegExpFlags() { |
| 772 // Scan regular expression flags. | 770 // Scan regular expression flags. |
| 773 LiteralScope literal(this); | 771 LiteralScope literal(this); |
| 774 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { | 772 while (scanner_constants_->IsIdentifierPart(c0_)) { |
| 775 if (c0_ == '\\') { | 773 if (c0_ == '\\') { |
| 776 uc32 c = ScanIdentifierUnicodeEscape(); | 774 uc32 c = ScanIdentifierUnicodeEscape(); |
| 777 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { | 775 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { |
| 778 // We allow any escaped character, unlike the restriction on | 776 // We allow any escaped character, unlike the restriction on |
| 779 // IdentifierPart when it is used to build an IdentifierName. | 777 // IdentifierPart when it is used to build an IdentifierName. |
| 780 AddLiteralChar(c); | 778 AddLiteralChar(c); |
| 781 continue; | 779 continue; |
| 782 } | 780 } |
| 783 } | 781 } |
| 784 AddLiteralCharAdvance(); | 782 AddLiteralCharAdvance(); |
| (...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 962 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; | 960 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; |
| 963 break; | 961 break; |
| 964 case UNMATCHABLE: | 962 case UNMATCHABLE: |
| 965 break; | 963 break; |
| 966 } | 964 } |
| 967 // On fallthrough, it's a failure. | 965 // On fallthrough, it's a failure. |
| 968 state_ = UNMATCHABLE; | 966 state_ = UNMATCHABLE; |
| 969 } | 967 } |
| 970 | 968 |
| 971 } } // namespace v8::internal | 969 } } // namespace v8::internal |
| OLD | NEW |