| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
| 8 | 8 |
| 9 #include <stdint.h> | 9 #include <stdint.h> |
| 10 | 10 |
| 11 #include <cmath> | 11 #include <cmath> |
| 12 | 12 |
| 13 #include "src/ast/ast-value-factory.h" | 13 #include "src/ast/ast-value-factory.h" |
| 14 #include "src/char-predicates-inl.h" | 14 #include "src/char-predicates-inl.h" |
| 15 #include "src/conversions-inl.h" | 15 #include "src/conversions-inl.h" |
| 16 #include "src/list-inl.h" | 16 #include "src/list-inl.h" |
| 17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol | 17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol |
| 18 | 18 |
| 19 namespace v8 { | 19 namespace v8 { |
| 20 namespace internal { | 20 namespace internal { |
| 21 | 21 |
| 22 const size_t Utf16CharacterStream::kNoBookmark = |
| 23 std::numeric_limits<size_t>::max(); |
| 24 |
| 22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { | 25 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { |
| 23 if (is_one_byte()) { | 26 if (is_one_byte()) { |
| 24 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); | 27 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); |
| 25 } | 28 } |
| 26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); | 29 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); |
| 27 } | 30 } |
| 28 | 31 |
| 29 | 32 |
| 30 // Default implementation for streams that do not support bookmarks. | |
| 31 bool Utf16CharacterStream::SetBookmark() { return false; } | |
| 32 void Utf16CharacterStream::ResetToBookmark() { UNREACHABLE(); } | |
| 33 | |
| 34 | 33 |
| 35 // ---------------------------------------------------------------------------- | 34 // ---------------------------------------------------------------------------- |
| 36 // Scanner | 35 // Scanner |
| 37 | 36 |
| 38 Scanner::Scanner(UnicodeCache* unicode_cache) | 37 Scanner::Scanner(UnicodeCache* unicode_cache) |
| 39 : unicode_cache_(unicode_cache), | 38 : unicode_cache_(unicode_cache), |
| 40 bookmark_c0_(kNoBookmark), | 39 bookmark_c0_(kNoBookmark), |
| 41 octal_pos_(Location::invalid()), | 40 octal_pos_(Location::invalid()), |
| 42 decimal_with_leading_zero_pos_(Location::invalid()), | 41 decimal_with_leading_zero_pos_(Location::invalid()), |
| 43 found_html_comment_(false) { | 42 found_html_comment_(false) { |
| (...skipping 261 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 305 return c == 0xFFFE; | 304 return c == 0xFFFE; |
| 306 } | 305 } |
| 307 | 306 |
| 308 | 307 |
| 309 bool Scanner::SkipWhiteSpace() { | 308 bool Scanner::SkipWhiteSpace() { |
| 310 int start_position = source_pos(); | 309 int start_position = source_pos(); |
| 311 | 310 |
| 312 while (true) { | 311 while (true) { |
| 313 while (true) { | 312 while (true) { |
| 314 // The unicode cache accepts unsigned inputs. | 313 // The unicode cache accepts unsigned inputs. |
| 315 if (c0_ < 0) break; | 314 if (c0_ == kEndOfInput) break; |
| 316 // Advance as long as character is a WhiteSpace or LineTerminator. | 315 // Advance as long as character is a WhiteSpace or LineTerminator. |
| 317 // Remember if the latter is the case. | 316 // Remember if the latter is the case. |
| 318 if (unicode_cache_->IsLineTerminator(c0_)) { | 317 if (unicode_cache_->IsLineTerminator(c0_)) { |
| 319 has_line_terminator_before_next_ = true; | 318 has_line_terminator_before_next_ = true; |
| 320 } else if (!unicode_cache_->IsWhiteSpace(c0_) && | 319 } else if (!unicode_cache_->IsWhiteSpace(c0_) && |
| 321 !IsLittleEndianByteOrderMark(c0_)) { | 320 !IsLittleEndianByteOrderMark(c0_)) { |
| 322 break; | 321 break; |
| 323 } | 322 } |
| 324 Advance(); | 323 Advance(); |
| 325 } | 324 } |
| (...skipping 23 matching lines...) Expand all Loading... |
| 349 | 348 |
| 350 | 349 |
| 351 Token::Value Scanner::SkipSingleLineComment() { | 350 Token::Value Scanner::SkipSingleLineComment() { |
| 352 Advance(); | 351 Advance(); |
| 353 | 352 |
| 354 // The line terminator at the end of the line is not considered | 353 // The line terminator at the end of the line is not considered |
| 355 // to be part of the single-line comment; it is recognized | 354 // to be part of the single-line comment; it is recognized |
| 356 // separately by the lexical grammar and becomes part of the | 355 // separately by the lexical grammar and becomes part of the |
| 357 // stream of input elements for the syntactic grammar (see | 356 // stream of input elements for the syntactic grammar (see |
| 358 // ECMA-262, section 7.4). | 357 // ECMA-262, section 7.4). |
| 359 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 358 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { |
| 360 Advance(); | 359 Advance(); |
| 361 } | 360 } |
| 362 | 361 |
| 363 return Token::WHITESPACE; | 362 return Token::WHITESPACE; |
| 364 } | 363 } |
| 365 | 364 |
| 366 | 365 |
| 367 Token::Value Scanner::SkipSourceURLComment() { | 366 Token::Value Scanner::SkipSourceURLComment() { |
| 368 TryToParseSourceURLComment(); | 367 TryToParseSourceURLComment(); |
| 369 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 368 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { |
| 370 Advance(); | 369 Advance(); |
| 371 } | 370 } |
| 372 | 371 |
| 373 return Token::WHITESPACE; | 372 return Token::WHITESPACE; |
| 374 } | 373 } |
| 375 | 374 |
| 376 | 375 |
| 377 void Scanner::TryToParseSourceURLComment() { | 376 void Scanner::TryToParseSourceURLComment() { |
| 378 // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this | 377 // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this |
| 379 // function will just return if it cannot parse a magic comment. | 378 // function will just return if it cannot parse a magic comment. |
| 380 if (c0_ < 0 || !unicode_cache_->IsWhiteSpace(c0_)) return; | 379 if (c0_ == kEndOfInput || !unicode_cache_->IsWhiteSpace(c0_)) return; |
| 381 Advance(); | 380 Advance(); |
| 382 LiteralBuffer name; | 381 LiteralBuffer name; |
| 383 while (c0_ >= 0 && !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && | 382 while (c0_ != kEndOfInput && |
| 384 c0_ != '=') { | 383 !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') { |
| 385 name.AddChar(c0_); | 384 name.AddChar(c0_); |
| 386 Advance(); | 385 Advance(); |
| 387 } | 386 } |
| 388 if (!name.is_one_byte()) return; | 387 if (!name.is_one_byte()) return; |
| 389 Vector<const uint8_t> name_literal = name.one_byte_literal(); | 388 Vector<const uint8_t> name_literal = name.one_byte_literal(); |
| 390 LiteralBuffer* value; | 389 LiteralBuffer* value; |
| 391 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) { | 390 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) { |
| 392 value = &source_url_; | 391 value = &source_url_; |
| 393 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) { | 392 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) { |
| 394 value = &source_mapping_url_; | 393 value = &source_mapping_url_; |
| 395 } else { | 394 } else { |
| 396 return; | 395 return; |
| 397 } | 396 } |
| 398 if (c0_ != '=') | 397 if (c0_ != '=') |
| 399 return; | 398 return; |
| 400 Advance(); | 399 Advance(); |
| 401 value->Reset(); | 400 value->Reset(); |
| 402 while (c0_ >= 0 && unicode_cache_->IsWhiteSpace(c0_)) { | 401 while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) { |
| 403 Advance(); | 402 Advance(); |
| 404 } | 403 } |
| 405 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 404 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { |
| 406 // Disallowed characters. | 405 // Disallowed characters. |
| 407 if (c0_ == '"' || c0_ == '\'') { | 406 if (c0_ == '"' || c0_ == '\'') { |
| 408 value->Reset(); | 407 value->Reset(); |
| 409 return; | 408 return; |
| 410 } | 409 } |
| 411 if (unicode_cache_->IsWhiteSpace(c0_)) { | 410 if (unicode_cache_->IsWhiteSpace(c0_)) { |
| 412 break; | 411 break; |
| 413 } | 412 } |
| 414 value->AddChar(c0_); | 413 value->AddChar(c0_); |
| 415 Advance(); | 414 Advance(); |
| 416 } | 415 } |
| 417 // Allow whitespace at the end. | 416 // Allow whitespace at the end. |
| 418 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 417 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { |
| 419 if (!unicode_cache_->IsWhiteSpace(c0_)) { | 418 if (!unicode_cache_->IsWhiteSpace(c0_)) { |
| 420 value->Reset(); | 419 value->Reset(); |
| 421 break; | 420 break; |
| 422 } | 421 } |
| 423 Advance(); | 422 Advance(); |
| 424 } | 423 } |
| 425 } | 424 } |
| 426 | 425 |
| 427 | 426 |
| 428 Token::Value Scanner::SkipMultiLineComment() { | 427 Token::Value Scanner::SkipMultiLineComment() { |
| 429 DCHECK(c0_ == '*'); | 428 DCHECK(c0_ == '*'); |
| 430 Advance(); | 429 Advance(); |
| 431 | 430 |
| 432 while (c0_ >= 0) { | 431 while (c0_ != kEndOfInput) { |
| 433 uc32 ch = c0_; | 432 uc32 ch = c0_; |
| 434 Advance(); | 433 Advance(); |
| 435 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { | 434 if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(ch)) { |
| 436 // Following ECMA-262, section 7.4, a comment containing | 435 // Following ECMA-262, section 7.4, a comment containing |
| 437 // a newline will make the comment count as a line-terminator. | 436 // a newline will make the comment count as a line-terminator. |
| 438 has_multiline_comment_before_next_ = true; | 437 has_multiline_comment_before_next_ = true; |
| 439 } | 438 } |
| 440 // If we have reached the end of the multi-line comment, we | 439 // If we have reached the end of the multi-line comment, we |
| 441 // consume the '/' and insert a whitespace. This way all | 440 // consume the '/' and insert a whitespace. This way all |
| 442 // multi-line comments are treated as whitespace. | 441 // multi-line comments are treated as whitespace. |
| 443 if (ch == '*' && c0_ == '/') { | 442 if (ch == '*' && c0_ == '/') { |
| 444 c0_ = ' '; | 443 c0_ = ' '; |
| 445 return Token::WHITESPACE; | 444 return Token::WHITESPACE; |
| (...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 709 | 708 |
| 710 case '~': | 709 case '~': |
| 711 token = Select(Token::BIT_NOT); | 710 token = Select(Token::BIT_NOT); |
| 712 break; | 711 break; |
| 713 | 712 |
| 714 case '`': | 713 case '`': |
| 715 token = ScanTemplateStart(); | 714 token = ScanTemplateStart(); |
| 716 break; | 715 break; |
| 717 | 716 |
| 718 default: | 717 default: |
| 719 if (c0_ < 0) { | 718 if (c0_ == kEndOfInput) { |
| 720 token = Token::EOS; | 719 token = Token::EOS; |
| 721 } else if (unicode_cache_->IsIdentifierStart(c0_)) { | 720 } else if (unicode_cache_->IsIdentifierStart(c0_)) { |
| 722 token = ScanIdentifierOrKeyword(); | 721 token = ScanIdentifierOrKeyword(); |
| 723 } else if (IsDecimalDigit(c0_)) { | 722 } else if (IsDecimalDigit(c0_)) { |
| 724 token = ScanNumber(false); | 723 token = ScanNumber(false); |
| 725 } else if (SkipWhiteSpace()) { | 724 } else if (SkipWhiteSpace()) { |
| 726 token = Token::WHITESPACE; | 725 token = Token::WHITESPACE; |
| 727 } else { | 726 } else { |
| 728 token = Select(Token::ILLEGAL); | 727 token = Select(Token::ILLEGAL); |
| 729 } | 728 } |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 801 Scan(); | 800 Scan(); |
| 802 } | 801 } |
| 803 | 802 |
| 804 | 803 |
| 805 template <bool capture_raw, bool in_template_literal> | 804 template <bool capture_raw, bool in_template_literal> |
| 806 bool Scanner::ScanEscape() { | 805 bool Scanner::ScanEscape() { |
| 807 uc32 c = c0_; | 806 uc32 c = c0_; |
| 808 Advance<capture_raw>(); | 807 Advance<capture_raw>(); |
| 809 | 808 |
| 810 // Skip escaped newlines. | 809 // Skip escaped newlines. |
| 811 if (!in_template_literal && c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { | 810 if (!in_template_literal && c0_ != kEndOfInput && |
| 811 unicode_cache_->IsLineTerminator(c)) { |
| 812 // Allow CR+LF newlines in multiline string literals. | 812 // Allow CR+LF newlines in multiline string literals. |
| 813 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>(); | 813 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>(); |
| 814 // Allow LF+CR newlines in multiline string literals. | 814 // Allow LF+CR newlines in multiline string literals. |
| 815 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>(); | 815 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>(); |
| 816 return true; | 816 return true; |
| 817 } | 817 } |
| 818 | 818 |
| 819 switch (c) { | 819 switch (c) { |
| 820 case '\'': // fall through | 820 case '\'': // fall through |
| 821 case '"' : // fall through | 821 case '"' : // fall through |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 887 Token::Value Scanner::ScanString() { | 887 Token::Value Scanner::ScanString() { |
| 888 uc32 quote = c0_; | 888 uc32 quote = c0_; |
| 889 Advance<false, false>(); // consume quote | 889 Advance<false, false>(); // consume quote |
| 890 | 890 |
| 891 LiteralScope literal(this); | 891 LiteralScope literal(this); |
| 892 while (true) { | 892 while (true) { |
| 893 if (c0_ > kMaxAscii) { | 893 if (c0_ > kMaxAscii) { |
| 894 HandleLeadSurrogate(); | 894 HandleLeadSurrogate(); |
| 895 break; | 895 break; |
| 896 } | 896 } |
| 897 if (c0_ < 0 || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL; | 897 if (c0_ == kEndOfInput || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL; |
| 898 if (c0_ == quote) { | 898 if (c0_ == quote) { |
| 899 literal.Complete(); | 899 literal.Complete(); |
| 900 Advance<false, false>(); | 900 Advance<false, false>(); |
| 901 return Token::STRING; | 901 return Token::STRING; |
| 902 } | 902 } |
| 903 char c = static_cast<char>(c0_); | 903 char c = static_cast<char>(c0_); |
| 904 if (c == '\\') break; | 904 if (c == '\\') break; |
| 905 Advance<false, false>(); | 905 Advance<false, false>(); |
| 906 AddLiteralChar(c); | 906 AddLiteralChar(c); |
| 907 } | 907 } |
| 908 | 908 |
| 909 while (c0_ != quote && c0_ >= 0 | 909 while (c0_ != quote && c0_ != kEndOfInput && |
| 910 && !unicode_cache_->IsLineTerminator(c0_)) { | 910 !unicode_cache_->IsLineTerminator(c0_)) { |
| 911 uc32 c = c0_; | 911 uc32 c = c0_; |
| 912 Advance(); | 912 Advance(); |
| 913 if (c == '\\') { | 913 if (c == '\\') { |
| 914 if (c0_ < 0 || !ScanEscape<false, false>()) { | 914 if (c0_ == kEndOfInput || !ScanEscape<false, false>()) { |
| 915 return Token::ILLEGAL; | 915 return Token::ILLEGAL; |
| 916 } | 916 } |
| 917 } else { | 917 } else { |
| 918 AddLiteralChar(c); | 918 AddLiteralChar(c); |
| 919 } | 919 } |
| 920 } | 920 } |
| 921 if (c0_ != quote) return Token::ILLEGAL; | 921 if (c0_ != quote) return Token::ILLEGAL; |
| 922 literal.Complete(); | 922 literal.Complete(); |
| 923 | 923 |
| 924 Advance(); // consume quote | 924 Advance(); // consume quote |
| (...skipping 25 matching lines...) Expand all Loading... |
| 950 Advance<capture_raw>(); | 950 Advance<capture_raw>(); |
| 951 if (c == '`') { | 951 if (c == '`') { |
| 952 result = Token::TEMPLATE_TAIL; | 952 result = Token::TEMPLATE_TAIL; |
| 953 ReduceRawLiteralLength(1); | 953 ReduceRawLiteralLength(1); |
| 954 break; | 954 break; |
| 955 } else if (c == '$' && c0_ == '{') { | 955 } else if (c == '$' && c0_ == '{') { |
| 956 Advance<capture_raw>(); // Consume '{' | 956 Advance<capture_raw>(); // Consume '{' |
| 957 ReduceRawLiteralLength(2); | 957 ReduceRawLiteralLength(2); |
| 958 break; | 958 break; |
| 959 } else if (c == '\\') { | 959 } else if (c == '\\') { |
| 960 if (c0_ > 0 && unicode_cache_->IsLineTerminator(c0_)) { | 960 if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(c0_)) { |
| 961 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty | 961 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty |
| 962 // code unit sequence. | 962 // code unit sequence. |
| 963 uc32 lastChar = c0_; | 963 uc32 lastChar = c0_; |
| 964 Advance<capture_raw>(); | 964 Advance<capture_raw>(); |
| 965 if (lastChar == '\r') { | 965 if (lastChar == '\r') { |
| 966 ReduceRawLiteralLength(1); // Remove \r | 966 ReduceRawLiteralLength(1); // Remove \r |
| 967 if (c0_ == '\n') { | 967 if (c0_ == '\n') { |
| 968 Advance<capture_raw>(); // Adds \n | 968 Advance<capture_raw>(); // Adds \n |
| 969 } else { | 969 } else { |
| 970 AddRawLiteralChar('\n'); | 970 AddRawLiteralChar('\n'); |
| (...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1148 return Token::ILLEGAL; | 1148 return Token::ILLEGAL; |
| 1149 } | 1149 } |
| 1150 ScanDecimalDigits(); | 1150 ScanDecimalDigits(); |
| 1151 } | 1151 } |
| 1152 | 1152 |
| 1153 // The source character immediately following a numeric literal must | 1153 // The source character immediately following a numeric literal must |
| 1154 // not be an identifier start or a decimal digit; see ECMA-262 | 1154 // not be an identifier start or a decimal digit; see ECMA-262 |
| 1155 // section 7.8.3, page 17 (note that we read only one decimal digit | 1155 // section 7.8.3, page 17 (note that we read only one decimal digit |
| 1156 // if the value is 0). | 1156 // if the value is 0). |
| 1157 if (IsDecimalDigit(c0_) || | 1157 if (IsDecimalDigit(c0_) || |
| 1158 (c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_))) | 1158 (c0_ != kEndOfInput && unicode_cache_->IsIdentifierStart(c0_))) |
| 1159 return Token::ILLEGAL; | 1159 return Token::ILLEGAL; |
| 1160 | 1160 |
| 1161 literal.Complete(); | 1161 literal.Complete(); |
| 1162 | 1162 |
| 1163 if (kind == DECIMAL_WITH_LEADING_ZERO) | 1163 if (kind == DECIMAL_WITH_LEADING_ZERO) |
| 1164 decimal_with_leading_zero_pos_ = Location(start_pos, source_pos()); | 1164 decimal_with_leading_zero_pos_ = Location(start_pos, source_pos()); |
| 1165 return Token::NUMBER; | 1165 return Token::NUMBER; |
| 1166 } | 1166 } |
| 1167 | 1167 |
| 1168 | 1168 |
| (...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1375 } | 1375 } |
| 1376 AddLiteralChar(c); | 1376 AddLiteralChar(c); |
| 1377 return ScanIdentifierSuffix(&literal, true); | 1377 return ScanIdentifierSuffix(&literal, true); |
| 1378 } else { | 1378 } else { |
| 1379 uc32 first_char = c0_; | 1379 uc32 first_char = c0_; |
| 1380 Advance(); | 1380 Advance(); |
| 1381 AddLiteralChar(first_char); | 1381 AddLiteralChar(first_char); |
| 1382 } | 1382 } |
| 1383 | 1383 |
| 1384 // Scan the rest of the identifier characters. | 1384 // Scan the rest of the identifier characters. |
| 1385 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { | 1385 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) { |
| 1386 if (c0_ != '\\') { | 1386 if (c0_ != '\\') { |
| 1387 uc32 next_char = c0_; | 1387 uc32 next_char = c0_; |
| 1388 Advance(); | 1388 Advance(); |
| 1389 AddLiteralChar(next_char); | 1389 AddLiteralChar(next_char); |
| 1390 continue; | 1390 continue; |
| 1391 } | 1391 } |
| 1392 // Fallthrough if no longer able to complete keyword. | 1392 // Fallthrough if no longer able to complete keyword. |
| 1393 return ScanIdentifierSuffix(&literal, false); | 1393 return ScanIdentifierSuffix(&literal, false); |
| 1394 } | 1394 } |
| 1395 | 1395 |
| 1396 if (next_.literal_chars->is_one_byte()) { | 1396 if (next_.literal_chars->is_one_byte()) { |
| 1397 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); | 1397 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); |
| 1398 Token::Value token = | 1398 Token::Value token = |
| 1399 KeywordOrIdentifierToken(chars.start(), chars.length()); | 1399 KeywordOrIdentifierToken(chars.start(), chars.length()); |
| 1400 if (token == Token::IDENTIFIER) literal.Complete(); | 1400 if (token == Token::IDENTIFIER) literal.Complete(); |
| 1401 return token; | 1401 return token; |
| 1402 } | 1402 } |
| 1403 literal.Complete(); | 1403 literal.Complete(); |
| 1404 return Token::IDENTIFIER; | 1404 return Token::IDENTIFIER; |
| 1405 } | 1405 } |
| 1406 | 1406 |
| 1407 | 1407 |
| 1408 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal, | 1408 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal, |
| 1409 bool escaped) { | 1409 bool escaped) { |
| 1410 // Scan the rest of the identifier characters. | 1410 // Scan the rest of the identifier characters. |
| 1411 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { | 1411 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) { |
| 1412 if (c0_ == '\\') { | 1412 if (c0_ == '\\') { |
| 1413 uc32 c = ScanIdentifierUnicodeEscape(); | 1413 uc32 c = ScanIdentifierUnicodeEscape(); |
| 1414 escaped = true; | 1414 escaped = true; |
| 1415 // Only allow legal identifier part characters. | 1415 // Only allow legal identifier part characters. |
| 1416 if (c < 0 || | 1416 if (c < 0 || |
| 1417 c == '\\' || | 1417 c == '\\' || |
| 1418 !unicode_cache_->IsIdentifierPart(c)) { | 1418 !unicode_cache_->IsIdentifierPart(c)) { |
| 1419 return Token::ILLEGAL; | 1419 return Token::ILLEGAL; |
| 1420 } | 1420 } |
| 1421 AddLiteralChar(c); | 1421 AddLiteralChar(c); |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1458 | 1458 |
| 1459 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 1459 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| 1460 // the scanner should pass uninterpreted bodies to the RegExp | 1460 // the scanner should pass uninterpreted bodies to the RegExp |
| 1461 // constructor. | 1461 // constructor. |
| 1462 LiteralScope literal(this); | 1462 LiteralScope literal(this); |
| 1463 if (seen_equal) { | 1463 if (seen_equal) { |
| 1464 AddLiteralChar('='); | 1464 AddLiteralChar('='); |
| 1465 } | 1465 } |
| 1466 | 1466 |
| 1467 while (c0_ != '/' || in_character_class) { | 1467 while (c0_ != '/' || in_character_class) { |
| 1468 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; | 1468 if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_)) |
| 1469 return false; |
| 1469 if (c0_ == '\\') { // Escape sequence. | 1470 if (c0_ == '\\') { // Escape sequence. |
| 1470 AddLiteralCharAdvance(); | 1471 AddLiteralCharAdvance(); |
| 1471 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; | 1472 if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_)) |
| 1473 return false; |
| 1472 AddLiteralCharAdvance(); | 1474 AddLiteralCharAdvance(); |
| 1473 // If the escape allows more characters, i.e., \x??, \u????, or \c?, | 1475 // If the escape allows more characters, i.e., \x??, \u????, or \c?, |
| 1474 // only "safe" characters are allowed (letters, digits, underscore), | 1476 // only "safe" characters are allowed (letters, digits, underscore), |
| 1475 // otherwise the escape isn't valid and the invalid character has | 1477 // otherwise the escape isn't valid and the invalid character has |
| 1476 // its normal meaning. I.e., we can just continue scanning without | 1478 // its normal meaning. I.e., we can just continue scanning without |
| 1477 // worrying whether the following characters are part of the escape | 1479 // worrying whether the following characters are part of the escape |
| 1478 // or not, since any '/', '\\' or '[' is guaranteed to not be part | 1480 // or not, since any '/', '\\' or '[' is guaranteed to not be part |
| 1479 // of the escape sequence. | 1481 // of the escape sequence. |
| 1480 | 1482 |
| 1481 // TODO(896): At some point, parse RegExps more thoroughly to capture | 1483 // TODO(896): At some point, parse RegExps more thoroughly to capture |
| (...skipping 10 matching lines...) Expand all Loading... |
| 1492 next_.token = Token::REGEXP_LITERAL; | 1494 next_.token = Token::REGEXP_LITERAL; |
| 1493 return true; | 1495 return true; |
| 1494 } | 1496 } |
| 1495 | 1497 |
| 1496 | 1498 |
| 1497 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() { | 1499 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() { |
| 1498 DCHECK(next_.token == Token::REGEXP_LITERAL); | 1500 DCHECK(next_.token == Token::REGEXP_LITERAL); |
| 1499 | 1501 |
| 1500 // Scan regular expression flags. | 1502 // Scan regular expression flags. |
| 1501 int flags = 0; | 1503 int flags = 0; |
| 1502 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { | 1504 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) { |
| 1503 RegExp::Flags flag = RegExp::kNone; | 1505 RegExp::Flags flag = RegExp::kNone; |
| 1504 switch (c0_) { | 1506 switch (c0_) { |
| 1505 case 'g': | 1507 case 'g': |
| 1506 flag = RegExp::kGlobal; | 1508 flag = RegExp::kGlobal; |
| 1507 break; | 1509 break; |
| 1508 case 'i': | 1510 case 'i': |
| 1509 flag = RegExp::kIgnoreCase; | 1511 flag = RegExp::kIgnoreCase; |
| 1510 break; | 1512 break; |
| 1511 case 'm': | 1513 case 'm': |
| 1512 flag = RegExp::kMultiline; | 1514 flag = RegExp::kMultiline; |
| (...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1631 to->token = from->token; | 1633 to->token = from->token; |
| 1632 to->location = from->location; | 1634 to->location = from->location; |
| 1633 to->literal_chars->CopyFrom(from->literal_chars); | 1635 to->literal_chars->CopyFrom(from->literal_chars); |
| 1634 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); | 1636 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); |
| 1635 } | 1637 } |
| 1636 | 1638 |
| 1637 | 1639 |
| 1638 | 1640 |
| 1639 } // namespace internal | 1641 } // namespace internal |
| 1640 } // namespace v8 | 1642 } // namespace v8 |
| OLD | NEW |