OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
8 | 8 |
9 #include <stdint.h> | 9 #include <stdint.h> |
10 | 10 |
11 #include <cmath> | 11 #include <cmath> |
12 | 12 |
13 #include "src/ast/ast-value-factory.h" | 13 #include "src/ast/ast-value-factory.h" |
14 #include "src/char-predicates-inl.h" | 14 #include "src/char-predicates-inl.h" |
15 #include "src/conversions-inl.h" | 15 #include "src/conversions-inl.h" |
16 #include "src/list-inl.h" | 16 #include "src/list-inl.h" |
17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol | 17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol |
18 | 18 |
19 namespace v8 { | 19 namespace v8 { |
20 namespace internal { | 20 namespace internal { |
21 | 21 |
| 22 const size_t Utf16CharacterStream::kNoBookmark = |
| 23 std::numeric_limits<size_t>::max(); |
| 24 |
22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { | 25 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { |
23 if (is_one_byte()) { | 26 if (is_one_byte()) { |
24 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); | 27 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); |
25 } | 28 } |
26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); | 29 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); |
27 } | 30 } |
28 | 31 |
29 | 32 |
30 // Default implementation for streams that do not support bookmarks. | |
31 bool Utf16CharacterStream::SetBookmark() { return false; } | |
32 void Utf16CharacterStream::ResetToBookmark() { UNREACHABLE(); } | |
33 | |
34 | 33 |
35 // ---------------------------------------------------------------------------- | 34 // ---------------------------------------------------------------------------- |
36 // Scanner | 35 // Scanner |
37 | 36 |
38 Scanner::Scanner(UnicodeCache* unicode_cache) | 37 Scanner::Scanner(UnicodeCache* unicode_cache) |
39 : unicode_cache_(unicode_cache), | 38 : unicode_cache_(unicode_cache), |
40 bookmark_c0_(kNoBookmark), | 39 bookmark_c0_(kNoBookmark), |
41 octal_pos_(Location::invalid()), | 40 octal_pos_(Location::invalid()), |
42 decimal_with_leading_zero_pos_(Location::invalid()), | 41 decimal_with_leading_zero_pos_(Location::invalid()), |
43 found_html_comment_(false) { | 42 found_html_comment_(false) { |
(...skipping 261 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
305 return c == 0xFFFE; | 304 return c == 0xFFFE; |
306 } | 305 } |
307 | 306 |
308 | 307 |
309 bool Scanner::SkipWhiteSpace() { | 308 bool Scanner::SkipWhiteSpace() { |
310 int start_position = source_pos(); | 309 int start_position = source_pos(); |
311 | 310 |
312 while (true) { | 311 while (true) { |
313 while (true) { | 312 while (true) { |
314 // The unicode cache accepts unsigned inputs. | 313 // The unicode cache accepts unsigned inputs. |
315 if (c0_ < 0) break; | 314 if (c0_ == kEndOfInput) break; |
316 // Advance as long as character is a WhiteSpace or LineTerminator. | 315 // Advance as long as character is a WhiteSpace or LineTerminator. |
317 // Remember if the latter is the case. | 316 // Remember if the latter is the case. |
318 if (unicode_cache_->IsLineTerminator(c0_)) { | 317 if (unicode_cache_->IsLineTerminator(c0_)) { |
319 has_line_terminator_before_next_ = true; | 318 has_line_terminator_before_next_ = true; |
320 } else if (!unicode_cache_->IsWhiteSpace(c0_) && | 319 } else if (!unicode_cache_->IsWhiteSpace(c0_) && |
321 !IsLittleEndianByteOrderMark(c0_)) { | 320 !IsLittleEndianByteOrderMark(c0_)) { |
322 break; | 321 break; |
323 } | 322 } |
324 Advance(); | 323 Advance(); |
325 } | 324 } |
(...skipping 23 matching lines...) Expand all Loading... |
349 | 348 |
350 | 349 |
351 Token::Value Scanner::SkipSingleLineComment() { | 350 Token::Value Scanner::SkipSingleLineComment() { |
352 Advance(); | 351 Advance(); |
353 | 352 |
354 // The line terminator at the end of the line is not considered | 353 // The line terminator at the end of the line is not considered |
355 // to be part of the single-line comment; it is recognized | 354 // to be part of the single-line comment; it is recognized |
356 // separately by the lexical grammar and becomes part of the | 355 // separately by the lexical grammar and becomes part of the |
357 // stream of input elements for the syntactic grammar (see | 356 // stream of input elements for the syntactic grammar (see |
358 // ECMA-262, section 7.4). | 357 // ECMA-262, section 7.4). |
359 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 358 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { |
360 Advance(); | 359 Advance(); |
361 } | 360 } |
362 | 361 |
363 return Token::WHITESPACE; | 362 return Token::WHITESPACE; |
364 } | 363 } |
365 | 364 |
366 | 365 |
367 Token::Value Scanner::SkipSourceURLComment() { | 366 Token::Value Scanner::SkipSourceURLComment() { |
368 TryToParseSourceURLComment(); | 367 TryToParseSourceURLComment(); |
369 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 368 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { |
370 Advance(); | 369 Advance(); |
371 } | 370 } |
372 | 371 |
373 return Token::WHITESPACE; | 372 return Token::WHITESPACE; |
374 } | 373 } |
375 | 374 |
376 | 375 |
377 void Scanner::TryToParseSourceURLComment() { | 376 void Scanner::TryToParseSourceURLComment() { |
378 // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this | 377 // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this |
379 // function will just return if it cannot parse a magic comment. | 378 // function will just return if it cannot parse a magic comment. |
380 if (c0_ < 0 || !unicode_cache_->IsWhiteSpace(c0_)) return; | 379 if (c0_ == kEndOfInput || !unicode_cache_->IsWhiteSpace(c0_)) return; |
381 Advance(); | 380 Advance(); |
382 LiteralBuffer name; | 381 LiteralBuffer name; |
383 while (c0_ >= 0 && !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && | 382 while (c0_ != kEndOfInput && |
384 c0_ != '=') { | 383 !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') { |
385 name.AddChar(c0_); | 384 name.AddChar(c0_); |
386 Advance(); | 385 Advance(); |
387 } | 386 } |
388 if (!name.is_one_byte()) return; | 387 if (!name.is_one_byte()) return; |
389 Vector<const uint8_t> name_literal = name.one_byte_literal(); | 388 Vector<const uint8_t> name_literal = name.one_byte_literal(); |
390 LiteralBuffer* value; | 389 LiteralBuffer* value; |
391 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) { | 390 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) { |
392 value = &source_url_; | 391 value = &source_url_; |
393 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) { | 392 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) { |
394 value = &source_mapping_url_; | 393 value = &source_mapping_url_; |
395 } else { | 394 } else { |
396 return; | 395 return; |
397 } | 396 } |
398 if (c0_ != '=') | 397 if (c0_ != '=') |
399 return; | 398 return; |
400 Advance(); | 399 Advance(); |
401 value->Reset(); | 400 value->Reset(); |
402 while (c0_ >= 0 && unicode_cache_->IsWhiteSpace(c0_)) { | 401 while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) { |
403 Advance(); | 402 Advance(); |
404 } | 403 } |
405 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 404 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { |
406 // Disallowed characters. | 405 // Disallowed characters. |
407 if (c0_ == '"' || c0_ == '\'') { | 406 if (c0_ == '"' || c0_ == '\'') { |
408 value->Reset(); | 407 value->Reset(); |
409 return; | 408 return; |
410 } | 409 } |
411 if (unicode_cache_->IsWhiteSpace(c0_)) { | 410 if (unicode_cache_->IsWhiteSpace(c0_)) { |
412 break; | 411 break; |
413 } | 412 } |
414 value->AddChar(c0_); | 413 value->AddChar(c0_); |
415 Advance(); | 414 Advance(); |
416 } | 415 } |
417 // Allow whitespace at the end. | 416 // Allow whitespace at the end. |
418 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 417 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { |
419 if (!unicode_cache_->IsWhiteSpace(c0_)) { | 418 if (!unicode_cache_->IsWhiteSpace(c0_)) { |
420 value->Reset(); | 419 value->Reset(); |
421 break; | 420 break; |
422 } | 421 } |
423 Advance(); | 422 Advance(); |
424 } | 423 } |
425 } | 424 } |
426 | 425 |
427 | 426 |
428 Token::Value Scanner::SkipMultiLineComment() { | 427 Token::Value Scanner::SkipMultiLineComment() { |
429 DCHECK(c0_ == '*'); | 428 DCHECK(c0_ == '*'); |
430 Advance(); | 429 Advance(); |
431 | 430 |
432 while (c0_ >= 0) { | 431 while (c0_ != kEndOfInput) { |
433 uc32 ch = c0_; | 432 uc32 ch = c0_; |
434 Advance(); | 433 Advance(); |
435 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { | 434 if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(ch)) { |
436 // Following ECMA-262, section 7.4, a comment containing | 435 // Following ECMA-262, section 7.4, a comment containing |
437 // a newline will make the comment count as a line-terminator. | 436 // a newline will make the comment count as a line-terminator. |
438 has_multiline_comment_before_next_ = true; | 437 has_multiline_comment_before_next_ = true; |
439 } | 438 } |
440 // If we have reached the end of the multi-line comment, we | 439 // If we have reached the end of the multi-line comment, we |
441 // consume the '/' and insert a whitespace. This way all | 440 // consume the '/' and insert a whitespace. This way all |
442 // multi-line comments are treated as whitespace. | 441 // multi-line comments are treated as whitespace. |
443 if (ch == '*' && c0_ == '/') { | 442 if (ch == '*' && c0_ == '/') { |
444 c0_ = ' '; | 443 c0_ = ' '; |
445 return Token::WHITESPACE; | 444 return Token::WHITESPACE; |
(...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
709 | 708 |
710 case '~': | 709 case '~': |
711 token = Select(Token::BIT_NOT); | 710 token = Select(Token::BIT_NOT); |
712 break; | 711 break; |
713 | 712 |
714 case '`': | 713 case '`': |
715 token = ScanTemplateStart(); | 714 token = ScanTemplateStart(); |
716 break; | 715 break; |
717 | 716 |
718 default: | 717 default: |
719 if (c0_ < 0) { | 718 if (c0_ == kEndOfInput) { |
720 token = Token::EOS; | 719 token = Token::EOS; |
721 } else if (unicode_cache_->IsIdentifierStart(c0_)) { | 720 } else if (unicode_cache_->IsIdentifierStart(c0_)) { |
722 token = ScanIdentifierOrKeyword(); | 721 token = ScanIdentifierOrKeyword(); |
723 } else if (IsDecimalDigit(c0_)) { | 722 } else if (IsDecimalDigit(c0_)) { |
724 token = ScanNumber(false); | 723 token = ScanNumber(false); |
725 } else if (SkipWhiteSpace()) { | 724 } else if (SkipWhiteSpace()) { |
726 token = Token::WHITESPACE; | 725 token = Token::WHITESPACE; |
727 } else { | 726 } else { |
728 token = Select(Token::ILLEGAL); | 727 token = Select(Token::ILLEGAL); |
729 } | 728 } |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
801 Scan(); | 800 Scan(); |
802 } | 801 } |
803 | 802 |
804 | 803 |
805 template <bool capture_raw, bool in_template_literal> | 804 template <bool capture_raw, bool in_template_literal> |
806 bool Scanner::ScanEscape() { | 805 bool Scanner::ScanEscape() { |
807 uc32 c = c0_; | 806 uc32 c = c0_; |
808 Advance<capture_raw>(); | 807 Advance<capture_raw>(); |
809 | 808 |
810 // Skip escaped newlines. | 809 // Skip escaped newlines. |
811 if (!in_template_literal && c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { | 810 if (!in_template_literal && c0_ != kEndOfInput && |
| 811 unicode_cache_->IsLineTerminator(c)) { |
812 // Allow CR+LF newlines in multiline string literals. | 812 // Allow CR+LF newlines in multiline string literals. |
813 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>(); | 813 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>(); |
814 // Allow LF+CR newlines in multiline string literals. | 814 // Allow LF+CR newlines in multiline string literals. |
815 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>(); | 815 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>(); |
816 return true; | 816 return true; |
817 } | 817 } |
818 | 818 |
819 switch (c) { | 819 switch (c) { |
820 case '\'': // fall through | 820 case '\'': // fall through |
821 case '"' : // fall through | 821 case '"' : // fall through |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
887 Token::Value Scanner::ScanString() { | 887 Token::Value Scanner::ScanString() { |
888 uc32 quote = c0_; | 888 uc32 quote = c0_; |
889 Advance<false, false>(); // consume quote | 889 Advance<false, false>(); // consume quote |
890 | 890 |
891 LiteralScope literal(this); | 891 LiteralScope literal(this); |
892 while (true) { | 892 while (true) { |
893 if (c0_ > kMaxAscii) { | 893 if (c0_ > kMaxAscii) { |
894 HandleLeadSurrogate(); | 894 HandleLeadSurrogate(); |
895 break; | 895 break; |
896 } | 896 } |
897 if (c0_ < 0 || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL; | 897 if (c0_ == kEndOfInput || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL; |
898 if (c0_ == quote) { | 898 if (c0_ == quote) { |
899 literal.Complete(); | 899 literal.Complete(); |
900 Advance<false, false>(); | 900 Advance<false, false>(); |
901 return Token::STRING; | 901 return Token::STRING; |
902 } | 902 } |
903 char c = static_cast<char>(c0_); | 903 char c = static_cast<char>(c0_); |
904 if (c == '\\') break; | 904 if (c == '\\') break; |
905 Advance<false, false>(); | 905 Advance<false, false>(); |
906 AddLiteralChar(c); | 906 AddLiteralChar(c); |
907 } | 907 } |
908 | 908 |
909 while (c0_ != quote && c0_ >= 0 | 909 while (c0_ != quote && c0_ != kEndOfInput && |
910 && !unicode_cache_->IsLineTerminator(c0_)) { | 910 !unicode_cache_->IsLineTerminator(c0_)) { |
911 uc32 c = c0_; | 911 uc32 c = c0_; |
912 Advance(); | 912 Advance(); |
913 if (c == '\\') { | 913 if (c == '\\') { |
914 if (c0_ < 0 || !ScanEscape<false, false>()) { | 914 if (c0_ == kEndOfInput || !ScanEscape<false, false>()) { |
915 return Token::ILLEGAL; | 915 return Token::ILLEGAL; |
916 } | 916 } |
917 } else { | 917 } else { |
918 AddLiteralChar(c); | 918 AddLiteralChar(c); |
919 } | 919 } |
920 } | 920 } |
921 if (c0_ != quote) return Token::ILLEGAL; | 921 if (c0_ != quote) return Token::ILLEGAL; |
922 literal.Complete(); | 922 literal.Complete(); |
923 | 923 |
924 Advance(); // consume quote | 924 Advance(); // consume quote |
(...skipping 25 matching lines...) Expand all Loading... |
950 Advance<capture_raw>(); | 950 Advance<capture_raw>(); |
951 if (c == '`') { | 951 if (c == '`') { |
952 result = Token::TEMPLATE_TAIL; | 952 result = Token::TEMPLATE_TAIL; |
953 ReduceRawLiteralLength(1); | 953 ReduceRawLiteralLength(1); |
954 break; | 954 break; |
955 } else if (c == '$' && c0_ == '{') { | 955 } else if (c == '$' && c0_ == '{') { |
956 Advance<capture_raw>(); // Consume '{' | 956 Advance<capture_raw>(); // Consume '{' |
957 ReduceRawLiteralLength(2); | 957 ReduceRawLiteralLength(2); |
958 break; | 958 break; |
959 } else if (c == '\\') { | 959 } else if (c == '\\') { |
960 if (c0_ > 0 && unicode_cache_->IsLineTerminator(c0_)) { | 960 if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(c0_)) { |
961 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty | 961 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty |
962 // code unit sequence. | 962 // code unit sequence. |
963 uc32 lastChar = c0_; | 963 uc32 lastChar = c0_; |
964 Advance<capture_raw>(); | 964 Advance<capture_raw>(); |
965 if (lastChar == '\r') { | 965 if (lastChar == '\r') { |
966 ReduceRawLiteralLength(1); // Remove \r | 966 ReduceRawLiteralLength(1); // Remove \r |
967 if (c0_ == '\n') { | 967 if (c0_ == '\n') { |
968 Advance<capture_raw>(); // Adds \n | 968 Advance<capture_raw>(); // Adds \n |
969 } else { | 969 } else { |
970 AddRawLiteralChar('\n'); | 970 AddRawLiteralChar('\n'); |
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1148 return Token::ILLEGAL; | 1148 return Token::ILLEGAL; |
1149 } | 1149 } |
1150 ScanDecimalDigits(); | 1150 ScanDecimalDigits(); |
1151 } | 1151 } |
1152 | 1152 |
1153 // The source character immediately following a numeric literal must | 1153 // The source character immediately following a numeric literal must |
1154 // not be an identifier start or a decimal digit; see ECMA-262 | 1154 // not be an identifier start or a decimal digit; see ECMA-262 |
1155 // section 7.8.3, page 17 (note that we read only one decimal digit | 1155 // section 7.8.3, page 17 (note that we read only one decimal digit |
1156 // if the value is 0). | 1156 // if the value is 0). |
1157 if (IsDecimalDigit(c0_) || | 1157 if (IsDecimalDigit(c0_) || |
1158 (c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_))) | 1158 (c0_ != kEndOfInput && unicode_cache_->IsIdentifierStart(c0_))) |
1159 return Token::ILLEGAL; | 1159 return Token::ILLEGAL; |
1160 | 1160 |
1161 literal.Complete(); | 1161 literal.Complete(); |
1162 | 1162 |
1163 if (kind == DECIMAL_WITH_LEADING_ZERO) | 1163 if (kind == DECIMAL_WITH_LEADING_ZERO) |
1164 decimal_with_leading_zero_pos_ = Location(start_pos, source_pos()); | 1164 decimal_with_leading_zero_pos_ = Location(start_pos, source_pos()); |
1165 return Token::NUMBER; | 1165 return Token::NUMBER; |
1166 } | 1166 } |
1167 | 1167 |
1168 | 1168 |
(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1375 } | 1375 } |
1376 AddLiteralChar(c); | 1376 AddLiteralChar(c); |
1377 return ScanIdentifierSuffix(&literal, true); | 1377 return ScanIdentifierSuffix(&literal, true); |
1378 } else { | 1378 } else { |
1379 uc32 first_char = c0_; | 1379 uc32 first_char = c0_; |
1380 Advance(); | 1380 Advance(); |
1381 AddLiteralChar(first_char); | 1381 AddLiteralChar(first_char); |
1382 } | 1382 } |
1383 | 1383 |
1384 // Scan the rest of the identifier characters. | 1384 // Scan the rest of the identifier characters. |
1385 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { | 1385 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) { |
1386 if (c0_ != '\\') { | 1386 if (c0_ != '\\') { |
1387 uc32 next_char = c0_; | 1387 uc32 next_char = c0_; |
1388 Advance(); | 1388 Advance(); |
1389 AddLiteralChar(next_char); | 1389 AddLiteralChar(next_char); |
1390 continue; | 1390 continue; |
1391 } | 1391 } |
1392 // Fallthrough if no longer able to complete keyword. | 1392 // Fallthrough if no longer able to complete keyword. |
1393 return ScanIdentifierSuffix(&literal, false); | 1393 return ScanIdentifierSuffix(&literal, false); |
1394 } | 1394 } |
1395 | 1395 |
1396 if (next_.literal_chars->is_one_byte()) { | 1396 if (next_.literal_chars->is_one_byte()) { |
1397 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); | 1397 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); |
1398 Token::Value token = | 1398 Token::Value token = |
1399 KeywordOrIdentifierToken(chars.start(), chars.length()); | 1399 KeywordOrIdentifierToken(chars.start(), chars.length()); |
1400 if (token == Token::IDENTIFIER) literal.Complete(); | 1400 if (token == Token::IDENTIFIER) literal.Complete(); |
1401 return token; | 1401 return token; |
1402 } | 1402 } |
1403 literal.Complete(); | 1403 literal.Complete(); |
1404 return Token::IDENTIFIER; | 1404 return Token::IDENTIFIER; |
1405 } | 1405 } |
1406 | 1406 |
1407 | 1407 |
1408 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal, | 1408 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal, |
1409 bool escaped) { | 1409 bool escaped) { |
1410 // Scan the rest of the identifier characters. | 1410 // Scan the rest of the identifier characters. |
1411 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { | 1411 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) { |
1412 if (c0_ == '\\') { | 1412 if (c0_ == '\\') { |
1413 uc32 c = ScanIdentifierUnicodeEscape(); | 1413 uc32 c = ScanIdentifierUnicodeEscape(); |
1414 escaped = true; | 1414 escaped = true; |
1415 // Only allow legal identifier part characters. | 1415 // Only allow legal identifier part characters. |
1416 if (c < 0 || | 1416 if (c < 0 || |
1417 c == '\\' || | 1417 c == '\\' || |
1418 !unicode_cache_->IsIdentifierPart(c)) { | 1418 !unicode_cache_->IsIdentifierPart(c)) { |
1419 return Token::ILLEGAL; | 1419 return Token::ILLEGAL; |
1420 } | 1420 } |
1421 AddLiteralChar(c); | 1421 AddLiteralChar(c); |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1458 | 1458 |
1459 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 1459 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
1460 // the scanner should pass uninterpreted bodies to the RegExp | 1460 // the scanner should pass uninterpreted bodies to the RegExp |
1461 // constructor. | 1461 // constructor. |
1462 LiteralScope literal(this); | 1462 LiteralScope literal(this); |
1463 if (seen_equal) { | 1463 if (seen_equal) { |
1464 AddLiteralChar('='); | 1464 AddLiteralChar('='); |
1465 } | 1465 } |
1466 | 1466 |
1467 while (c0_ != '/' || in_character_class) { | 1467 while (c0_ != '/' || in_character_class) { |
1468 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; | 1468 if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_)) |
| 1469 return false; |
1469 if (c0_ == '\\') { // Escape sequence. | 1470 if (c0_ == '\\') { // Escape sequence. |
1470 AddLiteralCharAdvance(); | 1471 AddLiteralCharAdvance(); |
1471 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; | 1472 if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_)) |
| 1473 return false; |
1472 AddLiteralCharAdvance(); | 1474 AddLiteralCharAdvance(); |
1473 // If the escape allows more characters, i.e., \x??, \u????, or \c?, | 1475 // If the escape allows more characters, i.e., \x??, \u????, or \c?, |
1474 // only "safe" characters are allowed (letters, digits, underscore), | 1476 // only "safe" characters are allowed (letters, digits, underscore), |
1475 // otherwise the escape isn't valid and the invalid character has | 1477 // otherwise the escape isn't valid and the invalid character has |
1476 // its normal meaning. I.e., we can just continue scanning without | 1478 // its normal meaning. I.e., we can just continue scanning without |
1477 // worrying whether the following characters are part of the escape | 1479 // worrying whether the following characters are part of the escape |
1478 // or not, since any '/', '\\' or '[' is guaranteed to not be part | 1480 // or not, since any '/', '\\' or '[' is guaranteed to not be part |
1479 // of the escape sequence. | 1481 // of the escape sequence. |
1480 | 1482 |
1481 // TODO(896): At some point, parse RegExps more thoroughly to capture | 1483 // TODO(896): At some point, parse RegExps more thoroughly to capture |
(...skipping 10 matching lines...) Expand all Loading... |
1492 next_.token = Token::REGEXP_LITERAL; | 1494 next_.token = Token::REGEXP_LITERAL; |
1493 return true; | 1495 return true; |
1494 } | 1496 } |
1495 | 1497 |
1496 | 1498 |
1497 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() { | 1499 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() { |
1498 DCHECK(next_.token == Token::REGEXP_LITERAL); | 1500 DCHECK(next_.token == Token::REGEXP_LITERAL); |
1499 | 1501 |
1500 // Scan regular expression flags. | 1502 // Scan regular expression flags. |
1501 int flags = 0; | 1503 int flags = 0; |
1502 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { | 1504 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) { |
1503 RegExp::Flags flag = RegExp::kNone; | 1505 RegExp::Flags flag = RegExp::kNone; |
1504 switch (c0_) { | 1506 switch (c0_) { |
1505 case 'g': | 1507 case 'g': |
1506 flag = RegExp::kGlobal; | 1508 flag = RegExp::kGlobal; |
1507 break; | 1509 break; |
1508 case 'i': | 1510 case 'i': |
1509 flag = RegExp::kIgnoreCase; | 1511 flag = RegExp::kIgnoreCase; |
1510 break; | 1512 break; |
1511 case 'm': | 1513 case 'm': |
1512 flag = RegExp::kMultiline; | 1514 flag = RegExp::kMultiline; |
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1631 to->token = from->token; | 1633 to->token = from->token; |
1632 to->location = from->location; | 1634 to->location = from->location; |
1633 to->literal_chars->CopyFrom(from->literal_chars); | 1635 to->literal_chars->CopyFrom(from->literal_chars); |
1634 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); | 1636 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); |
1635 } | 1637 } |
1636 | 1638 |
1637 | 1639 |
1638 | 1640 |
1639 } // namespace internal | 1641 } // namespace internal |
1640 } // namespace v8 | 1642 } // namespace v8 |
OLD | NEW |