OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
8 | 8 |
9 #include <stdint.h> | 9 #include <stdint.h> |
10 | 10 |
11 #include <cmath> | 11 #include <cmath> |
12 | 12 |
13 #include "src/ast/ast-value-factory.h" | 13 #include "src/ast/ast-value-factory.h" |
14 #include "src/char-predicates-inl.h" | 14 #include "src/char-predicates-inl.h" |
15 #include "src/conversions-inl.h" | 15 #include "src/conversions-inl.h" |
16 #include "src/list-inl.h" | 16 #include "src/list-inl.h" |
17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol | 17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol |
18 | 18 |
19 namespace v8 { | 19 namespace v8 { |
20 namespace internal { | 20 namespace internal { |
21 | 21 |
22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { | 22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { |
23 if (is_one_byte()) { | 23 if (is_one_byte()) { |
24 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); | 24 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); |
25 } | 25 } |
26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); | 26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); |
27 } | 27 } |
28 | 28 |
29 | 29 |
30 // Default implementation for streams that do not support bookmarks. | |
31 bool Utf16CharacterStream::SetBookmark() { return false; } | |
32 void Utf16CharacterStream::ResetToBookmark() { UNREACHABLE(); } | |
33 | 30 |
34 | 31 |
35 // ---------------------------------------------------------------------------- | 32 // ---------------------------------------------------------------------------- |
36 // Scanner | 33 // Scanner |
37 | 34 |
38 Scanner::Scanner(UnicodeCache* unicode_cache) | 35 Scanner::Scanner(UnicodeCache* unicode_cache) |
39 : unicode_cache_(unicode_cache), | 36 : unicode_cache_(unicode_cache), |
40 bookmark_c0_(kNoBookmark), | 37 bookmark_c0_(kNoBookmark), |
41 octal_pos_(Location::invalid()), | 38 octal_pos_(Location::invalid()), |
42 decimal_with_leading_zero_pos_(Location::invalid()), | 39 decimal_with_leading_zero_pos_(Location::invalid()), |
(...skipping 262 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
305 return c == 0xFFFE; | 302 return c == 0xFFFE; |
306 } | 303 } |
307 | 304 |
308 | 305 |
309 bool Scanner::SkipWhiteSpace() { | 306 bool Scanner::SkipWhiteSpace() { |
310 int start_position = source_pos(); | 307 int start_position = source_pos(); |
311 | 308 |
312 while (true) { | 309 while (true) { |
313 while (true) { | 310 while (true) { |
314 // The unicode cache accepts unsigned inputs. | 311 // The unicode cache accepts unsigned inputs. |
315 if (c0_ < 0) break; | 312 if (c0_ == kEndOfInput) break; |
316 // Advance as long as character is a WhiteSpace or LineTerminator. | 313 // Advance as long as character is a WhiteSpace or LineTerminator. |
317 // Remember if the latter is the case. | 314 // Remember if the latter is the case. |
318 if (unicode_cache_->IsLineTerminator(c0_)) { | 315 if (unicode_cache_->IsLineTerminator(c0_)) { |
319 has_line_terminator_before_next_ = true; | 316 has_line_terminator_before_next_ = true; |
320 } else if (!unicode_cache_->IsWhiteSpace(c0_) && | 317 } else if (!unicode_cache_->IsWhiteSpace(c0_) && |
321 !IsLittleEndianByteOrderMark(c0_)) { | 318 !IsLittleEndianByteOrderMark(c0_)) { |
322 break; | 319 break; |
323 } | 320 } |
324 Advance(); | 321 Advance(); |
325 } | 322 } |
(...skipping 23 matching lines...) Expand all Loading... |
349 | 346 |
350 | 347 |
351 Token::Value Scanner::SkipSingleLineComment() { | 348 Token::Value Scanner::SkipSingleLineComment() { |
352 Advance(); | 349 Advance(); |
353 | 350 |
354 // The line terminator at the end of the line is not considered | 351 // The line terminator at the end of the line is not considered |
355 // to be part of the single-line comment; it is recognized | 352 // to be part of the single-line comment; it is recognized |
356 // separately by the lexical grammar and becomes part of the | 353 // separately by the lexical grammar and becomes part of the |
357 // stream of input elements for the syntactic grammar (see | 354 // stream of input elements for the syntactic grammar (see |
358 // ECMA-262, section 7.4). | 355 // ECMA-262, section 7.4). |
359 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 356 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { |
360 Advance(); | 357 Advance(); |
361 } | 358 } |
362 | 359 |
363 return Token::WHITESPACE; | 360 return Token::WHITESPACE; |
364 } | 361 } |
365 | 362 |
366 | 363 |
367 Token::Value Scanner::SkipSourceURLComment() { | 364 Token::Value Scanner::SkipSourceURLComment() { |
368 TryToParseSourceURLComment(); | 365 TryToParseSourceURLComment(); |
369 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 366 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { |
370 Advance(); | 367 Advance(); |
371 } | 368 } |
372 | 369 |
373 return Token::WHITESPACE; | 370 return Token::WHITESPACE; |
374 } | 371 } |
375 | 372 |
376 | 373 |
377 void Scanner::TryToParseSourceURLComment() { | 374 void Scanner::TryToParseSourceURLComment() { |
378 // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this | 375 // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this |
379 // function will just return if it cannot parse a magic comment. | 376 // function will just return if it cannot parse a magic comment. |
380 if (c0_ < 0 || !unicode_cache_->IsWhiteSpace(c0_)) return; | 377 if (c0_ == kEndOfInput || !unicode_cache_->IsWhiteSpace(c0_)) return; |
381 Advance(); | 378 Advance(); |
382 LiteralBuffer name; | 379 LiteralBuffer name; |
383 while (c0_ >= 0 && !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && | 380 while (c0_ != kEndOfInput && |
384 c0_ != '=') { | 381 !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') { |
385 name.AddChar(c0_); | 382 name.AddChar(c0_); |
386 Advance(); | 383 Advance(); |
387 } | 384 } |
388 if (!name.is_one_byte()) return; | 385 if (!name.is_one_byte()) return; |
389 Vector<const uint8_t> name_literal = name.one_byte_literal(); | 386 Vector<const uint8_t> name_literal = name.one_byte_literal(); |
390 LiteralBuffer* value; | 387 LiteralBuffer* value; |
391 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) { | 388 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) { |
392 value = &source_url_; | 389 value = &source_url_; |
393 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) { | 390 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) { |
394 value = &source_mapping_url_; | 391 value = &source_mapping_url_; |
395 } else { | 392 } else { |
396 return; | 393 return; |
397 } | 394 } |
398 if (c0_ != '=') | 395 if (c0_ != '=') |
399 return; | 396 return; |
400 Advance(); | 397 Advance(); |
401 value->Reset(); | 398 value->Reset(); |
402 while (c0_ >= 0 && unicode_cache_->IsWhiteSpace(c0_)) { | 399 while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) { |
403 Advance(); | 400 Advance(); |
404 } | 401 } |
405 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 402 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { |
406 // Disallowed characters. | 403 // Disallowed characters. |
407 if (c0_ == '"' || c0_ == '\'') { | 404 if (c0_ == '"' || c0_ == '\'') { |
408 value->Reset(); | 405 value->Reset(); |
409 return; | 406 return; |
410 } | 407 } |
411 if (unicode_cache_->IsWhiteSpace(c0_)) { | 408 if (unicode_cache_->IsWhiteSpace(c0_)) { |
412 break; | 409 break; |
413 } | 410 } |
414 value->AddChar(c0_); | 411 value->AddChar(c0_); |
415 Advance(); | 412 Advance(); |
416 } | 413 } |
417 // Allow whitespace at the end. | 414 // Allow whitespace at the end. |
418 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 415 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { |
419 if (!unicode_cache_->IsWhiteSpace(c0_)) { | 416 if (!unicode_cache_->IsWhiteSpace(c0_)) { |
420 value->Reset(); | 417 value->Reset(); |
421 break; | 418 break; |
422 } | 419 } |
423 Advance(); | 420 Advance(); |
424 } | 421 } |
425 } | 422 } |
426 | 423 |
427 | 424 |
428 Token::Value Scanner::SkipMultiLineComment() { | 425 Token::Value Scanner::SkipMultiLineComment() { |
429 DCHECK(c0_ == '*'); | 426 DCHECK(c0_ == '*'); |
430 Advance(); | 427 Advance(); |
431 | 428 |
432 while (c0_ >= 0) { | 429 while (c0_ != kEndOfInput) { |
433 uc32 ch = c0_; | 430 uc32 ch = c0_; |
434 Advance(); | 431 Advance(); |
435 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { | 432 if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(ch)) { |
436 // Following ECMA-262, section 7.4, a comment containing | 433 // Following ECMA-262, section 7.4, a comment containing |
437 // a newline will make the comment count as a line-terminator. | 434 // a newline will make the comment count as a line-terminator. |
438 has_multiline_comment_before_next_ = true; | 435 has_multiline_comment_before_next_ = true; |
439 } | 436 } |
440 // If we have reached the end of the multi-line comment, we | 437 // If we have reached the end of the multi-line comment, we |
441 // consume the '/' and insert a whitespace. This way all | 438 // consume the '/' and insert a whitespace. This way all |
442 // multi-line comments are treated as whitespace. | 439 // multi-line comments are treated as whitespace. |
443 if (ch == '*' && c0_ == '/') { | 440 if (ch == '*' && c0_ == '/') { |
444 c0_ = ' '; | 441 c0_ = ' '; |
445 return Token::WHITESPACE; | 442 return Token::WHITESPACE; |
(...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
709 | 706 |
710 case '~': | 707 case '~': |
711 token = Select(Token::BIT_NOT); | 708 token = Select(Token::BIT_NOT); |
712 break; | 709 break; |
713 | 710 |
714 case '`': | 711 case '`': |
715 token = ScanTemplateStart(); | 712 token = ScanTemplateStart(); |
716 break; | 713 break; |
717 | 714 |
718 default: | 715 default: |
719 if (c0_ < 0) { | 716 if (c0_ == kEndOfInput) { |
720 token = Token::EOS; | 717 token = Token::EOS; |
721 } else if (unicode_cache_->IsIdentifierStart(c0_)) { | 718 } else if (unicode_cache_->IsIdentifierStart(c0_)) { |
722 token = ScanIdentifierOrKeyword(); | 719 token = ScanIdentifierOrKeyword(); |
723 } else if (IsDecimalDigit(c0_)) { | 720 } else if (IsDecimalDigit(c0_)) { |
724 token = ScanNumber(false); | 721 token = ScanNumber(false); |
725 } else if (SkipWhiteSpace()) { | 722 } else if (SkipWhiteSpace()) { |
726 token = Token::WHITESPACE; | 723 token = Token::WHITESPACE; |
727 } else { | 724 } else { |
728 token = Select(Token::ILLEGAL); | 725 token = Select(Token::ILLEGAL); |
729 } | 726 } |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
801 Scan(); | 798 Scan(); |
802 } | 799 } |
803 | 800 |
804 | 801 |
805 template <bool capture_raw, bool in_template_literal> | 802 template <bool capture_raw, bool in_template_literal> |
806 bool Scanner::ScanEscape() { | 803 bool Scanner::ScanEscape() { |
807 uc32 c = c0_; | 804 uc32 c = c0_; |
808 Advance<capture_raw>(); | 805 Advance<capture_raw>(); |
809 | 806 |
810 // Skip escaped newlines. | 807 // Skip escaped newlines. |
811 if (!in_template_literal && c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { | 808 if (!in_template_literal && c0_ != kEndOfInput && |
| 809 unicode_cache_->IsLineTerminator(c)) { |
812 // Allow CR+LF newlines in multiline string literals. | 810 // Allow CR+LF newlines in multiline string literals. |
813 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>(); | 811 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>(); |
814 // Allow LF+CR newlines in multiline string literals. | 812 // Allow LF+CR newlines in multiline string literals. |
815 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>(); | 813 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>(); |
816 return true; | 814 return true; |
817 } | 815 } |
818 | 816 |
819 switch (c) { | 817 switch (c) { |
820 case '\'': // fall through | 818 case '\'': // fall through |
821 case '"' : // fall through | 819 case '"' : // fall through |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
887 Token::Value Scanner::ScanString() { | 885 Token::Value Scanner::ScanString() { |
888 uc32 quote = c0_; | 886 uc32 quote = c0_; |
889 Advance<false, false>(); // consume quote | 887 Advance<false, false>(); // consume quote |
890 | 888 |
891 LiteralScope literal(this); | 889 LiteralScope literal(this); |
892 while (true) { | 890 while (true) { |
893 if (c0_ > kMaxAscii) { | 891 if (c0_ > kMaxAscii) { |
894 HandleLeadSurrogate(); | 892 HandleLeadSurrogate(); |
895 break; | 893 break; |
896 } | 894 } |
897 if (c0_ < 0 || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL; | 895 if (c0_ == kEndOfInput || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL; |
898 if (c0_ == quote) { | 896 if (c0_ == quote) { |
899 literal.Complete(); | 897 literal.Complete(); |
900 Advance<false, false>(); | 898 Advance<false, false>(); |
901 return Token::STRING; | 899 return Token::STRING; |
902 } | 900 } |
903 char c = static_cast<char>(c0_); | 901 char c = static_cast<char>(c0_); |
904 if (c == '\\') break; | 902 if (c == '\\') break; |
905 Advance<false, false>(); | 903 Advance<false, false>(); |
906 AddLiteralChar(c); | 904 AddLiteralChar(c); |
907 } | 905 } |
908 | 906 |
909 while (c0_ != quote && c0_ >= 0 | 907 while (c0_ != quote && c0_ != kEndOfInput && |
910 && !unicode_cache_->IsLineTerminator(c0_)) { | 908 !unicode_cache_->IsLineTerminator(c0_)) { |
911 uc32 c = c0_; | 909 uc32 c = c0_; |
912 Advance(); | 910 Advance(); |
913 if (c == '\\') { | 911 if (c == '\\') { |
914 if (c0_ < 0 || !ScanEscape<false, false>()) { | 912 if (c0_ == kEndOfInput || !ScanEscape<false, false>()) { |
915 return Token::ILLEGAL; | 913 return Token::ILLEGAL; |
916 } | 914 } |
917 } else { | 915 } else { |
918 AddLiteralChar(c); | 916 AddLiteralChar(c); |
919 } | 917 } |
920 } | 918 } |
921 if (c0_ != quote) return Token::ILLEGAL; | 919 if (c0_ != quote) return Token::ILLEGAL; |
922 literal.Complete(); | 920 literal.Complete(); |
923 | 921 |
924 Advance(); // consume quote | 922 Advance(); // consume quote |
(...skipping 25 matching lines...) Expand all Loading... |
950 Advance<capture_raw>(); | 948 Advance<capture_raw>(); |
951 if (c == '`') { | 949 if (c == '`') { |
952 result = Token::TEMPLATE_TAIL; | 950 result = Token::TEMPLATE_TAIL; |
953 ReduceRawLiteralLength(1); | 951 ReduceRawLiteralLength(1); |
954 break; | 952 break; |
955 } else if (c == '$' && c0_ == '{') { | 953 } else if (c == '$' && c0_ == '{') { |
956 Advance<capture_raw>(); // Consume '{' | 954 Advance<capture_raw>(); // Consume '{' |
957 ReduceRawLiteralLength(2); | 955 ReduceRawLiteralLength(2); |
958 break; | 956 break; |
959 } else if (c == '\\') { | 957 } else if (c == '\\') { |
960 if (c0_ > 0 && unicode_cache_->IsLineTerminator(c0_)) { | 958 if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(c0_)) { |
961 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty | 959 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty |
962 // code unit sequence. | 960 // code unit sequence. |
963 uc32 lastChar = c0_; | 961 uc32 lastChar = c0_; |
964 Advance<capture_raw>(); | 962 Advance<capture_raw>(); |
965 if (lastChar == '\r') { | 963 if (lastChar == '\r') { |
966 ReduceRawLiteralLength(1); // Remove \r | 964 ReduceRawLiteralLength(1); // Remove \r |
967 if (c0_ == '\n') { | 965 if (c0_ == '\n') { |
968 Advance<capture_raw>(); // Adds \n | 966 Advance<capture_raw>(); // Adds \n |
969 } else { | 967 } else { |
970 AddRawLiteralChar('\n'); | 968 AddRawLiteralChar('\n'); |
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1148 return Token::ILLEGAL; | 1146 return Token::ILLEGAL; |
1149 } | 1147 } |
1150 ScanDecimalDigits(); | 1148 ScanDecimalDigits(); |
1151 } | 1149 } |
1152 | 1150 |
1153 // The source character immediately following a numeric literal must | 1151 // The source character immediately following a numeric literal must |
1154 // not be an identifier start or a decimal digit; see ECMA-262 | 1152 // not be an identifier start or a decimal digit; see ECMA-262 |
1155 // section 7.8.3, page 17 (note that we read only one decimal digit | 1153 // section 7.8.3, page 17 (note that we read only one decimal digit |
1156 // if the value is 0). | 1154 // if the value is 0). |
1157 if (IsDecimalDigit(c0_) || | 1155 if (IsDecimalDigit(c0_) || |
1158 (c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_))) | 1156 (c0_ != kEndOfInput && unicode_cache_->IsIdentifierStart(c0_))) |
1159 return Token::ILLEGAL; | 1157 return Token::ILLEGAL; |
1160 | 1158 |
1161 literal.Complete(); | 1159 literal.Complete(); |
1162 | 1160 |
1163 if (kind == DECIMAL_WITH_LEADING_ZERO) | 1161 if (kind == DECIMAL_WITH_LEADING_ZERO) |
1164 decimal_with_leading_zero_pos_ = Location(start_pos, source_pos()); | 1162 decimal_with_leading_zero_pos_ = Location(start_pos, source_pos()); |
1165 return Token::NUMBER; | 1163 return Token::NUMBER; |
1166 } | 1164 } |
1167 | 1165 |
1168 | 1166 |
(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1375 } | 1373 } |
1376 AddLiteralChar(c); | 1374 AddLiteralChar(c); |
1377 return ScanIdentifierSuffix(&literal, true); | 1375 return ScanIdentifierSuffix(&literal, true); |
1378 } else { | 1376 } else { |
1379 uc32 first_char = c0_; | 1377 uc32 first_char = c0_; |
1380 Advance(); | 1378 Advance(); |
1381 AddLiteralChar(first_char); | 1379 AddLiteralChar(first_char); |
1382 } | 1380 } |
1383 | 1381 |
1384 // Scan the rest of the identifier characters. | 1382 // Scan the rest of the identifier characters. |
1385 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { | 1383 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) { |
1386 if (c0_ != '\\') { | 1384 if (c0_ != '\\') { |
1387 uc32 next_char = c0_; | 1385 uc32 next_char = c0_; |
1388 Advance(); | 1386 Advance(); |
1389 AddLiteralChar(next_char); | 1387 AddLiteralChar(next_char); |
1390 continue; | 1388 continue; |
1391 } | 1389 } |
1392 // Fallthrough if no longer able to complete keyword. | 1390 // Fallthrough if no longer able to complete keyword. |
1393 return ScanIdentifierSuffix(&literal, false); | 1391 return ScanIdentifierSuffix(&literal, false); |
1394 } | 1392 } |
1395 | 1393 |
1396 if (next_.literal_chars->is_one_byte()) { | 1394 if (next_.literal_chars->is_one_byte()) { |
1397 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); | 1395 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); |
1398 Token::Value token = | 1396 Token::Value token = |
1399 KeywordOrIdentifierToken(chars.start(), chars.length()); | 1397 KeywordOrIdentifierToken(chars.start(), chars.length()); |
1400 if (token == Token::IDENTIFIER) literal.Complete(); | 1398 if (token == Token::IDENTIFIER) literal.Complete(); |
1401 return token; | 1399 return token; |
1402 } | 1400 } |
1403 literal.Complete(); | 1401 literal.Complete(); |
1404 return Token::IDENTIFIER; | 1402 return Token::IDENTIFIER; |
1405 } | 1403 } |
1406 | 1404 |
1407 | 1405 |
1408 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal, | 1406 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal, |
1409 bool escaped) { | 1407 bool escaped) { |
1410 // Scan the rest of the identifier characters. | 1408 // Scan the rest of the identifier characters. |
1411 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { | 1409 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) { |
1412 if (c0_ == '\\') { | 1410 if (c0_ == '\\') { |
1413 uc32 c = ScanIdentifierUnicodeEscape(); | 1411 uc32 c = ScanIdentifierUnicodeEscape(); |
1414 escaped = true; | 1412 escaped = true; |
1415 // Only allow legal identifier part characters. | 1413 // Only allow legal identifier part characters. |
1416 if (c < 0 || | 1414 if (c < 0 || |
1417 c == '\\' || | 1415 c == '\\' || |
1418 !unicode_cache_->IsIdentifierPart(c)) { | 1416 !unicode_cache_->IsIdentifierPart(c)) { |
1419 return Token::ILLEGAL; | 1417 return Token::ILLEGAL; |
1420 } | 1418 } |
1421 AddLiteralChar(c); | 1419 AddLiteralChar(c); |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1458 | 1456 |
1459 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 1457 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
1460 // the scanner should pass uninterpreted bodies to the RegExp | 1458 // the scanner should pass uninterpreted bodies to the RegExp |
1461 // constructor. | 1459 // constructor. |
1462 LiteralScope literal(this); | 1460 LiteralScope literal(this); |
1463 if (seen_equal) { | 1461 if (seen_equal) { |
1464 AddLiteralChar('='); | 1462 AddLiteralChar('='); |
1465 } | 1463 } |
1466 | 1464 |
1467 while (c0_ != '/' || in_character_class) { | 1465 while (c0_ != '/' || in_character_class) { |
1468 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; | 1466 if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_)) |
| 1467 return false; |
1469 if (c0_ == '\\') { // Escape sequence. | 1468 if (c0_ == '\\') { // Escape sequence. |
1470 AddLiteralCharAdvance(); | 1469 AddLiteralCharAdvance(); |
1471 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; | 1470 if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_)) |
| 1471 return false; |
1472 AddLiteralCharAdvance(); | 1472 AddLiteralCharAdvance(); |
1473 // If the escape allows more characters, i.e., \x??, \u????, or \c?, | 1473 // If the escape allows more characters, i.e., \x??, \u????, or \c?, |
1474 // only "safe" characters are allowed (letters, digits, underscore), | 1474 // only "safe" characters are allowed (letters, digits, underscore), |
1475 // otherwise the escape isn't valid and the invalid character has | 1475 // otherwise the escape isn't valid and the invalid character has |
1476 // its normal meaning. I.e., we can just continue scanning without | 1476 // its normal meaning. I.e., we can just continue scanning without |
1477 // worrying whether the following characters are part of the escape | 1477 // worrying whether the following characters are part of the escape |
1478 // or not, since any '/', '\\' or '[' is guaranteed to not be part | 1478 // or not, since any '/', '\\' or '[' is guaranteed to not be part |
1479 // of the escape sequence. | 1479 // of the escape sequence. |
1480 | 1480 |
1481 // TODO(896): At some point, parse RegExps more throughly to capture | 1481 // TODO(896): At some point, parse RegExps more throughly to capture |
(...skipping 10 matching lines...) Expand all Loading... |
1492 next_.token = Token::REGEXP_LITERAL; | 1492 next_.token = Token::REGEXP_LITERAL; |
1493 return true; | 1493 return true; |
1494 } | 1494 } |
1495 | 1495 |
1496 | 1496 |
1497 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() { | 1497 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() { |
1498 DCHECK(next_.token == Token::REGEXP_LITERAL); | 1498 DCHECK(next_.token == Token::REGEXP_LITERAL); |
1499 | 1499 |
1500 // Scan regular expression flags. | 1500 // Scan regular expression flags. |
1501 int flags = 0; | 1501 int flags = 0; |
1502 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { | 1502 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) { |
1503 RegExp::Flags flag = RegExp::kNone; | 1503 RegExp::Flags flag = RegExp::kNone; |
1504 switch (c0_) { | 1504 switch (c0_) { |
1505 case 'g': | 1505 case 'g': |
1506 flag = RegExp::kGlobal; | 1506 flag = RegExp::kGlobal; |
1507 break; | 1507 break; |
1508 case 'i': | 1508 case 'i': |
1509 flag = RegExp::kIgnoreCase; | 1509 flag = RegExp::kIgnoreCase; |
1510 break; | 1510 break; |
1511 case 'm': | 1511 case 'm': |
1512 flag = RegExp::kMultiline; | 1512 flag = RegExp::kMultiline; |
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1631 to->token = from->token; | 1631 to->token = from->token; |
1632 to->location = from->location; | 1632 to->location = from->location; |
1633 to->literal_chars->CopyFrom(from->literal_chars); | 1633 to->literal_chars->CopyFrom(from->literal_chars); |
1634 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); | 1634 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); |
1635 } | 1635 } |
1636 | 1636 |
1637 | 1637 |
1638 | 1638 |
1639 } // namespace internal | 1639 } // namespace internal |
1640 } // namespace v8 | 1640 } // namespace v8 |
OLD | NEW |