Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(809)

Side by Side Diff: src/parsing/scanner.cc

Issue 2314663002: Rework scanner-character-streams. (Closed)
Patch Set: Marja's feedback, round 1. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include "src/parsing/scanner.h" 7 #include "src/parsing/scanner.h"
8 8
9 #include <stdint.h> 9 #include <stdint.h>
10 10
11 #include <cmath> 11 #include <cmath>
12 12
13 #include "src/ast/ast-value-factory.h" 13 #include "src/ast/ast-value-factory.h"
14 #include "src/char-predicates-inl.h" 14 #include "src/char-predicates-inl.h"
15 #include "src/conversions-inl.h" 15 #include "src/conversions-inl.h"
16 #include "src/list-inl.h" 16 #include "src/list-inl.h"
17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol 17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol
18 18
19 namespace v8 { 19 namespace v8 {
20 namespace internal { 20 namespace internal {
21 21
22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { 22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {
23 if (is_one_byte()) { 23 if (is_one_byte()) {
24 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); 24 return isolate->factory()->InternalizeOneByteString(one_byte_literal());
25 } 25 }
26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); 26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal());
27 } 27 }
28 28
29 29
30 // Default implementation for streams that do not support bookmarks.
31 bool Utf16CharacterStream::SetBookmark() { return false; }
32 void Utf16CharacterStream::ResetToBookmark() { UNREACHABLE(); }
33 30
34 31
35 // ---------------------------------------------------------------------------- 32 // ----------------------------------------------------------------------------
36 // Scanner 33 // Scanner
37 34
38 Scanner::Scanner(UnicodeCache* unicode_cache) 35 Scanner::Scanner(UnicodeCache* unicode_cache)
39 : unicode_cache_(unicode_cache), 36 : unicode_cache_(unicode_cache),
40 bookmark_c0_(kNoBookmark), 37 bookmark_c0_(kNoBookmark),
41 octal_pos_(Location::invalid()), 38 octal_pos_(Location::invalid()),
42 decimal_with_leading_zero_pos_(Location::invalid()), 39 decimal_with_leading_zero_pos_(Location::invalid()),
(...skipping 262 matching lines...) Expand 10 before | Expand all | Expand 10 after
305 return c == 0xFFFE; 302 return c == 0xFFFE;
306 } 303 }
307 304
308 305
309 bool Scanner::SkipWhiteSpace() { 306 bool Scanner::SkipWhiteSpace() {
310 int start_position = source_pos(); 307 int start_position = source_pos();
311 308
312 while (true) { 309 while (true) {
313 while (true) { 310 while (true) {
314 // The unicode cache accepts unsigned inputs. 311 // The unicode cache accepts unsigned inputs.
315 if (c0_ < 0) break; 312 if (c0_ == kEndOfInput) break;
316 // Advance as long as character is a WhiteSpace or LineTerminator. 313 // Advance as long as character is a WhiteSpace or LineTerminator.
317 // Remember if the latter is the case. 314 // Remember if the latter is the case.
318 if (unicode_cache_->IsLineTerminator(c0_)) { 315 if (unicode_cache_->IsLineTerminator(c0_)) {
319 has_line_terminator_before_next_ = true; 316 has_line_terminator_before_next_ = true;
320 } else if (!unicode_cache_->IsWhiteSpace(c0_) && 317 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&
321 !IsLittleEndianByteOrderMark(c0_)) { 318 !IsLittleEndianByteOrderMark(c0_)) {
322 break; 319 break;
323 } 320 }
324 Advance(); 321 Advance();
325 } 322 }
(...skipping 23 matching lines...) Expand all
349 346
350 347
351 Token::Value Scanner::SkipSingleLineComment() { 348 Token::Value Scanner::SkipSingleLineComment() {
352 Advance(); 349 Advance();
353 350
354 // The line terminator at the end of the line is not considered 351 // The line terminator at the end of the line is not considered
355 // to be part of the single-line comment; it is recognized 352 // to be part of the single-line comment; it is recognized
356 // separately by the lexical grammar and becomes part of the 353 // separately by the lexical grammar and becomes part of the
357 // stream of input elements for the syntactic grammar (see 354 // stream of input elements for the syntactic grammar (see
358 // ECMA-262, section 7.4). 355 // ECMA-262, section 7.4).
359 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { 356 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
360 Advance(); 357 Advance();
361 } 358 }
362 359
363 return Token::WHITESPACE; 360 return Token::WHITESPACE;
364 } 361 }
365 362
366 363
367 Token::Value Scanner::SkipSourceURLComment() { 364 Token::Value Scanner::SkipSourceURLComment() {
368 TryToParseSourceURLComment(); 365 TryToParseSourceURLComment();
369 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { 366 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
370 Advance(); 367 Advance();
371 } 368 }
372 369
373 return Token::WHITESPACE; 370 return Token::WHITESPACE;
374 } 371 }
375 372
376 373
377 void Scanner::TryToParseSourceURLComment() { 374 void Scanner::TryToParseSourceURLComment() {
378 // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this 375 // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this
379 // function will just return if it cannot parse a magic comment. 376 // function will just return if it cannot parse a magic comment.
380 if (c0_ < 0 || !unicode_cache_->IsWhiteSpace(c0_)) return; 377 if (c0_ == kEndOfInput || !unicode_cache_->IsWhiteSpace(c0_)) return;
381 Advance(); 378 Advance();
382 LiteralBuffer name; 379 LiteralBuffer name;
383 while (c0_ >= 0 && !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && 380 while (c0_ != kEndOfInput &&
384 c0_ != '=') { 381 !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') {
385 name.AddChar(c0_); 382 name.AddChar(c0_);
386 Advance(); 383 Advance();
387 } 384 }
388 if (!name.is_one_byte()) return; 385 if (!name.is_one_byte()) return;
389 Vector<const uint8_t> name_literal = name.one_byte_literal(); 386 Vector<const uint8_t> name_literal = name.one_byte_literal();
390 LiteralBuffer* value; 387 LiteralBuffer* value;
391 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) { 388 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) {
392 value = &source_url_; 389 value = &source_url_;
393 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) { 390 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) {
394 value = &source_mapping_url_; 391 value = &source_mapping_url_;
395 } else { 392 } else {
396 return; 393 return;
397 } 394 }
398 if (c0_ != '=') 395 if (c0_ != '=')
399 return; 396 return;
400 Advance(); 397 Advance();
401 value->Reset(); 398 value->Reset();
402 while (c0_ >= 0 && unicode_cache_->IsWhiteSpace(c0_)) { 399 while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) {
403 Advance(); 400 Advance();
404 } 401 }
405 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { 402 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
406 // Disallowed characters. 403 // Disallowed characters.
407 if (c0_ == '"' || c0_ == '\'') { 404 if (c0_ == '"' || c0_ == '\'') {
408 value->Reset(); 405 value->Reset();
409 return; 406 return;
410 } 407 }
411 if (unicode_cache_->IsWhiteSpace(c0_)) { 408 if (unicode_cache_->IsWhiteSpace(c0_)) {
412 break; 409 break;
413 } 410 }
414 value->AddChar(c0_); 411 value->AddChar(c0_);
415 Advance(); 412 Advance();
416 } 413 }
417 // Allow whitespace at the end. 414 // Allow whitespace at the end.
418 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { 415 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
419 if (!unicode_cache_->IsWhiteSpace(c0_)) { 416 if (!unicode_cache_->IsWhiteSpace(c0_)) {
420 value->Reset(); 417 value->Reset();
421 break; 418 break;
422 } 419 }
423 Advance(); 420 Advance();
424 } 421 }
425 } 422 }
426 423
427 424
428 Token::Value Scanner::SkipMultiLineComment() { 425 Token::Value Scanner::SkipMultiLineComment() {
429 DCHECK(c0_ == '*'); 426 DCHECK(c0_ == '*');
430 Advance(); 427 Advance();
431 428
432 while (c0_ >= 0) { 429 while (c0_ != kEndOfInput) {
433 uc32 ch = c0_; 430 uc32 ch = c0_;
434 Advance(); 431 Advance();
435 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { 432 if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(ch)) {
436 // Following ECMA-262, section 7.4, a comment containing 433 // Following ECMA-262, section 7.4, a comment containing
437 // a newline will make the comment count as a line-terminator. 434 // a newline will make the comment count as a line-terminator.
438 has_multiline_comment_before_next_ = true; 435 has_multiline_comment_before_next_ = true;
439 } 436 }
440 // If we have reached the end of the multi-line comment, we 437 // If we have reached the end of the multi-line comment, we
441 // consume the '/' and insert a whitespace. This way all 438 // consume the '/' and insert a whitespace. This way all
442 // multi-line comments are treated as whitespace. 439 // multi-line comments are treated as whitespace.
443 if (ch == '*' && c0_ == '/') { 440 if (ch == '*' && c0_ == '/') {
444 c0_ = ' '; 441 c0_ = ' ';
445 return Token::WHITESPACE; 442 return Token::WHITESPACE;
(...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after
709 706
710 case '~': 707 case '~':
711 token = Select(Token::BIT_NOT); 708 token = Select(Token::BIT_NOT);
712 break; 709 break;
713 710
714 case '`': 711 case '`':
715 token = ScanTemplateStart(); 712 token = ScanTemplateStart();
716 break; 713 break;
717 714
718 default: 715 default:
719 if (c0_ < 0) { 716 if (c0_ == kEndOfInput) {
720 token = Token::EOS; 717 token = Token::EOS;
721 } else if (unicode_cache_->IsIdentifierStart(c0_)) { 718 } else if (unicode_cache_->IsIdentifierStart(c0_)) {
722 token = ScanIdentifierOrKeyword(); 719 token = ScanIdentifierOrKeyword();
723 } else if (IsDecimalDigit(c0_)) { 720 } else if (IsDecimalDigit(c0_)) {
724 token = ScanNumber(false); 721 token = ScanNumber(false);
725 } else if (SkipWhiteSpace()) { 722 } else if (SkipWhiteSpace()) {
726 token = Token::WHITESPACE; 723 token = Token::WHITESPACE;
727 } else { 724 } else {
728 token = Select(Token::ILLEGAL); 725 token = Select(Token::ILLEGAL);
729 } 726 }
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
801 Scan(); 798 Scan();
802 } 799 }
803 800
804 801
805 template <bool capture_raw, bool in_template_literal> 802 template <bool capture_raw, bool in_template_literal>
806 bool Scanner::ScanEscape() { 803 bool Scanner::ScanEscape() {
807 uc32 c = c0_; 804 uc32 c = c0_;
808 Advance<capture_raw>(); 805 Advance<capture_raw>();
809 806
810 // Skip escaped newlines. 807 // Skip escaped newlines.
811 if (!in_template_literal && c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { 808 if (!in_template_literal && c0_ != kEndOfInput &&
809 unicode_cache_->IsLineTerminator(c)) {
812 // Allow CR+LF newlines in multiline string literals. 810 // Allow CR+LF newlines in multiline string literals.
813 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>(); 811 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
814 // Allow LF+CR newlines in multiline string literals. 812 // Allow LF+CR newlines in multiline string literals.
815 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>(); 813 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>();
816 return true; 814 return true;
817 } 815 }
818 816
819 switch (c) { 817 switch (c) {
820 case '\'': // fall through 818 case '\'': // fall through
821 case '"' : // fall through 819 case '"' : // fall through
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
887 Token::Value Scanner::ScanString() { 885 Token::Value Scanner::ScanString() {
888 uc32 quote = c0_; 886 uc32 quote = c0_;
889 Advance<false, false>(); // consume quote 887 Advance<false, false>(); // consume quote
890 888
891 LiteralScope literal(this); 889 LiteralScope literal(this);
892 while (true) { 890 while (true) {
893 if (c0_ > kMaxAscii) { 891 if (c0_ > kMaxAscii) {
894 HandleLeadSurrogate(); 892 HandleLeadSurrogate();
895 break; 893 break;
896 } 894 }
897 if (c0_ < 0 || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL; 895 if (c0_ == kEndOfInput || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL;
898 if (c0_ == quote) { 896 if (c0_ == quote) {
899 literal.Complete(); 897 literal.Complete();
900 Advance<false, false>(); 898 Advance<false, false>();
901 return Token::STRING; 899 return Token::STRING;
902 } 900 }
903 char c = static_cast<char>(c0_); 901 char c = static_cast<char>(c0_);
904 if (c == '\\') break; 902 if (c == '\\') break;
905 Advance<false, false>(); 903 Advance<false, false>();
906 AddLiteralChar(c); 904 AddLiteralChar(c);
907 } 905 }
908 906
909 while (c0_ != quote && c0_ >= 0 907 while (c0_ != quote && c0_ != kEndOfInput &&
910 && !unicode_cache_->IsLineTerminator(c0_)) { 908 !unicode_cache_->IsLineTerminator(c0_)) {
911 uc32 c = c0_; 909 uc32 c = c0_;
912 Advance(); 910 Advance();
913 if (c == '\\') { 911 if (c == '\\') {
914 if (c0_ < 0 || !ScanEscape<false, false>()) { 912 if (c0_ == kEndOfInput || !ScanEscape<false, false>()) {
915 return Token::ILLEGAL; 913 return Token::ILLEGAL;
916 } 914 }
917 } else { 915 } else {
918 AddLiteralChar(c); 916 AddLiteralChar(c);
919 } 917 }
920 } 918 }
921 if (c0_ != quote) return Token::ILLEGAL; 919 if (c0_ != quote) return Token::ILLEGAL;
922 literal.Complete(); 920 literal.Complete();
923 921
924 Advance(); // consume quote 922 Advance(); // consume quote
(...skipping 25 matching lines...) Expand all
950 Advance<capture_raw>(); 948 Advance<capture_raw>();
951 if (c == '`') { 949 if (c == '`') {
952 result = Token::TEMPLATE_TAIL; 950 result = Token::TEMPLATE_TAIL;
953 ReduceRawLiteralLength(1); 951 ReduceRawLiteralLength(1);
954 break; 952 break;
955 } else if (c == '$' && c0_ == '{') { 953 } else if (c == '$' && c0_ == '{') {
956 Advance<capture_raw>(); // Consume '{' 954 Advance<capture_raw>(); // Consume '{'
957 ReduceRawLiteralLength(2); 955 ReduceRawLiteralLength(2);
958 break; 956 break;
959 } else if (c == '\\') { 957 } else if (c == '\\') {
960 if (c0_ > 0 && unicode_cache_->IsLineTerminator(c0_)) { 958 if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(c0_)) {
961 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty 959 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
962 // code unit sequence. 960 // code unit sequence.
963 uc32 lastChar = c0_; 961 uc32 lastChar = c0_;
964 Advance<capture_raw>(); 962 Advance<capture_raw>();
965 if (lastChar == '\r') { 963 if (lastChar == '\r') {
966 ReduceRawLiteralLength(1); // Remove \r 964 ReduceRawLiteralLength(1); // Remove \r
967 if (c0_ == '\n') { 965 if (c0_ == '\n') {
968 Advance<capture_raw>(); // Adds \n 966 Advance<capture_raw>(); // Adds \n
969 } else { 967 } else {
970 AddRawLiteralChar('\n'); 968 AddRawLiteralChar('\n');
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after
1148 return Token::ILLEGAL; 1146 return Token::ILLEGAL;
1149 } 1147 }
1150 ScanDecimalDigits(); 1148 ScanDecimalDigits();
1151 } 1149 }
1152 1150
1153 // The source character immediately following a numeric literal must 1151 // The source character immediately following a numeric literal must
1154 // not be an identifier start or a decimal digit; see ECMA-262 1152 // not be an identifier start or a decimal digit; see ECMA-262
1155 // section 7.8.3, page 17 (note that we read only one decimal digit 1153 // section 7.8.3, page 17 (note that we read only one decimal digit
1156 // if the value is 0). 1154 // if the value is 0).
1157 if (IsDecimalDigit(c0_) || 1155 if (IsDecimalDigit(c0_) ||
1158 (c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_))) 1156 (c0_ != kEndOfInput && unicode_cache_->IsIdentifierStart(c0_)))
1159 return Token::ILLEGAL; 1157 return Token::ILLEGAL;
1160 1158
1161 literal.Complete(); 1159 literal.Complete();
1162 1160
1163 if (kind == DECIMAL_WITH_LEADING_ZERO) 1161 if (kind == DECIMAL_WITH_LEADING_ZERO)
1164 decimal_with_leading_zero_pos_ = Location(start_pos, source_pos()); 1162 decimal_with_leading_zero_pos_ = Location(start_pos, source_pos());
1165 return Token::NUMBER; 1163 return Token::NUMBER;
1166 } 1164 }
1167 1165
1168 1166
(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after
1375 } 1373 }
1376 AddLiteralChar(c); 1374 AddLiteralChar(c);
1377 return ScanIdentifierSuffix(&literal, true); 1375 return ScanIdentifierSuffix(&literal, true);
1378 } else { 1376 } else {
1379 uc32 first_char = c0_; 1377 uc32 first_char = c0_;
1380 Advance(); 1378 Advance();
1381 AddLiteralChar(first_char); 1379 AddLiteralChar(first_char);
1382 } 1380 }
1383 1381
1384 // Scan the rest of the identifier characters. 1382 // Scan the rest of the identifier characters.
1385 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { 1383 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
1386 if (c0_ != '\\') { 1384 if (c0_ != '\\') {
1387 uc32 next_char = c0_; 1385 uc32 next_char = c0_;
1388 Advance(); 1386 Advance();
1389 AddLiteralChar(next_char); 1387 AddLiteralChar(next_char);
1390 continue; 1388 continue;
1391 } 1389 }
1392 // Fallthrough if no longer able to complete keyword. 1390 // Fallthrough if no longer able to complete keyword.
1393 return ScanIdentifierSuffix(&literal, false); 1391 return ScanIdentifierSuffix(&literal, false);
1394 } 1392 }
1395 1393
1396 if (next_.literal_chars->is_one_byte()) { 1394 if (next_.literal_chars->is_one_byte()) {
1397 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); 1395 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
1398 Token::Value token = 1396 Token::Value token =
1399 KeywordOrIdentifierToken(chars.start(), chars.length()); 1397 KeywordOrIdentifierToken(chars.start(), chars.length());
1400 if (token == Token::IDENTIFIER) literal.Complete(); 1398 if (token == Token::IDENTIFIER) literal.Complete();
1401 return token; 1399 return token;
1402 } 1400 }
1403 literal.Complete(); 1401 literal.Complete();
1404 return Token::IDENTIFIER; 1402 return Token::IDENTIFIER;
1405 } 1403 }
1406 1404
1407 1405
1408 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal, 1406 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal,
1409 bool escaped) { 1407 bool escaped) {
1410 // Scan the rest of the identifier characters. 1408 // Scan the rest of the identifier characters.
1411 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { 1409 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
1412 if (c0_ == '\\') { 1410 if (c0_ == '\\') {
1413 uc32 c = ScanIdentifierUnicodeEscape(); 1411 uc32 c = ScanIdentifierUnicodeEscape();
1414 escaped = true; 1412 escaped = true;
1415 // Only allow legal identifier part characters. 1413 // Only allow legal identifier part characters.
1416 if (c < 0 || 1414 if (c < 0 ||
1417 c == '\\' || 1415 c == '\\' ||
1418 !unicode_cache_->IsIdentifierPart(c)) { 1416 !unicode_cache_->IsIdentifierPart(c)) {
1419 return Token::ILLEGAL; 1417 return Token::ILLEGAL;
1420 } 1418 }
1421 AddLiteralChar(c); 1419 AddLiteralChar(c);
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
1458 1456
1459 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 1457 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1460 // the scanner should pass uninterpreted bodies to the RegExp 1458 // the scanner should pass uninterpreted bodies to the RegExp
1461 // constructor. 1459 // constructor.
1462 LiteralScope literal(this); 1460 LiteralScope literal(this);
1463 if (seen_equal) { 1461 if (seen_equal) {
1464 AddLiteralChar('='); 1462 AddLiteralChar('=');
1465 } 1463 }
1466 1464
1467 while (c0_ != '/' || in_character_class) { 1465 while (c0_ != '/' || in_character_class) {
1468 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; 1466 if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))
1467 return false;
1469 if (c0_ == '\\') { // Escape sequence. 1468 if (c0_ == '\\') { // Escape sequence.
1470 AddLiteralCharAdvance(); 1469 AddLiteralCharAdvance();
1471 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; 1470 if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))
1471 return false;
1472 AddLiteralCharAdvance(); 1472 AddLiteralCharAdvance();
1473 // If the escape allows more characters, i.e., \x??, \u????, or \c?, 1473 // If the escape allows more characters, i.e., \x??, \u????, or \c?,
1474 // only "safe" characters are allowed (letters, digits, underscore), 1474 // only "safe" characters are allowed (letters, digits, underscore),
1475 // otherwise the escape isn't valid and the invalid character has 1475 // otherwise the escape isn't valid and the invalid character has
1476 // its normal meaning. I.e., we can just continue scanning without 1476 // its normal meaning. I.e., we can just continue scanning without
1477 // worrying whether the following characters are part of the escape 1477 // worrying whether the following characters are part of the escape
1478 // or not, since any '/', '\\' or '[' is guaranteed to not be part 1478 // or not, since any '/', '\\' or '[' is guaranteed to not be part
1479 // of the escape sequence. 1479 // of the escape sequence.
1480 1480
1481 // TODO(896): At some point, parse RegExps more thoroughly to capture 1481 // TODO(896): At some point, parse RegExps more thoroughly to capture
(...skipping 10 matching lines...) Expand all
1492 next_.token = Token::REGEXP_LITERAL; 1492 next_.token = Token::REGEXP_LITERAL;
1493 return true; 1493 return true;
1494 } 1494 }
1495 1495
1496 1496
1497 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() { 1497 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
1498 DCHECK(next_.token == Token::REGEXP_LITERAL); 1498 DCHECK(next_.token == Token::REGEXP_LITERAL);
1499 1499
1500 // Scan regular expression flags. 1500 // Scan regular expression flags.
1501 int flags = 0; 1501 int flags = 0;
1502 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { 1502 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
1503 RegExp::Flags flag = RegExp::kNone; 1503 RegExp::Flags flag = RegExp::kNone;
1504 switch (c0_) { 1504 switch (c0_) {
1505 case 'g': 1505 case 'g':
1506 flag = RegExp::kGlobal; 1506 flag = RegExp::kGlobal;
1507 break; 1507 break;
1508 case 'i': 1508 case 'i':
1509 flag = RegExp::kIgnoreCase; 1509 flag = RegExp::kIgnoreCase;
1510 break; 1510 break;
1511 case 'm': 1511 case 'm':
1512 flag = RegExp::kMultiline; 1512 flag = RegExp::kMultiline;
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after
1631 to->token = from->token; 1631 to->token = from->token;
1632 to->location = from->location; 1632 to->location = from->location;
1633 to->literal_chars->CopyFrom(from->literal_chars); 1633 to->literal_chars->CopyFrom(from->literal_chars);
1634 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); 1634 to->raw_literal_chars->CopyFrom(from->raw_literal_chars);
1635 } 1635 }
1636 1636
1637 1637
1638 1638
1639 } // namespace internal 1639 } // namespace internal
1640 } // namespace v8 1640 } // namespace v8
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698