Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(149)

Side by Side Diff: src/parsing/scanner.cc

Issue 2314663002: Rework scanner-character-streams. (Closed)
Patch Set: Feedback, round 2. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include "src/parsing/scanner.h" 7 #include "src/parsing/scanner.h"
8 8
9 #include <stdint.h> 9 #include <stdint.h>
10 10
11 #include <cmath> 11 #include <cmath>
12 12
13 #include "src/ast/ast-value-factory.h" 13 #include "src/ast/ast-value-factory.h"
14 #include "src/char-predicates-inl.h" 14 #include "src/char-predicates-inl.h"
15 #include "src/conversions-inl.h" 15 #include "src/conversions-inl.h"
16 #include "src/list-inl.h" 16 #include "src/list-inl.h"
17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol 17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol
18 18
19 namespace v8 { 19 namespace v8 {
20 namespace internal { 20 namespace internal {
21 21
22 const size_t Utf16CharacterStream::kNoBookmark =
23 std::numeric_limits<size_t>::max();
24
22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { 25 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {
23 if (is_one_byte()) { 26 if (is_one_byte()) {
24 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); 27 return isolate->factory()->InternalizeOneByteString(one_byte_literal());
25 } 28 }
26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); 29 return isolate->factory()->InternalizeTwoByteString(two_byte_literal());
27 } 30 }
28 31
29 32
30 // Default implementation for streams that do not support bookmarks.
31 bool Utf16CharacterStream::SetBookmark() { return false; }
32 void Utf16CharacterStream::ResetToBookmark() { UNREACHABLE(); }
33
34 33
35 // ---------------------------------------------------------------------------- 34 // ----------------------------------------------------------------------------
36 // Scanner 35 // Scanner
37 36
38 Scanner::Scanner(UnicodeCache* unicode_cache) 37 Scanner::Scanner(UnicodeCache* unicode_cache)
39 : unicode_cache_(unicode_cache), 38 : unicode_cache_(unicode_cache),
40 bookmark_c0_(kNoBookmark), 39 bookmark_c0_(kNoBookmark),
41 octal_pos_(Location::invalid()), 40 octal_pos_(Location::invalid()),
42 decimal_with_leading_zero_pos_(Location::invalid()), 41 decimal_with_leading_zero_pos_(Location::invalid()),
43 found_html_comment_(false) { 42 found_html_comment_(false) {
(...skipping 261 matching lines...) Expand 10 before | Expand all | Expand 10 after
305 return c == 0xFFFE; 304 return c == 0xFFFE;
306 } 305 }
307 306
308 307
309 bool Scanner::SkipWhiteSpace() { 308 bool Scanner::SkipWhiteSpace() {
310 int start_position = source_pos(); 309 int start_position = source_pos();
311 310
312 while (true) { 311 while (true) {
313 while (true) { 312 while (true) {
314 // The unicode cache accepts unsigned inputs. 313 // The unicode cache accepts unsigned inputs.
315 if (c0_ < 0) break; 314 if (c0_ == kEndOfInput) break;
316 // Advance as long as character is a WhiteSpace or LineTerminator. 315 // Advance as long as character is a WhiteSpace or LineTerminator.
317 // Remember if the latter is the case. 316 // Remember if the latter is the case.
318 if (unicode_cache_->IsLineTerminator(c0_)) { 317 if (unicode_cache_->IsLineTerminator(c0_)) {
319 has_line_terminator_before_next_ = true; 318 has_line_terminator_before_next_ = true;
320 } else if (!unicode_cache_->IsWhiteSpace(c0_) && 319 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&
321 !IsLittleEndianByteOrderMark(c0_)) { 320 !IsLittleEndianByteOrderMark(c0_)) {
322 break; 321 break;
323 } 322 }
324 Advance(); 323 Advance();
325 } 324 }
(...skipping 23 matching lines...) Expand all
349 348
350 349
351 Token::Value Scanner::SkipSingleLineComment() { 350 Token::Value Scanner::SkipSingleLineComment() {
352 Advance(); 351 Advance();
353 352
354 // The line terminator at the end of the line is not considered 353 // The line terminator at the end of the line is not considered
355 // to be part of the single-line comment; it is recognized 354 // to be part of the single-line comment; it is recognized
356 // separately by the lexical grammar and becomes part of the 355 // separately by the lexical grammar and becomes part of the
357 // stream of input elements for the syntactic grammar (see 356 // stream of input elements for the syntactic grammar (see
358 // ECMA-262, section 7.4). 357 // ECMA-262, section 7.4).
359 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { 358 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
360 Advance(); 359 Advance();
361 } 360 }
362 361
363 return Token::WHITESPACE; 362 return Token::WHITESPACE;
364 } 363 }
365 364
366 365
367 Token::Value Scanner::SkipSourceURLComment() { 366 Token::Value Scanner::SkipSourceURLComment() {
368 TryToParseSourceURLComment(); 367 TryToParseSourceURLComment();
369 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { 368 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
370 Advance(); 369 Advance();
371 } 370 }
372 371
373 return Token::WHITESPACE; 372 return Token::WHITESPACE;
374 } 373 }
375 374
376 375
377 void Scanner::TryToParseSourceURLComment() { 376 void Scanner::TryToParseSourceURLComment() {
378 // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this 377 // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this
379 // function will just return if it cannot parse a magic comment. 378 // function will just return if it cannot parse a magic comment.
380 if (c0_ < 0 || !unicode_cache_->IsWhiteSpace(c0_)) return; 379 if (c0_ == kEndOfInput || !unicode_cache_->IsWhiteSpace(c0_)) return;
381 Advance(); 380 Advance();
382 LiteralBuffer name; 381 LiteralBuffer name;
383 while (c0_ >= 0 && !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && 382 while (c0_ != kEndOfInput &&
384 c0_ != '=') { 383 !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') {
385 name.AddChar(c0_); 384 name.AddChar(c0_);
386 Advance(); 385 Advance();
387 } 386 }
388 if (!name.is_one_byte()) return; 387 if (!name.is_one_byte()) return;
389 Vector<const uint8_t> name_literal = name.one_byte_literal(); 388 Vector<const uint8_t> name_literal = name.one_byte_literal();
390 LiteralBuffer* value; 389 LiteralBuffer* value;
391 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) { 390 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) {
392 value = &source_url_; 391 value = &source_url_;
393 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) { 392 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) {
394 value = &source_mapping_url_; 393 value = &source_mapping_url_;
395 } else { 394 } else {
396 return; 395 return;
397 } 396 }
398 if (c0_ != '=') 397 if (c0_ != '=')
399 return; 398 return;
400 Advance(); 399 Advance();
401 value->Reset(); 400 value->Reset();
402 while (c0_ >= 0 && unicode_cache_->IsWhiteSpace(c0_)) { 401 while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) {
403 Advance(); 402 Advance();
404 } 403 }
405 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { 404 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
406 // Disallowed characters. 405 // Disallowed characters.
407 if (c0_ == '"' || c0_ == '\'') { 406 if (c0_ == '"' || c0_ == '\'') {
408 value->Reset(); 407 value->Reset();
409 return; 408 return;
410 } 409 }
411 if (unicode_cache_->IsWhiteSpace(c0_)) { 410 if (unicode_cache_->IsWhiteSpace(c0_)) {
412 break; 411 break;
413 } 412 }
414 value->AddChar(c0_); 413 value->AddChar(c0_);
415 Advance(); 414 Advance();
416 } 415 }
417 // Allow whitespace at the end. 416 // Allow whitespace at the end.
418 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { 417 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
419 if (!unicode_cache_->IsWhiteSpace(c0_)) { 418 if (!unicode_cache_->IsWhiteSpace(c0_)) {
420 value->Reset(); 419 value->Reset();
421 break; 420 break;
422 } 421 }
423 Advance(); 422 Advance();
424 } 423 }
425 } 424 }
426 425
427 426
428 Token::Value Scanner::SkipMultiLineComment() { 427 Token::Value Scanner::SkipMultiLineComment() {
429 DCHECK(c0_ == '*'); 428 DCHECK(c0_ == '*');
430 Advance(); 429 Advance();
431 430
432 while (c0_ >= 0) { 431 while (c0_ != kEndOfInput) {
433 uc32 ch = c0_; 432 uc32 ch = c0_;
434 Advance(); 433 Advance();
435 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { 434 if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(ch)) {
436 // Following ECMA-262, section 7.4, a comment containing 435 // Following ECMA-262, section 7.4, a comment containing
437 // a newline will make the comment count as a line-terminator. 436 // a newline will make the comment count as a line-terminator.
438 has_multiline_comment_before_next_ = true; 437 has_multiline_comment_before_next_ = true;
439 } 438 }
440 // If we have reached the end of the multi-line comment, we 439 // If we have reached the end of the multi-line comment, we
441 // consume the '/' and insert a whitespace. This way all 440 // consume the '/' and insert a whitespace. This way all
442 // multi-line comments are treated as whitespace. 441 // multi-line comments are treated as whitespace.
443 if (ch == '*' && c0_ == '/') { 442 if (ch == '*' && c0_ == '/') {
444 c0_ = ' '; 443 c0_ = ' ';
445 return Token::WHITESPACE; 444 return Token::WHITESPACE;
(...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after
709 708
710 case '~': 709 case '~':
711 token = Select(Token::BIT_NOT); 710 token = Select(Token::BIT_NOT);
712 break; 711 break;
713 712
714 case '`': 713 case '`':
715 token = ScanTemplateStart(); 714 token = ScanTemplateStart();
716 break; 715 break;
717 716
718 default: 717 default:
719 if (c0_ < 0) { 718 if (c0_ == kEndOfInput) {
720 token = Token::EOS; 719 token = Token::EOS;
721 } else if (unicode_cache_->IsIdentifierStart(c0_)) { 720 } else if (unicode_cache_->IsIdentifierStart(c0_)) {
722 token = ScanIdentifierOrKeyword(); 721 token = ScanIdentifierOrKeyword();
723 } else if (IsDecimalDigit(c0_)) { 722 } else if (IsDecimalDigit(c0_)) {
724 token = ScanNumber(false); 723 token = ScanNumber(false);
725 } else if (SkipWhiteSpace()) { 724 } else if (SkipWhiteSpace()) {
726 token = Token::WHITESPACE; 725 token = Token::WHITESPACE;
727 } else { 726 } else {
728 token = Select(Token::ILLEGAL); 727 token = Select(Token::ILLEGAL);
729 } 728 }
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
801 Scan(); 800 Scan();
802 } 801 }
803 802
804 803
805 template <bool capture_raw, bool in_template_literal> 804 template <bool capture_raw, bool in_template_literal>
806 bool Scanner::ScanEscape() { 805 bool Scanner::ScanEscape() {
807 uc32 c = c0_; 806 uc32 c = c0_;
808 Advance<capture_raw>(); 807 Advance<capture_raw>();
809 808
810 // Skip escaped newlines. 809 // Skip escaped newlines.
811 if (!in_template_literal && c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { 810 if (!in_template_literal && c0_ != kEndOfInput &&
811 unicode_cache_->IsLineTerminator(c)) {
812 // Allow CR+LF newlines in multiline string literals. 812 // Allow CR+LF newlines in multiline string literals.
813 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>(); 813 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
814 // Allow LF+CR newlines in multiline string literals. 814 // Allow LF+CR newlines in multiline string literals.
815 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>(); 815 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>();
816 return true; 816 return true;
817 } 817 }
818 818
819 switch (c) { 819 switch (c) {
820 case '\'': // fall through 820 case '\'': // fall through
821 case '"' : // fall through 821 case '"' : // fall through
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
887 Token::Value Scanner::ScanString() { 887 Token::Value Scanner::ScanString() {
888 uc32 quote = c0_; 888 uc32 quote = c0_;
889 Advance<false, false>(); // consume quote 889 Advance<false, false>(); // consume quote
890 890
891 LiteralScope literal(this); 891 LiteralScope literal(this);
892 while (true) { 892 while (true) {
893 if (c0_ > kMaxAscii) { 893 if (c0_ > kMaxAscii) {
894 HandleLeadSurrogate(); 894 HandleLeadSurrogate();
895 break; 895 break;
896 } 896 }
897 if (c0_ < 0 || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL; 897 if (c0_ == kEndOfInput || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL;
898 if (c0_ == quote) { 898 if (c0_ == quote) {
899 literal.Complete(); 899 literal.Complete();
900 Advance<false, false>(); 900 Advance<false, false>();
901 return Token::STRING; 901 return Token::STRING;
902 } 902 }
903 char c = static_cast<char>(c0_); 903 char c = static_cast<char>(c0_);
904 if (c == '\\') break; 904 if (c == '\\') break;
905 Advance<false, false>(); 905 Advance<false, false>();
906 AddLiteralChar(c); 906 AddLiteralChar(c);
907 } 907 }
908 908
909 while (c0_ != quote && c0_ >= 0 909 while (c0_ != quote && c0_ != kEndOfInput &&
910 && !unicode_cache_->IsLineTerminator(c0_)) { 910 !unicode_cache_->IsLineTerminator(c0_)) {
911 uc32 c = c0_; 911 uc32 c = c0_;
912 Advance(); 912 Advance();
913 if (c == '\\') { 913 if (c == '\\') {
914 if (c0_ < 0 || !ScanEscape<false, false>()) { 914 if (c0_ == kEndOfInput || !ScanEscape<false, false>()) {
915 return Token::ILLEGAL; 915 return Token::ILLEGAL;
916 } 916 }
917 } else { 917 } else {
918 AddLiteralChar(c); 918 AddLiteralChar(c);
919 } 919 }
920 } 920 }
921 if (c0_ != quote) return Token::ILLEGAL; 921 if (c0_ != quote) return Token::ILLEGAL;
922 literal.Complete(); 922 literal.Complete();
923 923
924 Advance(); // consume quote 924 Advance(); // consume quote
(...skipping 25 matching lines...) Expand all
950 Advance<capture_raw>(); 950 Advance<capture_raw>();
951 if (c == '`') { 951 if (c == '`') {
952 result = Token::TEMPLATE_TAIL; 952 result = Token::TEMPLATE_TAIL;
953 ReduceRawLiteralLength(1); 953 ReduceRawLiteralLength(1);
954 break; 954 break;
955 } else if (c == '$' && c0_ == '{') { 955 } else if (c == '$' && c0_ == '{') {
956 Advance<capture_raw>(); // Consume '{' 956 Advance<capture_raw>(); // Consume '{'
957 ReduceRawLiteralLength(2); 957 ReduceRawLiteralLength(2);
958 break; 958 break;
959 } else if (c == '\\') { 959 } else if (c == '\\') {
960 if (c0_ > 0 && unicode_cache_->IsLineTerminator(c0_)) { 960 if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(c0_)) {
961 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty 961 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
962 // code unit sequence. 962 // code unit sequence.
963 uc32 lastChar = c0_; 963 uc32 lastChar = c0_;
964 Advance<capture_raw>(); 964 Advance<capture_raw>();
965 if (lastChar == '\r') { 965 if (lastChar == '\r') {
966 ReduceRawLiteralLength(1); // Remove \r 966 ReduceRawLiteralLength(1); // Remove \r
967 if (c0_ == '\n') { 967 if (c0_ == '\n') {
968 Advance<capture_raw>(); // Adds \n 968 Advance<capture_raw>(); // Adds \n
969 } else { 969 } else {
970 AddRawLiteralChar('\n'); 970 AddRawLiteralChar('\n');
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after
1148 return Token::ILLEGAL; 1148 return Token::ILLEGAL;
1149 } 1149 }
1150 ScanDecimalDigits(); 1150 ScanDecimalDigits();
1151 } 1151 }
1152 1152
1153 // The source character immediately following a numeric literal must 1153 // The source character immediately following a numeric literal must
1154 // not be an identifier start or a decimal digit; see ECMA-262 1154 // not be an identifier start or a decimal digit; see ECMA-262
1155 // section 7.8.3, page 17 (note that we read only one decimal digit 1155 // section 7.8.3, page 17 (note that we read only one decimal digit
1156 // if the value is 0). 1156 // if the value is 0).
1157 if (IsDecimalDigit(c0_) || 1157 if (IsDecimalDigit(c0_) ||
1158 (c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_))) 1158 (c0_ != kEndOfInput && unicode_cache_->IsIdentifierStart(c0_)))
1159 return Token::ILLEGAL; 1159 return Token::ILLEGAL;
1160 1160
1161 literal.Complete(); 1161 literal.Complete();
1162 1162
1163 if (kind == DECIMAL_WITH_LEADING_ZERO) 1163 if (kind == DECIMAL_WITH_LEADING_ZERO)
1164 decimal_with_leading_zero_pos_ = Location(start_pos, source_pos()); 1164 decimal_with_leading_zero_pos_ = Location(start_pos, source_pos());
1165 return Token::NUMBER; 1165 return Token::NUMBER;
1166 } 1166 }
1167 1167
1168 1168
(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after
1375 } 1375 }
1376 AddLiteralChar(c); 1376 AddLiteralChar(c);
1377 return ScanIdentifierSuffix(&literal, true); 1377 return ScanIdentifierSuffix(&literal, true);
1378 } else { 1378 } else {
1379 uc32 first_char = c0_; 1379 uc32 first_char = c0_;
1380 Advance(); 1380 Advance();
1381 AddLiteralChar(first_char); 1381 AddLiteralChar(first_char);
1382 } 1382 }
1383 1383
1384 // Scan the rest of the identifier characters. 1384 // Scan the rest of the identifier characters.
1385 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { 1385 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
1386 if (c0_ != '\\') { 1386 if (c0_ != '\\') {
1387 uc32 next_char = c0_; 1387 uc32 next_char = c0_;
1388 Advance(); 1388 Advance();
1389 AddLiteralChar(next_char); 1389 AddLiteralChar(next_char);
1390 continue; 1390 continue;
1391 } 1391 }
1392 // Fallthrough if no longer able to complete keyword. 1392 // Fallthrough if no longer able to complete keyword.
1393 return ScanIdentifierSuffix(&literal, false); 1393 return ScanIdentifierSuffix(&literal, false);
1394 } 1394 }
1395 1395
1396 if (next_.literal_chars->is_one_byte()) { 1396 if (next_.literal_chars->is_one_byte()) {
1397 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); 1397 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
1398 Token::Value token = 1398 Token::Value token =
1399 KeywordOrIdentifierToken(chars.start(), chars.length()); 1399 KeywordOrIdentifierToken(chars.start(), chars.length());
1400 if (token == Token::IDENTIFIER) literal.Complete(); 1400 if (token == Token::IDENTIFIER) literal.Complete();
1401 return token; 1401 return token;
1402 } 1402 }
1403 literal.Complete(); 1403 literal.Complete();
1404 return Token::IDENTIFIER; 1404 return Token::IDENTIFIER;
1405 } 1405 }
1406 1406
1407 1407
1408 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal, 1408 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal,
1409 bool escaped) { 1409 bool escaped) {
1410 // Scan the rest of the identifier characters. 1410 // Scan the rest of the identifier characters.
1411 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { 1411 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
1412 if (c0_ == '\\') { 1412 if (c0_ == '\\') {
1413 uc32 c = ScanIdentifierUnicodeEscape(); 1413 uc32 c = ScanIdentifierUnicodeEscape();
1414 escaped = true; 1414 escaped = true;
1415 // Only allow legal identifier part characters. 1415 // Only allow legal identifier part characters.
1416 if (c < 0 || 1416 if (c < 0 ||
1417 c == '\\' || 1417 c == '\\' ||
1418 !unicode_cache_->IsIdentifierPart(c)) { 1418 !unicode_cache_->IsIdentifierPart(c)) {
1419 return Token::ILLEGAL; 1419 return Token::ILLEGAL;
1420 } 1420 }
1421 AddLiteralChar(c); 1421 AddLiteralChar(c);
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
1458 1458
1459 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 1459 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1460 // the scanner should pass uninterpreted bodies to the RegExp 1460 // the scanner should pass uninterpreted bodies to the RegExp
1461 // constructor. 1461 // constructor.
1462 LiteralScope literal(this); 1462 LiteralScope literal(this);
1463 if (seen_equal) { 1463 if (seen_equal) {
1464 AddLiteralChar('='); 1464 AddLiteralChar('=');
1465 } 1465 }
1466 1466
1467 while (c0_ != '/' || in_character_class) { 1467 while (c0_ != '/' || in_character_class) {
1468 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; 1468 if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))
1469 return false;
1469 if (c0_ == '\\') { // Escape sequence. 1470 if (c0_ == '\\') { // Escape sequence.
1470 AddLiteralCharAdvance(); 1471 AddLiteralCharAdvance();
1471 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; 1472 if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))
1473 return false;
1472 AddLiteralCharAdvance(); 1474 AddLiteralCharAdvance();
1473 // If the escape allows more characters, i.e., \x??, \u????, or \c?, 1475 // If the escape allows more characters, i.e., \x??, \u????, or \c?,
1474 // only "safe" characters are allowed (letters, digits, underscore), 1476 // only "safe" characters are allowed (letters, digits, underscore),
1475 // otherwise the escape isn't valid and the invalid character has 1477 // otherwise the escape isn't valid and the invalid character has
1476 // its normal meaning. I.e., we can just continue scanning without 1478 // its normal meaning. I.e., we can just continue scanning without
1477 // worrying whether the following characters are part of the escape 1479 // worrying whether the following characters are part of the escape
1478 // or not, since any '/', '\\' or '[' is guaranteed to not be part 1480 // or not, since any '/', '\\' or '[' is guaranteed to not be part
1479 // of the escape sequence. 1481 // of the escape sequence.
1480 1482
1481 // TODO(896): At some point, parse RegExps more throughly to capture 1483 // TODO(896): At some point, parse RegExps more throughly to capture
(...skipping 10 matching lines...) Expand all
1492 next_.token = Token::REGEXP_LITERAL; 1494 next_.token = Token::REGEXP_LITERAL;
1493 return true; 1495 return true;
1494 } 1496 }
1495 1497
1496 1498
1497 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() { 1499 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
1498 DCHECK(next_.token == Token::REGEXP_LITERAL); 1500 DCHECK(next_.token == Token::REGEXP_LITERAL);
1499 1501
1500 // Scan regular expression flags. 1502 // Scan regular expression flags.
1501 int flags = 0; 1503 int flags = 0;
1502 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { 1504 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
1503 RegExp::Flags flag = RegExp::kNone; 1505 RegExp::Flags flag = RegExp::kNone;
1504 switch (c0_) { 1506 switch (c0_) {
1505 case 'g': 1507 case 'g':
1506 flag = RegExp::kGlobal; 1508 flag = RegExp::kGlobal;
1507 break; 1509 break;
1508 case 'i': 1510 case 'i':
1509 flag = RegExp::kIgnoreCase; 1511 flag = RegExp::kIgnoreCase;
1510 break; 1512 break;
1511 case 'm': 1513 case 'm':
1512 flag = RegExp::kMultiline; 1514 flag = RegExp::kMultiline;
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after
1631 to->token = from->token; 1633 to->token = from->token;
1632 to->location = from->location; 1634 to->location = from->location;
1633 to->literal_chars->CopyFrom(from->literal_chars); 1635 to->literal_chars->CopyFrom(from->literal_chars);
1634 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); 1636 to->raw_literal_chars->CopyFrom(from->raw_literal_chars);
1635 } 1637 }
1636 1638
1637 1639
1638 1640
1639 } // namespace internal 1641 } // namespace internal
1640 } // namespace v8 1642 } // namespace v8
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698