src/parsing/scanner.cc - Issue 2314663002: Rework scanner-character-streams.

Side by Side Diff: src/parsing/scanner.cc

Issue 2314663002: Rework scanner-character-streams. (Closed)

Patch Set: Niko's feedback and fix compile even harder Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/parsing/scanner.h"	7 #include "src/parsing/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

11 #include <cmath>	11 #include <cmath>

12	12

13 #include "src/ast/ast-value-factory.h"	13 #include "src/ast/ast-value-factory.h"

14 #include "src/char-predicates-inl.h"	14 #include "src/char-predicates-inl.h"

15 #include "src/conversions-inl.h"	15 #include "src/conversions-inl.h"

16 #include "src/list-inl.h"	16 #include "src/list-inl.h"

17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol	17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol

18	18

19 namespace v8 {	19 namespace v8 {

20 namespace internal {	20 namespace internal {

21	21

	22 const size_t Utf16CharacterStream::kNoBookmark =

	23 std::numeric_limits<size_t>::max();

	24

22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {	25 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {

23 if (is_one_byte()) {	26 if (is_one_byte()) {

24 return isolate->factory()->InternalizeOneByteString(one_byte_literal());	27 return isolate->factory()->InternalizeOneByteString(one_byte_literal());

25 }	28 }

26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal());	29 return isolate->factory()->InternalizeTwoByteString(two_byte_literal());

27 }	30 }

28	31

29	32

30 // Default implementation for streams that do not support bookmarks.

31 bool Utf16CharacterStream::SetBookmark() { return false; }

32 void Utf16CharacterStream::ResetToBookmark() { UNREACHABLE(); }

33

34	33

35 // ----------------------------------------------------------------------------	34 // ----------------------------------------------------------------------------

36 // Scanner	35 // Scanner

37	36

38 Scanner::Scanner(UnicodeCache* unicode_cache)	37 Scanner::Scanner(UnicodeCache* unicode_cache)

39 : unicode_cache_(unicode_cache),	38 : unicode_cache_(unicode_cache),

40 bookmark_c0_(kNoBookmark),	39 bookmark_c0_(kNoBookmark),

41 octal_pos_(Location::invalid()),	40 octal_pos_(Location::invalid()),

42 decimal_with_leading_zero_pos_(Location::invalid()),	41 decimal_with_leading_zero_pos_(Location::invalid()),

43 found_html_comment_(false) {	42 found_html_comment_(false) {

(...skipping 261 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
305 return c == 0xFFFE;	304 return c == 0xFFFE;

306 }	305 }

307	306

308	307

309 bool Scanner::SkipWhiteSpace() {	308 bool Scanner::SkipWhiteSpace() {

310 int start_position = source_pos();	309 int start_position = source_pos();

311	310

312 while (true) {	311 while (true) {

313 while (true) {	312 while (true) {

314 // The unicode cache accepts unsigned inputs.	313 // The unicode cache accepts unsigned inputs.

315 if (c0_ < 0) break;	314 if (c0_ == kEndOfInput) break;

316 // Advance as long as character is a WhiteSpace or LineTerminator.	315 // Advance as long as character is a WhiteSpace or LineTerminator.

317 // Remember if the latter is the case.	316 // Remember if the latter is the case.

318 if (unicode_cache_->IsLineTerminator(c0_)) {	317 if (unicode_cache_->IsLineTerminator(c0_)) {

319 has_line_terminator_before_next_ = true;	318 has_line_terminator_before_next_ = true;

320 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&	319 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&

321 !IsLittleEndianByteOrderMark(c0_)) {	320 !IsLittleEndianByteOrderMark(c0_)) {

322 break;	321 break;

323 }	322 }

324 Advance();	323 Advance();

325 }	324 }

(...skipping 23 matching lines...) Expand all Loading...
349	348

350	349

351 Token::Value Scanner::SkipSingleLineComment() {	350 Token::Value Scanner::SkipSingleLineComment() {

352 Advance();	351 Advance();

353	352

354 // The line terminator at the end of the line is not considered	353 // The line terminator at the end of the line is not considered

355 // to be part of the single-line comment; it is recognized	354 // to be part of the single-line comment; it is recognized

356 // separately by the lexical grammar and becomes part of the	355 // separately by the lexical grammar and becomes part of the

357 // stream of input elements for the syntactic grammar (see	356 // stream of input elements for the syntactic grammar (see

358 // ECMA-262, section 7.4).	357 // ECMA-262, section 7.4).

359 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {	358 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {

360 Advance();	359 Advance();

361 }	360 }

362	361

363 return Token::WHITESPACE;	362 return Token::WHITESPACE;

364 }	363 }

365	364

366	365

367 Token::Value Scanner::SkipSourceURLComment() {	366 Token::Value Scanner::SkipSourceURLComment() {

368 TryToParseSourceURLComment();	367 TryToParseSourceURLComment();

369 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {	368 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {

370 Advance();	369 Advance();

371 }	370 }

372	371

373 return Token::WHITESPACE;	372 return Token::WHITESPACE;

374 }	373 }

375	374

376	375

377 void Scanner::TryToParseSourceURLComment() {	376 void Scanner::TryToParseSourceURLComment() {

378 // Magic comments are of the form: //[#@]\s<name>=\s<value>\s.* and this	377 // Magic comments are of the form: //[#@]\s<name>=\s<value>\s.* and this

379 // function will just return if it cannot parse a magic comment.	378 // function will just return if it cannot parse a magic comment.

380 if (c0_ < 0 || !unicode_cache_->IsWhiteSpace(c0_)) return;	379 if (c0_ == kEndOfInput || !unicode_cache_->IsWhiteSpace(c0_)) return;

381 Advance();	380 Advance();

382 LiteralBuffer name;	381 LiteralBuffer name;

383 while (c0_ >= 0 && !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) &&	382 while (c0_ != kEndOfInput &&

384 c0_ != '=') {	383 !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') {

385 name.AddChar(c0_);	384 name.AddChar(c0_);

386 Advance();	385 Advance();

387 }	386 }

388 if (!name.is_one_byte()) return;	387 if (!name.is_one_byte()) return;

389 Vector<const uint8_t> name_literal = name.one_byte_literal();	388 Vector<const uint8_t> name_literal = name.one_byte_literal();

390 LiteralBuffer* value;	389 LiteralBuffer* value;

391 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) {	390 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) {

392 value = &source_url_;	391 value = &source_url_;

393 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) {	392 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) {

394 value = &source_mapping_url_;	393 value = &source_mapping_url_;

395 } else {	394 } else {

396 return;	395 return;

397 }	396 }

398 if (c0_ != '=')	397 if (c0_ != '=')

399 return;	398 return;

400 Advance();	399 Advance();

401 value->Reset();	400 value->Reset();

402 while (c0_ >= 0 && unicode_cache_->IsWhiteSpace(c0_)) {	401 while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) {

403 Advance();	402 Advance();

404 }	403 }

405 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {	404 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {

406 // Disallowed characters.	405 // Disallowed characters.

407 if (c0_ == '"' || c0_ == '\'') {	406 if (c0_ == '"' || c0_ == '\'') {

408 value->Reset();	407 value->Reset();

409 return;	408 return;

410 }	409 }

411 if (unicode_cache_->IsWhiteSpace(c0_)) {	410 if (unicode_cache_->IsWhiteSpace(c0_)) {

412 break;	411 break;

413 }	412 }

414 value->AddChar(c0_);	413 value->AddChar(c0_);

415 Advance();	414 Advance();

416 }	415 }

417 // Allow whitespace at the end.	416 // Allow whitespace at the end.

418 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {	417 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {

419 if (!unicode_cache_->IsWhiteSpace(c0_)) {	418 if (!unicode_cache_->IsWhiteSpace(c0_)) {

420 value->Reset();	419 value->Reset();

421 break;	420 break;

422 }	421 }

423 Advance();	422 Advance();

424 }	423 }

425 }	424 }

426	425

427	426

428 Token::Value Scanner::SkipMultiLineComment() {	427 Token::Value Scanner::SkipMultiLineComment() {

429 DCHECK(c0_ == '*');	428 DCHECK(c0_ == '*');

430 Advance();	429 Advance();

431	430

432 while (c0_ >= 0) {	431 while (c0_ != kEndOfInput) {

433 uc32 ch = c0_;	432 uc32 ch = c0_;

434 Advance();	433 Advance();

435 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) {	434 if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(ch)) {

436 // Following ECMA-262, section 7.4, a comment containing	435 // Following ECMA-262, section 7.4, a comment containing

437 // a newline will make the comment count as a line-terminator.	436 // a newline will make the comment count as a line-terminator.

438 has_multiline_comment_before_next_ = true;	437 has_multiline_comment_before_next_ = true;

439 }	438 }

440 // If we have reached the end of the multi-line comment, we	439 // If we have reached the end of the multi-line comment, we

441 // consume the '/' and insert a whitespace. This way all	440 // consume the '/' and insert a whitespace. This way all

442 // multi-line comments are treated as whitespace.	441 // multi-line comments are treated as whitespace.

443 if (ch == '*' && c0_ == '/') {	442 if (ch == '*' && c0_ == '/') {

444 c0_ = ' ';	443 c0_ = ' ';

445 return Token::WHITESPACE;	444 return Token::WHITESPACE;

(...skipping 263 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
709	708

710 case '~':	709 case '~':

711 token = Select(Token::BIT_NOT);	710 token = Select(Token::BIT_NOT);

712 break;	711 break;

713	712

714 case '`':	713 case '`':

715 token = ScanTemplateStart();	714 token = ScanTemplateStart();

716 break;	715 break;

717	716

718 default:	717 default:

719 if (c0_ < 0) {	718 if (c0_ == kEndOfInput) {

720 token = Token::EOS;	719 token = Token::EOS;

721 } else if (unicode_cache_->IsIdentifierStart(c0_)) {	720 } else if (unicode_cache_->IsIdentifierStart(c0_)) {

722 token = ScanIdentifierOrKeyword();	721 token = ScanIdentifierOrKeyword();

723 } else if (IsDecimalDigit(c0_)) {	722 } else if (IsDecimalDigit(c0_)) {

724 token = ScanNumber(false);	723 token = ScanNumber(false);

725 } else if (SkipWhiteSpace()) {	724 } else if (SkipWhiteSpace()) {

726 token = Token::WHITESPACE;	725 token = Token::WHITESPACE;

727 } else {	726 } else {

728 token = Select(Token::ILLEGAL);	727 token = Select(Token::ILLEGAL);

729 }	728 }

(...skipping 71 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
801 Scan();	800 Scan();

802 }	801 }

803	802

804	803

805 template <bool capture_raw, bool in_template_literal>	804 template <bool capture_raw, bool in_template_literal>

806 bool Scanner::ScanEscape() {	805 bool Scanner::ScanEscape() {

807 uc32 c = c0_;	806 uc32 c = c0_;

808 Advance<capture_raw>();	807 Advance<capture_raw>();

809	808

810 // Skip escaped newlines.	809 // Skip escaped newlines.

811 if (!in_template_literal && c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {	810 if (!in_template_literal && c0_ != kEndOfInput &&

	811 unicode_cache_->IsLineTerminator(c)) {

812 // Allow CR+LF newlines in multiline string literals.	812 // Allow CR+LF newlines in multiline string literals.

813 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();	813 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();

814 // Allow LF+CR newlines in multiline string literals.	814 // Allow LF+CR newlines in multiline string literals.

815 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>();	815 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>();

816 return true;	816 return true;

817 }	817 }

818	818

819 switch (c) {	819 switch (c) {

820 case '\'': // fall through	820 case '\'': // fall through

821 case '"' : // fall through	821 case '"' : // fall through

(...skipping 65 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
887 Token::Value Scanner::ScanString() {	887 Token::Value Scanner::ScanString() {

888 uc32 quote = c0_;	888 uc32 quote = c0_;

889 Advance<false, false>(); // consume quote	889 Advance<false, false>(); // consume quote

890	890

891 LiteralScope literal(this);	891 LiteralScope literal(this);

892 while (true) {	892 while (true) {

893 if (c0_ > kMaxAscii) {	893 if (c0_ > kMaxAscii) {

894 HandleLeadSurrogate();	894 HandleLeadSurrogate();

895 break;	895 break;

896 }	896 }

897 if (c0_ < 0 || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL;	897 if (c0_ == kEndOfInput || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL;

898 if (c0_ == quote) {	898 if (c0_ == quote) {

899 literal.Complete();	899 literal.Complete();

900 Advance<false, false>();	900 Advance<false, false>();

901 return Token::STRING;	901 return Token::STRING;

902 }	902 }

903 char c = static_cast<char>(c0_);	903 char c = static_cast<char>(c0_);

904 if (c == '\\') break;	904 if (c == '\\') break;

905 Advance<false, false>();	905 Advance<false, false>();

906 AddLiteralChar(c);	906 AddLiteralChar(c);

907 }	907 }

908	908

909 while (c0_ != quote && c0_ >= 0	909 while (c0_ != quote && c0_ != kEndOfInput &&

910 && !unicode_cache_->IsLineTerminator(c0_)) {	910 !unicode_cache_->IsLineTerminator(c0_)) {

911 uc32 c = c0_;	911 uc32 c = c0_;

912 Advance();	912 Advance();

913 if (c == '\\') {	913 if (c == '\\') {

914 if (c0_ < 0 || !ScanEscape<false, false>()) {	914 if (c0_ == kEndOfInput || !ScanEscape<false, false>()) {

915 return Token::ILLEGAL;	915 return Token::ILLEGAL;

916 }	916 }

917 } else {	917 } else {

918 AddLiteralChar(c);	918 AddLiteralChar(c);

919 }	919 }

920 }	920 }

921 if (c0_ != quote) return Token::ILLEGAL;	921 if (c0_ != quote) return Token::ILLEGAL;

922 literal.Complete();	922 literal.Complete();

923	923

924 Advance(); // consume quote	924 Advance(); // consume quote

(...skipping 25 matching lines...) Expand all Loading...
950 Advance<capture_raw>();	950 Advance<capture_raw>();

951 if (c == '`') {	951 if (c == '`') {

952 result = Token::TEMPLATE_TAIL;	952 result = Token::TEMPLATE_TAIL;

953 ReduceRawLiteralLength(1);	953 ReduceRawLiteralLength(1);

954 break;	954 break;

955 } else if (c == '$' && c0_ == '{') {	955 } else if (c == '$' && c0_ == '{') {

956 Advance<capture_raw>(); // Consume '{'	956 Advance<capture_raw>(); // Consume '{'

957 ReduceRawLiteralLength(2);	957 ReduceRawLiteralLength(2);

958 break;	958 break;

959 } else if (c == '\\') {	959 } else if (c == '\\') {

960 if (c0_ > 0 && unicode_cache_->IsLineTerminator(c0_)) {	960 if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(c0_)) {

961 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty	961 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty

962 // code unit sequence.	962 // code unit sequence.

963 uc32 lastChar = c0_;	963 uc32 lastChar = c0_;

964 Advance<capture_raw>();	964 Advance<capture_raw>();

965 if (lastChar == '\r') {	965 if (lastChar == '\r') {

966 ReduceRawLiteralLength(1); // Remove \r	966 ReduceRawLiteralLength(1); // Remove \r

967 if (c0_ == '\n') {	967 if (c0_ == '\n') {

968 Advance<capture_raw>(); // Adds \n	968 Advance<capture_raw>(); // Adds \n

969 } else {	969 } else {

970 AddRawLiteralChar('\n');	970 AddRawLiteralChar('\n');

(...skipping 177 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1148 return Token::ILLEGAL;	1148 return Token::ILLEGAL;

1149 }	1149 }

1150 ScanDecimalDigits();	1150 ScanDecimalDigits();

1151 }	1151 }

1152	1152

1153 // The source character immediately following a numeric literal must	1153 // The source character immediately following a numeric literal must

1154 // not be an identifier start or a decimal digit; see ECMA-262	1154 // not be an identifier start or a decimal digit; see ECMA-262

1155 // section 7.8.3, page 17 (note that we read only one decimal digit	1155 // section 7.8.3, page 17 (note that we read only one decimal digit

1156 // if the value is 0).	1156 // if the value is 0).

1157 if (IsDecimalDigit(c0_) ||	1157 if (IsDecimalDigit(c0_) ||

1158 (c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_)))	1158 (c0_ != kEndOfInput && unicode_cache_->IsIdentifierStart(c0_)))

1159 return Token::ILLEGAL;	1159 return Token::ILLEGAL;

1160	1160

1161 literal.Complete();	1161 literal.Complete();

1162	1162

1163 if (kind == DECIMAL_WITH_LEADING_ZERO)	1163 if (kind == DECIMAL_WITH_LEADING_ZERO)

1164 decimal_with_leading_zero_pos_ = Location(start_pos, source_pos());	1164 decimal_with_leading_zero_pos_ = Location(start_pos, source_pos());

1165 return Token::NUMBER;	1165 return Token::NUMBER;

1166 }	1166 }

1167	1167

1168	1168

(...skipping 206 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1375 }	1375 }

1376 AddLiteralChar(c);	1376 AddLiteralChar(c);

1377 return ScanIdentifierSuffix(&literal, true);	1377 return ScanIdentifierSuffix(&literal, true);

1378 } else {	1378 } else {

1379 uc32 first_char = c0_;	1379 uc32 first_char = c0_;

1380 Advance();	1380 Advance();

1381 AddLiteralChar(first_char);	1381 AddLiteralChar(first_char);

1382 }	1382 }

1383	1383

1384 // Scan the rest of the identifier characters.	1384 // Scan the rest of the identifier characters.

1385 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {	1385 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {

1386 if (c0_ != '\\') {	1386 if (c0_ != '\\') {

1387 uc32 next_char = c0_;	1387 uc32 next_char = c0_;

1388 Advance();	1388 Advance();

1389 AddLiteralChar(next_char);	1389 AddLiteralChar(next_char);

1390 continue;	1390 continue;

1391 }	1391 }

1392 // Fallthrough if no longer able to complete keyword.	1392 // Fallthrough if no longer able to complete keyword.

1393 return ScanIdentifierSuffix(&literal, false);	1393 return ScanIdentifierSuffix(&literal, false);

1394 }	1394 }

1395	1395

1396 if (next_.literal_chars->is_one_byte()) {	1396 if (next_.literal_chars->is_one_byte()) {

1397 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();	1397 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();

1398 Token::Value token =	1398 Token::Value token =

1399 KeywordOrIdentifierToken(chars.start(), chars.length());	1399 KeywordOrIdentifierToken(chars.start(), chars.length());

1400 if (token == Token::IDENTIFIER) literal.Complete();	1400 if (token == Token::IDENTIFIER) literal.Complete();

1401 return token;	1401 return token;

1402 }	1402 }

1403 literal.Complete();	1403 literal.Complete();

1404 return Token::IDENTIFIER;	1404 return Token::IDENTIFIER;

1405 }	1405 }

1406	1406

1407	1407

1408 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal,	1408 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal,

1409 bool escaped) {	1409 bool escaped) {

1410 // Scan the rest of the identifier characters.	1410 // Scan the rest of the identifier characters.

1411 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {	1411 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {

1412 if (c0_ == '\\') {	1412 if (c0_ == '\\') {

1413 uc32 c = ScanIdentifierUnicodeEscape();	1413 uc32 c = ScanIdentifierUnicodeEscape();

1414 escaped = true;	1414 escaped = true;

1415 // Only allow legal identifier part characters.	1415 // Only allow legal identifier part characters.

1416 if (c < 0 ||	1416 if (c < 0 ||

1417 c == '\\' ||	1417 c == '\\' ||

1418 !unicode_cache_->IsIdentifierPart(c)) {	1418 !unicode_cache_->IsIdentifierPart(c)) {

1419 return Token::ILLEGAL;	1419 return Token::ILLEGAL;

1420 }	1420 }

1421 AddLiteralChar(c);	1421 AddLiteralChar(c);

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1458	1458

1459 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,	1459 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

1460 // the scanner should pass uninterpreted bodies to the RegExp	1460 // the scanner should pass uninterpreted bodies to the RegExp

1461 // constructor.	1461 // constructor.

1462 LiteralScope literal(this);	1462 LiteralScope literal(this);

1463 if (seen_equal) {	1463 if (seen_equal) {

1464 AddLiteralChar('=');	1464 AddLiteralChar('=');

1465 }	1465 }

1466	1466

1467 while (c0_ != '/' || in_character_class) {	1467 while (c0_ != '/' || in_character_class) {

1468 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false;	1468 if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))

	1469 return false;

1469 if (c0_ == '\\') { // Escape sequence.	1470 if (c0_ == '\\') { // Escape sequence.

1470 AddLiteralCharAdvance();	1471 AddLiteralCharAdvance();

1471 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false;	1472 if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))

	1473 return false;

1472 AddLiteralCharAdvance();	1474 AddLiteralCharAdvance();

1473 // If the escape allows more characters, i.e., \x??, \u????, or \c?,	1475 // If the escape allows more characters, i.e., \x??, \u????, or \c?,

1474 // only "safe" characters are allowed (letters, digits, underscore),	1476 // only "safe" characters are allowed (letters, digits, underscore),

1475 // otherwise the escape isn't valid and the invalid character has	1477 // otherwise the escape isn't valid and the invalid character has

1476 // its normal meaning. I.e., we can just continue scanning without	1478 // its normal meaning. I.e., we can just continue scanning without

1477 // worrying whether the following characters are part of the escape	1479 // worrying whether the following characters are part of the escape

1478 // or not, since any '/', '\\' or '[' is guaranteed to not be part	1480 // or not, since any '/', '\\' or '[' is guaranteed to not be part

1479 // of the escape sequence.	1481 // of the escape sequence.

1480	1482

1481 // TODO(896): At some point, parse RegExps more thoroughly to capture	1483 // TODO(896): At some point, parse RegExps more thoroughly to capture

(...skipping 10 matching lines...) Expand all Loading...
1492 next_.token = Token::REGEXP_LITERAL;	1494 next_.token = Token::REGEXP_LITERAL;

1493 return true;	1495 return true;

1494 }	1496 }

1495	1497

1496	1498

1497 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {	1499 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {

1498 DCHECK(next_.token == Token::REGEXP_LITERAL);	1500 DCHECK(next_.token == Token::REGEXP_LITERAL);

1499	1501

1500 // Scan regular expression flags.	1502 // Scan regular expression flags.

1501 int flags = 0;	1503 int flags = 0;

1502 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {	1504 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {

1503 RegExp::Flags flag = RegExp::kNone;	1505 RegExp::Flags flag = RegExp::kNone;

1504 switch (c0_) {	1506 switch (c0_) {

1505 case 'g':	1507 case 'g':

1506 flag = RegExp::kGlobal;	1508 flag = RegExp::kGlobal;

1507 break;	1509 break;

1508 case 'i':	1510 case 'i':

1509 flag = RegExp::kIgnoreCase;	1511 flag = RegExp::kIgnoreCase;

1510 break;	1512 break;

1511 case 'm':	1513 case 'm':

1512 flag = RegExp::kMultiline;	1514 flag = RegExp::kMultiline;

(...skipping 118 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1631 to->token = from->token;	1633 to->token = from->token;

1632 to->location = from->location;	1634 to->location = from->location;

1633 to->literal_chars->CopyFrom(from->literal_chars);	1635 to->literal_chars->CopyFrom(from->literal_chars);

1634 to->raw_literal_chars->CopyFrom(from->raw_literal_chars);	1636 to->raw_literal_chars->CopyFrom(from->raw_literal_chars);

1635 }	1637 }

1636	1638

1637	1639

1638	1640

1639 } // namespace internal	1641 } // namespace internal

1640 } // namespace v8	1642 } // namespace v8

OLD	NEW

« no previous file with comments | « src/parsing/scanner.h ('k') | src/parsing/scanner-character-streams.h » ('j') | src/parsing/scanner-character-streams.cc » ('J')