src/parsing/scanner.cc - Issue 2314663002: Rework scanner-character-streams.

Side by Side Diff: src/parsing/scanner.cc

Issue 2314663002: Rework scanner-character-streams. (Closed)

Patch Set: Marja's feedback, round 1. Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/parsing/scanner.h"	7 #include "src/parsing/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

11 #include <cmath>	11 #include <cmath>

12	12

13 #include "src/ast/ast-value-factory.h"	13 #include "src/ast/ast-value-factory.h"

14 #include "src/char-predicates-inl.h"	14 #include "src/char-predicates-inl.h"

15 #include "src/conversions-inl.h"	15 #include "src/conversions-inl.h"

16 #include "src/list-inl.h"	16 #include "src/list-inl.h"

17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol	17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol

18	18

19 namespace v8 {	19 namespace v8 {

20 namespace internal {	20 namespace internal {

21	21

22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {	22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {

23 if (is_one_byte()) {	23 if (is_one_byte()) {

24 return isolate->factory()->InternalizeOneByteString(one_byte_literal());	24 return isolate->factory()->InternalizeOneByteString(one_byte_literal());

25 }	25 }

26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal());	26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal());

27 }	27 }

28	28

29	29

30 // Default implementation for streams that do not support bookmarks.

31 bool Utf16CharacterStream::SetBookmark() { return false; }

32 void Utf16CharacterStream::ResetToBookmark() { UNREACHABLE(); }

33	30

34	31

35 // ----------------------------------------------------------------------------	32 // ----------------------------------------------------------------------------

36 // Scanner	33 // Scanner

37	34

38 Scanner::Scanner(UnicodeCache* unicode_cache)	35 Scanner::Scanner(UnicodeCache* unicode_cache)

39 : unicode_cache_(unicode_cache),	36 : unicode_cache_(unicode_cache),

40 bookmark_c0_(kNoBookmark),	37 bookmark_c0_(kNoBookmark),

41 octal_pos_(Location::invalid()),	38 octal_pos_(Location::invalid()),

42 decimal_with_leading_zero_pos_(Location::invalid()),	39 decimal_with_leading_zero_pos_(Location::invalid()),

(...skipping 262 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
305 return c == 0xFFFE;	302 return c == 0xFFFE;

306 }	303 }

307	304

308	305

309 bool Scanner::SkipWhiteSpace() {	306 bool Scanner::SkipWhiteSpace() {

310 int start_position = source_pos();	307 int start_position = source_pos();

311	308

312 while (true) {	309 while (true) {

313 while (true) {	310 while (true) {

314 // The unicode cache accepts unsigned inputs.	311 // The unicode cache accepts unsigned inputs.

315 if (c0_ < 0) break;	312 if (c0_ == kEndOfInput) break;

316 // Advance as long as character is a WhiteSpace or LineTerminator.	313 // Advance as long as character is a WhiteSpace or LineTerminator.

317 // Remember if the latter is the case.	314 // Remember if the latter is the case.

318 if (unicode_cache_->IsLineTerminator(c0_)) {	315 if (unicode_cache_->IsLineTerminator(c0_)) {

319 has_line_terminator_before_next_ = true;	316 has_line_terminator_before_next_ = true;

320 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&	317 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&

321 !IsLittleEndianByteOrderMark(c0_)) {	318 !IsLittleEndianByteOrderMark(c0_)) {

322 break;	319 break;

323 }	320 }

324 Advance();	321 Advance();

325 }	322 }

(...skipping 23 matching lines...) Expand all Loading...
349	346

350	347

351 Token::Value Scanner::SkipSingleLineComment() {	348 Token::Value Scanner::SkipSingleLineComment() {

352 Advance();	349 Advance();

353	350

354 // The line terminator at the end of the line is not considered	351 // The line terminator at the end of the line is not considered

355 // to be part of the single-line comment; it is recognized	352 // to be part of the single-line comment; it is recognized

356 // separately by the lexical grammar and becomes part of the	353 // separately by the lexical grammar and becomes part of the

357 // stream of input elements for the syntactic grammar (see	354 // stream of input elements for the syntactic grammar (see

358 // ECMA-262, section 7.4).	355 // ECMA-262, section 7.4).

359 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {	356 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {

360 Advance();	357 Advance();

361 }	358 }

362	359

363 return Token::WHITESPACE;	360 return Token::WHITESPACE;

364 }	361 }

365	362

366	363

367 Token::Value Scanner::SkipSourceURLComment() {	364 Token::Value Scanner::SkipSourceURLComment() {

368 TryToParseSourceURLComment();	365 TryToParseSourceURLComment();

369 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {	366 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {

370 Advance();	367 Advance();

371 }	368 }

372	369

373 return Token::WHITESPACE;	370 return Token::WHITESPACE;

374 }	371 }

375	372

376	373

377 void Scanner::TryToParseSourceURLComment() {	374 void Scanner::TryToParseSourceURLComment() {

378 // Magic comments are of the form: //[#@]\s<name>=\s<value>\s.* and this	375 // Magic comments are of the form: //[#@]\s<name>=\s<value>\s.* and this

379 // function will just return if it cannot parse a magic comment.	376 // function will just return if it cannot parse a magic comment.

380 if (c0_ < 0 || !unicode_cache_->IsWhiteSpace(c0_)) return;	377 if (c0_ == kEndOfInput || !unicode_cache_->IsWhiteSpace(c0_)) return;

381 Advance();	378 Advance();

382 LiteralBuffer name;	379 LiteralBuffer name;

383 while (c0_ >= 0 && !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) &&	380 while (c0_ != kEndOfInput &&

384 c0_ != '=') {	381 !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') {

385 name.AddChar(c0_);	382 name.AddChar(c0_);

386 Advance();	383 Advance();

387 }	384 }

388 if (!name.is_one_byte()) return;	385 if (!name.is_one_byte()) return;

389 Vector<const uint8_t> name_literal = name.one_byte_literal();	386 Vector<const uint8_t> name_literal = name.one_byte_literal();

390 LiteralBuffer* value;	387 LiteralBuffer* value;

391 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) {	388 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) {

392 value = &source_url_;	389 value = &source_url_;

393 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) {	390 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) {

394 value = &source_mapping_url_;	391 value = &source_mapping_url_;

395 } else {	392 } else {

396 return;	393 return;

397 }	394 }

398 if (c0_ != '=')	395 if (c0_ != '=')

399 return;	396 return;

400 Advance();	397 Advance();

401 value->Reset();	398 value->Reset();

402 while (c0_ >= 0 && unicode_cache_->IsWhiteSpace(c0_)) {	399 while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) {

403 Advance();	400 Advance();

404 }	401 }

405 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {	402 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {

406 // Disallowed characters.	403 // Disallowed characters.

407 if (c0_ == '"' || c0_ == '\'') {	404 if (c0_ == '"' || c0_ == '\'') {

408 value->Reset();	405 value->Reset();

409 return;	406 return;

410 }	407 }

411 if (unicode_cache_->IsWhiteSpace(c0_)) {	408 if (unicode_cache_->IsWhiteSpace(c0_)) {

412 break;	409 break;

413 }	410 }

414 value->AddChar(c0_);	411 value->AddChar(c0_);

415 Advance();	412 Advance();

416 }	413 }

417 // Allow whitespace at the end.	414 // Allow whitespace at the end.

418 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {	415 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {

419 if (!unicode_cache_->IsWhiteSpace(c0_)) {	416 if (!unicode_cache_->IsWhiteSpace(c0_)) {

420 value->Reset();	417 value->Reset();

421 break;	418 break;

422 }	419 }

423 Advance();	420 Advance();

424 }	421 }

425 }	422 }

426	423

427	424

428 Token::Value Scanner::SkipMultiLineComment() {	425 Token::Value Scanner::SkipMultiLineComment() {

429 DCHECK(c0_ == '*');	426 DCHECK(c0_ == '*');

430 Advance();	427 Advance();

431	428

432 while (c0_ >= 0) {	429 while (c0_ != kEndOfInput) {

433 uc32 ch = c0_;	430 uc32 ch = c0_;

434 Advance();	431 Advance();

435 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) {	432 if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(ch)) {

436 // Following ECMA-262, section 7.4, a comment containing	433 // Following ECMA-262, section 7.4, a comment containing

437 // a newline will make the comment count as a line-terminator.	434 // a newline will make the comment count as a line-terminator.

438 has_multiline_comment_before_next_ = true;	435 has_multiline_comment_before_next_ = true;

439 }	436 }

440 // If we have reached the end of the multi-line comment, we	437 // If we have reached the end of the multi-line comment, we

441 // consume the '/' and insert a whitespace. This way all	438 // consume the '/' and insert a whitespace. This way all

442 // multi-line comments are treated as whitespace.	439 // multi-line comments are treated as whitespace.

443 if (ch == '*' && c0_ == '/') {	440 if (ch == '*' && c0_ == '/') {

444 c0_ = ' ';	441 c0_ = ' ';

445 return Token::WHITESPACE;	442 return Token::WHITESPACE;

(...skipping 263 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
709	706

710 case '~':	707 case '~':

711 token = Select(Token::BIT_NOT);	708 token = Select(Token::BIT_NOT);

712 break;	709 break;

713	710

714 case '`':	711 case '`':

715 token = ScanTemplateStart();	712 token = ScanTemplateStart();

716 break;	713 break;

717	714

718 default:	715 default:

719 if (c0_ < 0) {	716 if (c0_ == kEndOfInput) {

720 token = Token::EOS;	717 token = Token::EOS;

721 } else if (unicode_cache_->IsIdentifierStart(c0_)) {	718 } else if (unicode_cache_->IsIdentifierStart(c0_)) {

722 token = ScanIdentifierOrKeyword();	719 token = ScanIdentifierOrKeyword();

723 } else if (IsDecimalDigit(c0_)) {	720 } else if (IsDecimalDigit(c0_)) {

724 token = ScanNumber(false);	721 token = ScanNumber(false);

725 } else if (SkipWhiteSpace()) {	722 } else if (SkipWhiteSpace()) {

726 token = Token::WHITESPACE;	723 token = Token::WHITESPACE;

727 } else {	724 } else {

728 token = Select(Token::ILLEGAL);	725 token = Select(Token::ILLEGAL);

729 }	726 }

(...skipping 71 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
801 Scan();	798 Scan();

802 }	799 }

803	800

804	801

805 template <bool capture_raw, bool in_template_literal>	802 template <bool capture_raw, bool in_template_literal>

806 bool Scanner::ScanEscape() {	803 bool Scanner::ScanEscape() {

807 uc32 c = c0_;	804 uc32 c = c0_;

808 Advance<capture_raw>();	805 Advance<capture_raw>();

809	806

810 // Skip escaped newlines.	807 // Skip escaped newlines.

811 if (!in_template_literal && c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {	808 if (!in_template_literal && c0_ != kEndOfInput &&

	809 unicode_cache_->IsLineTerminator(c)) {

812 // Allow CR+LF newlines in multiline string literals.	810 // Allow CR+LF newlines in multiline string literals.

813 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();	811 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();

814 // Allow LF+CR newlines in multiline string literals.	812 // Allow LF+CR newlines in multiline string literals.

815 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>();	813 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>();

816 return true;	814 return true;

817 }	815 }

818	816

819 switch (c) {	817 switch (c) {

820 case '\'': // fall through	818 case '\'': // fall through

821 case '"' : // fall through	819 case '"' : // fall through

(...skipping 65 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
887 Token::Value Scanner::ScanString() {	885 Token::Value Scanner::ScanString() {

888 uc32 quote = c0_;	886 uc32 quote = c0_;

889 Advance<false, false>(); // consume quote	887 Advance<false, false>(); // consume quote

890	888

891 LiteralScope literal(this);	889 LiteralScope literal(this);

892 while (true) {	890 while (true) {

893 if (c0_ > kMaxAscii) {	891 if (c0_ > kMaxAscii) {

894 HandleLeadSurrogate();	892 HandleLeadSurrogate();

895 break;	893 break;

896 }	894 }

897 if (c0_ < 0 || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL;	895 if (c0_ == kEndOfInput || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL;

898 if (c0_ == quote) {	896 if (c0_ == quote) {

899 literal.Complete();	897 literal.Complete();

900 Advance<false, false>();	898 Advance<false, false>();

901 return Token::STRING;	899 return Token::STRING;

902 }	900 }

903 char c = static_cast<char>(c0_);	901 char c = static_cast<char>(c0_);

904 if (c == '\\') break;	902 if (c == '\\') break;

905 Advance<false, false>();	903 Advance<false, false>();

906 AddLiteralChar(c);	904 AddLiteralChar(c);

907 }	905 }

908	906

909 while (c0_ != quote && c0_ >= 0	907 while (c0_ != quote && c0_ != kEndOfInput &&

910 && !unicode_cache_->IsLineTerminator(c0_)) {	908 !unicode_cache_->IsLineTerminator(c0_)) {

911 uc32 c = c0_;	909 uc32 c = c0_;

912 Advance();	910 Advance();

913 if (c == '\\') {	911 if (c == '\\') {

914 if (c0_ < 0 || !ScanEscape<false, false>()) {	912 if (c0_ == kEndOfInput || !ScanEscape<false, false>()) {

915 return Token::ILLEGAL;	913 return Token::ILLEGAL;

916 }	914 }

917 } else {	915 } else {

918 AddLiteralChar(c);	916 AddLiteralChar(c);

919 }	917 }

920 }	918 }

921 if (c0_ != quote) return Token::ILLEGAL;	919 if (c0_ != quote) return Token::ILLEGAL;

922 literal.Complete();	920 literal.Complete();

923	921

924 Advance(); // consume quote	922 Advance(); // consume quote

(...skipping 25 matching lines...) Expand all Loading...
950 Advance<capture_raw>();	948 Advance<capture_raw>();

951 if (c == '`') {	949 if (c == '`') {

952 result = Token::TEMPLATE_TAIL;	950 result = Token::TEMPLATE_TAIL;

953 ReduceRawLiteralLength(1);	951 ReduceRawLiteralLength(1);

954 break;	952 break;

955 } else if (c == '$' && c0_ == '{') {	953 } else if (c == '$' && c0_ == '{') {

956 Advance<capture_raw>(); // Consume '{'	954 Advance<capture_raw>(); // Consume '{'

957 ReduceRawLiteralLength(2);	955 ReduceRawLiteralLength(2);

958 break;	956 break;

959 } else if (c == '\\') {	957 } else if (c == '\\') {

960 if (c0_ > 0 && unicode_cache_->IsLineTerminator(c0_)) {	958 if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(c0_)) {

961 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty	959 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty

962 // code unit sequence.	960 // code unit sequence.

963 uc32 lastChar = c0_;	961 uc32 lastChar = c0_;

964 Advance<capture_raw>();	962 Advance<capture_raw>();

965 if (lastChar == '\r') {	963 if (lastChar == '\r') {

966 ReduceRawLiteralLength(1); // Remove \r	964 ReduceRawLiteralLength(1); // Remove \r

967 if (c0_ == '\n') {	965 if (c0_ == '\n') {

968 Advance<capture_raw>(); // Adds \n	966 Advance<capture_raw>(); // Adds \n

969 } else {	967 } else {

970 AddRawLiteralChar('\n');	968 AddRawLiteralChar('\n');

(...skipping 177 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1148 return Token::ILLEGAL;	1146 return Token::ILLEGAL;

1149 }	1147 }

1150 ScanDecimalDigits();	1148 ScanDecimalDigits();

1151 }	1149 }

1152	1150

1153 // The source character immediately following a numeric literal must	1151 // The source character immediately following a numeric literal must

1154 // not be an identifier start or a decimal digit; see ECMA-262	1152 // not be an identifier start or a decimal digit; see ECMA-262

1155 // section 7.8.3, page 17 (note that we read only one decimal digit	1153 // section 7.8.3, page 17 (note that we read only one decimal digit

1156 // if the value is 0).	1154 // if the value is 0).

1157 if (IsDecimalDigit(c0_) ||	1155 if (IsDecimalDigit(c0_) ||

1158 (c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_)))	1156 (c0_ != kEndOfInput && unicode_cache_->IsIdentifierStart(c0_)))

1159 return Token::ILLEGAL;	1157 return Token::ILLEGAL;

1160	1158

1161 literal.Complete();	1159 literal.Complete();

1162	1160

1163 if (kind == DECIMAL_WITH_LEADING_ZERO)	1161 if (kind == DECIMAL_WITH_LEADING_ZERO)

1164 decimal_with_leading_zero_pos_ = Location(start_pos, source_pos());	1162 decimal_with_leading_zero_pos_ = Location(start_pos, source_pos());

1165 return Token::NUMBER;	1163 return Token::NUMBER;

1166 }	1164 }

1167	1165

1168	1166

(...skipping 206 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1375 }	1373 }

1376 AddLiteralChar(c);	1374 AddLiteralChar(c);

1377 return ScanIdentifierSuffix(&literal, true);	1375 return ScanIdentifierSuffix(&literal, true);

1378 } else {	1376 } else {

1379 uc32 first_char = c0_;	1377 uc32 first_char = c0_;

1380 Advance();	1378 Advance();

1381 AddLiteralChar(first_char);	1379 AddLiteralChar(first_char);

1382 }	1380 }

1383	1381

1384 // Scan the rest of the identifier characters.	1382 // Scan the rest of the identifier characters.

1385 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {	1383 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {

1386 if (c0_ != '\\') {	1384 if (c0_ != '\\') {

1387 uc32 next_char = c0_;	1385 uc32 next_char = c0_;

1388 Advance();	1386 Advance();

1389 AddLiteralChar(next_char);	1387 AddLiteralChar(next_char);

1390 continue;	1388 continue;

1391 }	1389 }

1392 // Fallthrough if no longer able to complete keyword.	1390 // Fallthrough if no longer able to complete keyword.

1393 return ScanIdentifierSuffix(&literal, false);	1391 return ScanIdentifierSuffix(&literal, false);

1394 }	1392 }

1395	1393

1396 if (next_.literal_chars->is_one_byte()) {	1394 if (next_.literal_chars->is_one_byte()) {

1397 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();	1395 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();

1398 Token::Value token =	1396 Token::Value token =

1399 KeywordOrIdentifierToken(chars.start(), chars.length());	1397 KeywordOrIdentifierToken(chars.start(), chars.length());

1400 if (token == Token::IDENTIFIER) literal.Complete();	1398 if (token == Token::IDENTIFIER) literal.Complete();

1401 return token;	1399 return token;

1402 }	1400 }

1403 literal.Complete();	1401 literal.Complete();

1404 return Token::IDENTIFIER;	1402 return Token::IDENTIFIER;

1405 }	1403 }

1406	1404

1407	1405

1408 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal,	1406 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal,

1409 bool escaped) {	1407 bool escaped) {

1410 // Scan the rest of the identifier characters.	1408 // Scan the rest of the identifier characters.

1411 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {	1409 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {

1412 if (c0_ == '\\') {	1410 if (c0_ == '\\') {

1413 uc32 c = ScanIdentifierUnicodeEscape();	1411 uc32 c = ScanIdentifierUnicodeEscape();

1414 escaped = true;	1412 escaped = true;

1415 // Only allow legal identifier part characters.	1413 // Only allow legal identifier part characters.

1416 if (c < 0 ||	1414 if (c < 0 ||

1417 c == '\\' ||	1415 c == '\\' ||

1418 !unicode_cache_->IsIdentifierPart(c)) {	1416 !unicode_cache_->IsIdentifierPart(c)) {

1419 return Token::ILLEGAL;	1417 return Token::ILLEGAL;

1420 }	1418 }

1421 AddLiteralChar(c);	1419 AddLiteralChar(c);

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1458	1456

1459 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,	1457 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

1460 // the scanner should pass uninterpreted bodies to the RegExp	1458 // the scanner should pass uninterpreted bodies to the RegExp

1461 // constructor.	1459 // constructor.

1462 LiteralScope literal(this);	1460 LiteralScope literal(this);

1463 if (seen_equal) {	1461 if (seen_equal) {

1464 AddLiteralChar('=');	1462 AddLiteralChar('=');

1465 }	1463 }

1466	1464

1467 while (c0_ != '/' || in_character_class) {	1465 while (c0_ != '/' || in_character_class) {

1468 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false;	1466 if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))

	1467 return false;

1469 if (c0_ == '\\') { // Escape sequence.	1468 if (c0_ == '\\') { // Escape sequence.

1470 AddLiteralCharAdvance();	1469 AddLiteralCharAdvance();

1471 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false;	1470 if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))

	1471 return false;

1472 AddLiteralCharAdvance();	1472 AddLiteralCharAdvance();

1473 // If the escape allows more characters, i.e., \x??, \u????, or \c?,	1473 // If the escape allows more characters, i.e., \x??, \u????, or \c?,

1474 // only "safe" characters are allowed (letters, digits, underscore),	1474 // only "safe" characters are allowed (letters, digits, underscore),

1475 // otherwise the escape isn't valid and the invalid character has	1475 // otherwise the escape isn't valid and the invalid character has

1476 // its normal meaning. I.e., we can just continue scanning without	1476 // its normal meaning. I.e., we can just continue scanning without

1477 // worrying whether the following characters are part of the escape	1477 // worrying whether the following characters are part of the escape

1478 // or not, since any '/', '\\' or '[' is guaranteed to not be part	1478 // or not, since any '/', '\\' or '[' is guaranteed to not be part

1479 // of the escape sequence.	1479 // of the escape sequence.

1480	1480

1481 // TODO(896): At some point, parse RegExps more thoroughly to capture	1481 // TODO(896): At some point, parse RegExps more thoroughly to capture

(...skipping 10 matching lines...) Expand all Loading...
1492 next_.token = Token::REGEXP_LITERAL;	1492 next_.token = Token::REGEXP_LITERAL;

1493 return true;	1493 return true;

1494 }	1494 }

1495	1495

1496	1496

1497 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {	1497 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {

1498 DCHECK(next_.token == Token::REGEXP_LITERAL);	1498 DCHECK(next_.token == Token::REGEXP_LITERAL);

1499	1499

1500 // Scan regular expression flags.	1500 // Scan regular expression flags.

1501 int flags = 0;	1501 int flags = 0;

1502 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {	1502 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {

1503 RegExp::Flags flag = RegExp::kNone;	1503 RegExp::Flags flag = RegExp::kNone;

1504 switch (c0_) {	1504 switch (c0_) {

1505 case 'g':	1505 case 'g':

1506 flag = RegExp::kGlobal;	1506 flag = RegExp::kGlobal;

1507 break;	1507 break;

1508 case 'i':	1508 case 'i':

1509 flag = RegExp::kIgnoreCase;	1509 flag = RegExp::kIgnoreCase;

1510 break;	1510 break;

1511 case 'm':	1511 case 'm':

1512 flag = RegExp::kMultiline;	1512 flag = RegExp::kMultiline;

(...skipping 118 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1631 to->token = from->token;	1631 to->token = from->token;

1632 to->location = from->location;	1632 to->location = from->location;

1633 to->literal_chars->CopyFrom(from->literal_chars);	1633 to->literal_chars->CopyFrom(from->literal_chars);

1634 to->raw_literal_chars->CopyFrom(from->raw_literal_chars);	1634 to->raw_literal_chars->CopyFrom(from->raw_literal_chars);

1635 }	1635 }

1636	1636

1637	1637

1638	1638

1639 } // namespace internal	1639 } // namespace internal

1640 } // namespace v8	1640 } // namespace v8

OLD	NEW

« src/parsing/scanner.h ('K') | « src/parsing/scanner.h ('k') | src/parsing/scanner-character-streams.h » ('j') | src/parsing/scanner-character-streams.cc » ('J')