src/scanner.cc - Issue 8384003: Merged Scanner and JavaScriptScanner.

Side by Side Diff: src/scanner.cc

Issue 8384003: Merged Scanner and JavaScriptScanner. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 9 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 18 matching lines...) Expand all Loading...
29	29

30 #include "scanner.h"	30 #include "scanner.h"

31	31

32 #include "../include/v8stdint.h"	32 #include "../include/v8stdint.h"

33 #include "char-predicates-inl.h"	33 #include "char-predicates-inl.h"

34	34

35 namespace v8 {	35 namespace v8 {

36 namespace internal {	36 namespace internal {

37	37

38 // ----------------------------------------------------------------------------	38 // ----------------------------------------------------------------------------

39 // Scanner::LiteralScope

40

41 Scanner::LiteralScope::LiteralScope(Scanner* self)

42 : scanner_(self), complete_(false) {

43 self->StartLiteral();

44 }

45

46

47 Scanner::LiteralScope::~LiteralScope() {

48 if (!complete_) scanner_->DropLiteral();

49 }

50

51

52 void Scanner::LiteralScope::Complete() {

53 scanner_->TerminateLiteral();

54 complete_ = true;

55 }

56

57 // ----------------------------------------------------------------------------

58 // Scanner	39 // Scanner

59	40

60 Scanner::Scanner(UnicodeCache* unicode_cache)	41 Scanner::Scanner(UnicodeCache* unicode_cache)

61 : unicode_cache_(unicode_cache) { }	42 : unicode_cache_(unicode_cache),

	43 octal_pos_(Location::invalid()),

	44 harmony_scoping_(false) { }

	45

	46

	47 void Scanner::Initialize(UC16CharacterStream* source) {

	48 source_ = source;

	49 // Need to capture identifiers in order to recognize "get" and "set"

	50 // in object literals.

	51 Init();

	52 // Skip initial whitespace allowing HTML comment ends just like

	53 // after a newline and scan first token.

	54 has_line_terminator_before_next_ = true;

	55 SkipWhiteSpace();

	56 Scan();

	57 }

62	58

63	59

64 uc32 Scanner::ScanHexNumber(int expected_length) {	60 uc32 Scanner::ScanHexNumber(int expected_length) {

65 ASSERT(expected_length <= 4); // prevent overflow	61 ASSERT(expected_length <= 4); // prevent overflow

66	62

67 uc32 digits[4] = { 0, 0, 0, 0 };	63 uc32 digits[4] = { 0, 0, 0, 0 };

68 uc32 x = 0;	64 uc32 x = 0;

69 for (int i = 0; i < expected_length; i++) {	65 for (int i = 0; i < expected_length; i++) {

70 digits[i] = c0_;	66 digits[i] = c0_;

71 int d = HexValue(c0_);	67 int d = HexValue(c0_);

72 if (d < 0) {	68 if (d < 0) {

73 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes	69 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes

74 // should be illegal, but other JS VMs just return the	70 // should be illegal, but other JS VMs just return the

75 // non-escaped version of the original character.	71 // non-escaped version of the original character.

76	72

77 // Push back digits that we have advanced past.	73 // Push back digits that we have advanced past.

78 for (int j = i-1; j >= 0; j--) {	74 for (int j = i-1; j >= 0; j--) {

79 PushBack(digits[j]);	75 PushBack(digits[j]);

80 }	76 }

81 return -1;	77 return -1;

82 }	78 }

83 x = x * 16 + d;	79 x = x * 16 + d;

84 Advance();	80 Advance();

85 }	81 }

86	82

87 return x;	83 return x;

88 }	84 }

89	85

90	86

91

92 // ----------------------------------------------------------------------------

93 // JavaScriptScanner

94

95 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants)

96 : Scanner(scanner_contants),

97 octal_pos_(Location::invalid()),

98 harmony_scoping_(false) { }

99

100

101 void JavaScriptScanner::Initialize(UC16CharacterStream* source) {

102 source_ = source;

103 // Need to capture identifiers in order to recognize "get" and "set"

104 // in object literals.

105 Init();

106 // Skip initial whitespace allowing HTML comment ends just like

107 // after a newline and scan first token.

108 has_line_terminator_before_next_ = true;

109 SkipWhiteSpace();

110 Scan();

111 }

112

113

114 // Ensure that tokens can be stored in a byte.	87 // Ensure that tokens can be stored in a byte.

115 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);	88 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);

116	89

117 // Table of one-character tokens, by character (0x00..0x7f only).	90 // Table of one-character tokens, by character (0x00..0x7f only).

118 static const byte one_char_tokens[] = {	91 static const byte one_char_tokens[] = {

119 Token::ILLEGAL,	92 Token::ILLEGAL,

120 Token::ILLEGAL,	93 Token::ILLEGAL,

121 Token::ILLEGAL,	94 Token::ILLEGAL,

122 Token::ILLEGAL,	95 Token::ILLEGAL,

123 Token::ILLEGAL,	96 Token::ILLEGAL,

(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
240 Token::ILLEGAL,	213 Token::ILLEGAL,

241 Token::ILLEGAL,	214 Token::ILLEGAL,

242 Token::LBRACE, // 0x7b	215 Token::LBRACE, // 0x7b

243 Token::ILLEGAL,	216 Token::ILLEGAL,

244 Token::RBRACE, // 0x7d	217 Token::RBRACE, // 0x7d

245 Token::BIT_NOT, // 0x7e	218 Token::BIT_NOT, // 0x7e

246 Token::ILLEGAL	219 Token::ILLEGAL

247 };	220 };

248	221

249	222

250 Token::Value JavaScriptScanner::Next() {	223 Token::Value Scanner::Next() {

251 current_ = next_;	224 current_ = next_;

252 has_line_terminator_before_next_ = false;	225 has_line_terminator_before_next_ = false;

253 has_multiline_comment_before_next_ = false;	226 has_multiline_comment_before_next_ = false;

254 if (static_cast<unsigned>(c0_) <= 0x7f) {	227 if (static_cast<unsigned>(c0_) <= 0x7f) {

255 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);	228 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);

256 if (token != Token::ILLEGAL) {	229 if (token != Token::ILLEGAL) {

257 int pos = source_pos();	230 int pos = source_pos();

258 next_.token = token;	231 next_.token = token;

259 next_.location.beg_pos = pos;	232 next_.location.beg_pos = pos;

260 next_.location.end_pos = pos + 1;	233 next_.location.end_pos = pos + 1;

(...skipping 11 matching lines...) Expand all Loading...
272 // Unicode character; this implies that in a Unicode context the	245 // Unicode character; this implies that in a Unicode context the

273 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	246 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

274 // character expressed in little-endian byte order (since it could	247 // character expressed in little-endian byte order (since it could

275 // not be a U+FFFE character expressed in big-endian byte	248 // not be a U+FFFE character expressed in big-endian byte

276 // order). Nevertheless, we check for it to be compatible with	249 // order). Nevertheless, we check for it to be compatible with

277 // Spidermonkey.	250 // Spidermonkey.

278 return c == 0xFEFF || c == 0xFFFE;	251 return c == 0xFEFF || c == 0xFFFE;

279 }	252 }

280	253

281	254

282 bool JavaScriptScanner::SkipWhiteSpace() {	255 bool Scanner::SkipWhiteSpace() {

283 int start_position = source_pos();	256 int start_position = source_pos();

284	257

285 while (true) {	258 while (true) {

286 // We treat byte-order marks (BOMs) as whitespace for better	259 // We treat byte-order marks (BOMs) as whitespace for better

287 // compatibility with Spidermonkey and other JavaScript engines.	260 // compatibility with Spidermonkey and other JavaScript engines.

288 while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) {	261 while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) {

289 // IsWhiteSpace() includes line terminators!	262 // IsWhiteSpace() includes line terminators!

290 if (unicode_cache_->IsLineTerminator(c0_)) {	263 if (unicode_cache_->IsLineTerminator(c0_)) {

291 // Ignore line terminators, but remember them. This is necessary	264 // Ignore line terminators, but remember them. This is necessary

292 // for automatic semicolon insertion.	265 // for automatic semicolon insertion.

(...skipping 19 matching lines...) Expand all Loading...
312 PushBack('-'); // undo Advance()	285 PushBack('-'); // undo Advance()

313 }	286 }

314 PushBack('-'); // undo Advance()	287 PushBack('-'); // undo Advance()

315 }	288 }

316 // Return whether or not we skipped any characters.	289 // Return whether or not we skipped any characters.

317 return source_pos() != start_position;	290 return source_pos() != start_position;

318 }	291 }

319 }	292 }

320	293

321	294

322 Token::Value JavaScriptScanner::SkipSingleLineComment() {	295 Token::Value Scanner::SkipSingleLineComment() {

323 Advance();	296 Advance();

324	297

325 // The line terminator at the end of the line is not considered	298 // The line terminator at the end of the line is not considered

326 // to be part of the single-line comment; it is recognized	299 // to be part of the single-line comment; it is recognized

327 // separately by the lexical grammar and becomes part of the	300 // separately by the lexical grammar and becomes part of the

328 // stream of input elements for the syntactic grammar (see	301 // stream of input elements for the syntactic grammar (see

329 // ECMA-262, section 7.4).	302 // ECMA-262, section 7.4).

330 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {	303 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {

331 Advance();	304 Advance();

332 }	305 }

333	306

334 return Token::WHITESPACE;	307 return Token::WHITESPACE;

335 }	308 }

336	309

337	310

338 Token::Value JavaScriptScanner::SkipMultiLineComment() {	311 Token::Value Scanner::SkipMultiLineComment() {

339 ASSERT(c0_ == '*');	312 ASSERT(c0_ == '*');

340 Advance();	313 Advance();

341	314

342 while (c0_ >= 0) {	315 while (c0_ >= 0) {

343 uc32 ch = c0_;	316 uc32 ch = c0_;

344 Advance();	317 Advance();

345 if (unicode_cache_->IsLineTerminator(ch)) {	318 if (unicode_cache_->IsLineTerminator(ch)) {

346 // Following ECMA-262, section 7.4, a comment containing	319 // Following ECMA-262, section 7.4, a comment containing

347 // a newline will make the comment count as a line-terminator.	320 // a newline will make the comment count as a line-terminator.

348 has_multiline_comment_before_next_ = true;	321 has_multiline_comment_before_next_ = true;

349 }	322 }

350 // If we have reached the end of the multi-line comment, we	323 // If we have reached the end of the multi-line comment, we

351 // consume the '/' and insert a whitespace. This way all	324 // consume the '/' and insert a whitespace. This way all

352 // multi-line comments are treated as whitespace.	325 // multi-line comments are treated as whitespace.

353 if (ch == '*' && c0_ == '/') {	326 if (ch == '*' && c0_ == '/') {

354 c0_ = ' ';	327 c0_ = ' ';

355 return Token::WHITESPACE;	328 return Token::WHITESPACE;

356 }	329 }

357 }	330 }

358	331

359 // Unterminated multi-line comment.	332 // Unterminated multi-line comment.

360 return Token::ILLEGAL;	333 return Token::ILLEGAL;

361 }	334 }

362	335

363	336

364 Token::Value JavaScriptScanner::ScanHtmlComment() {	337 Token::Value Scanner::ScanHtmlComment() {

365 // Check for <!-- comments.	338 // Check for <!-- comments.

366 ASSERT(c0_ == '!');	339 ASSERT(c0_ == '!');

367 Advance();	340 Advance();

368 if (c0_ == '-') {	341 if (c0_ == '-') {

369 Advance();	342 Advance();

370 if (c0_ == '-') return SkipSingleLineComment();	343 if (c0_ == '-') return SkipSingleLineComment();

371 PushBack('-'); // undo Advance()	344 PushBack('-'); // undo Advance()

372 }	345 }

373 PushBack('!'); // undo Advance()	346 PushBack('!'); // undo Advance()

374 ASSERT(c0_ == '!');	347 ASSERT(c0_ == '!');

375 return Token::LT;	348 return Token::LT;

376 }	349 }

377	350

378	351

379 void JavaScriptScanner::Scan() {	352 void Scanner::Scan() {

380 next_.literal_chars = NULL;	353 next_.literal_chars = NULL;

381 Token::Value token;	354 Token::Value token;

382 do {	355 do {

383 // Remember the position of the next token	356 // Remember the position of the next token

384 next_.location.beg_pos = source_pos();	357 next_.location.beg_pos = source_pos();

385	358

386 switch (c0_) {	359 switch (c0_) {

387 case ' ':	360 case ' ':

388 case '\t':	361 case '\t':

389 Advance();	362 Advance();

(...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
609	582

610 // Continue scanning for tokens as long as we're just skipping	583 // Continue scanning for tokens as long as we're just skipping

611 // whitespace.	584 // whitespace.

612 } while (token == Token::WHITESPACE);	585 } while (token == Token::WHITESPACE);

613	586

614 next_.location.end_pos = source_pos();	587 next_.location.end_pos = source_pos();

615 next_.token = token;	588 next_.token = token;

616 }	589 }

617	590

618	591

619 void JavaScriptScanner::SeekForward(int pos) {	592 void Scanner::SeekForward(int pos) {

620 // After this call, we will have the token at the given position as	593 // After this call, we will have the token at the given position as

621 // the "next" token. The "current" token will be invalid.	594 // the "next" token. The "current" token will be invalid.

622 if (pos == next_.location.beg_pos) return;	595 if (pos == next_.location.beg_pos) return;

623 int current_pos = source_pos();	596 int current_pos = source_pos();

624 ASSERT_EQ(next_.location.end_pos, current_pos);	597 ASSERT_EQ(next_.location.end_pos, current_pos);

625 // Positions inside the lookahead token aren't supported.	598 // Positions inside the lookahead token aren't supported.

626 ASSERT(pos >= current_pos);	599 ASSERT(pos >= current_pos);

627 if (pos != current_pos) {	600 if (pos != current_pos) {

628 source_->SeekForward(pos - source_->pos());	601 source_->SeekForward(pos - source_->pos());

629 Advance();	602 Advance();

630 // This function is only called to seek to the location	603 // This function is only called to seek to the location

631 // of the end of a function (at the "}" token). It doesn't matter	604 // of the end of a function (at the "}" token). It doesn't matter

632 // whether there was a line terminator in the part we skip.	605 // whether there was a line terminator in the part we skip.

633 has_line_terminator_before_next_ = false;	606 has_line_terminator_before_next_ = false;

634 has_multiline_comment_before_next_ = false;	607 has_multiline_comment_before_next_ = false;

635 }	608 }

636 Scan();	609 Scan();

637 }	610 }

638	611

639	612

640 void JavaScriptScanner::ScanEscape() {	613 void Scanner::ScanEscape() {

641 uc32 c = c0_;	614 uc32 c = c0_;

642 Advance();	615 Advance();

643	616

644 // Skip escaped newlines.	617 // Skip escaped newlines.

645 if (unicode_cache_->IsLineTerminator(c)) {	618 if (unicode_cache_->IsLineTerminator(c)) {

646 // Allow CR+LF newlines in multiline string literals.	619 // Allow CR+LF newlines in multiline string literals.

647 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();	620 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();

648 // Allow LF+CR newlines in multiline string literals.	621 // Allow LF+CR newlines in multiline string literals.

649 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();	622 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();

650 return;	623 return;

(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
682	655

683 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these	656 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these

684 // should be illegal, but they are commonly handled	657 // should be illegal, but they are commonly handled

685 // as non-escaped characters by JS VMs.	658 // as non-escaped characters by JS VMs.

686 AddLiteralChar(c);	659 AddLiteralChar(c);

687 }	660 }

688	661

689	662

690 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of	663 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of

691 // ECMA-262. Other JS VMs support them.	664 // ECMA-262. Other JS VMs support them.

692 uc32 JavaScriptScanner::ScanOctalEscape(uc32 c, int length) {	665 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {

693 uc32 x = c - '0';	666 uc32 x = c - '0';

694 int i = 0;	667 int i = 0;

695 for (; i < length; i++) {	668 for (; i < length; i++) {

696 int d = c0_ - '0';	669 int d = c0_ - '0';

697 if (d < 0 || d > 7) break;	670 if (d < 0 || d > 7) break;

698 int nx = x * 8 + d;	671 int nx = x * 8 + d;

699 if (nx >= 256) break;	672 if (nx >= 256) break;

700 x = nx;	673 x = nx;

701 Advance();	674 Advance();

702 }	675 }

703 // Anything except '\0' is an octal escape sequence, illegal in strict mode.	676 // Anything except '\0' is an octal escape sequence, illegal in strict mode.

704 // Remember the position of octal escape sequences so that an error	677 // Remember the position of octal escape sequences so that an error

705 // can be reported later (in strict mode).	678 // can be reported later (in strict mode).

706 // We don't report the error immediately, because the octal escape can	679 // We don't report the error immediately, because the octal escape can

707 // occur before the "use strict" directive.	680 // occur before the "use strict" directive.

708 if (c != '0' || i > 0) {	681 if (c != '0' || i > 0) {

709 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);	682 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);

710 }	683 }

711 return x;	684 return x;

712 }	685 }

713	686

714	687

715 Token::Value JavaScriptScanner::ScanString() {	688 Token::Value Scanner::ScanString() {

716 uc32 quote = c0_;	689 uc32 quote = c0_;

717 Advance(); // consume quote	690 Advance(); // consume quote

718	691

719 LiteralScope literal(this);	692 LiteralScope literal(this);

720 while (c0_ != quote && c0_ >= 0	693 while (c0_ != quote && c0_ >= 0

721 && !unicode_cache_->IsLineTerminator(c0_)) {	694 && !unicode_cache_->IsLineTerminator(c0_)) {

722 uc32 c = c0_;	695 uc32 c = c0_;

723 Advance();	696 Advance();

724 if (c == '\\') {	697 if (c == '\\') {

725 if (c0_ < 0) return Token::ILLEGAL;	698 if (c0_ < 0) return Token::ILLEGAL;

726 ScanEscape();	699 ScanEscape();

727 } else {	700 } else {

728 AddLiteralChar(c);	701 AddLiteralChar(c);

729 }	702 }

730 }	703 }

731 if (c0_ != quote) return Token::ILLEGAL;	704 if (c0_ != quote) return Token::ILLEGAL;

732 literal.Complete();	705 literal.Complete();

733	706

734 Advance(); // consume quote	707 Advance(); // consume quote

735 return Token::STRING;	708 return Token::STRING;

736 }	709 }

737	710

738	711

739 void JavaScriptScanner::ScanDecimalDigits() {	712 void Scanner::ScanDecimalDigits() {

740 while (IsDecimalDigit(c0_))	713 while (IsDecimalDigit(c0_))

741 AddLiteralCharAdvance();	714 AddLiteralCharAdvance();

742 }	715 }

743	716

744	717

745 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {	718 Token::Value Scanner::ScanNumber(bool seen_period) {

746 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction	719 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction

747	720

748 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;	721 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;

749	722

750 LiteralScope literal(this);	723 LiteralScope literal(this);

751 if (seen_period) {	724 if (seen_period) {

752 // we have already seen a decimal point of the float	725 // we have already seen a decimal point of the float

753 AddLiteralChar('.');	726 AddLiteralChar('.');

754 ScanDecimalDigits(); // we know we have at least one digit	727 ScanDecimalDigits(); // we know we have at least one digit

755	728

(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
820 // if the value is 0).	793 // if the value is 0).

821 if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_))	794 if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_))

822 return Token::ILLEGAL;	795 return Token::ILLEGAL;

823	796

824 literal.Complete();	797 literal.Complete();

825	798

826 return Token::NUMBER;	799 return Token::NUMBER;

827 }	800 }

828	801

829	802

830 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() {	803 uc32 Scanner::ScanIdentifierUnicodeEscape() {

831 Advance();	804 Advance();

832 if (c0_ != 'u') return -1;	805 if (c0_ != 'u') return -1;

833 Advance();	806 Advance();

834 uc32 result = ScanHexNumber(4);	807 uc32 result = ScanHexNumber(4);

835 if (result < 0) PushBack('u');	808 if (result < 0) PushBack('u');

836 return result;	809 return result;

837 }	810 }

838	811

839	812

840 // ----------------------------------------------------------------------------	813 // ----------------------------------------------------------------------------

(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
937 (keyword_length <= 9 || input[9] == keyword[9])) { \	910 (keyword_length <= 9 || input[9] == keyword[9])) { \

938 return token; \	911 return token; \

939 } \	912 } \

940 }	913 }

941 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)	914 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)

942 }	915 }

943 return Token::IDENTIFIER;	916 return Token::IDENTIFIER;

944 }	917 }

945	918

946	919

947 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {	920 Token::Value Scanner::ScanIdentifierOrKeyword() {

948 ASSERT(unicode_cache_->IsIdentifierStart(c0_));	921 ASSERT(unicode_cache_->IsIdentifierStart(c0_));

949 LiteralScope literal(this);	922 LiteralScope literal(this);

950 // Scan identifier start character.	923 // Scan identifier start character.

951 if (c0_ == '\\') {	924 if (c0_ == '\\') {

952 uc32 c = ScanIdentifierUnicodeEscape();	925 uc32 c = ScanIdentifierUnicodeEscape();

953 // Only allow legal identifier start characters.	926 // Only allow legal identifier start characters.

954 if (c < 0 ||	927 if (c < 0 ||

955 c == '\\' || // No recursive escapes.	928 c == '\\' || // No recursive escapes.

956 !unicode_cache_->IsIdentifierStart(c)) {	929 !unicode_cache_->IsIdentifierStart(c)) {

957 return Token::ILLEGAL;	930 return Token::ILLEGAL;

(...skipping 24 matching lines...) Expand all Loading...
982 Vector<const char> chars = next_.literal_chars->ascii_literal();	955 Vector<const char> chars = next_.literal_chars->ascii_literal();

983 return KeywordOrIdentifierToken(chars.start(),	956 return KeywordOrIdentifierToken(chars.start(),

984 chars.length(),	957 chars.length(),

985 harmony_scoping_);	958 harmony_scoping_);

986 }	959 }

987	960

988 return Token::IDENTIFIER;	961 return Token::IDENTIFIER;

989 }	962 }

990	963

991	964

992 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) {	965 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {

993 // Scan the rest of the identifier characters.	966 // Scan the rest of the identifier characters.

994 while (unicode_cache_->IsIdentifierPart(c0_)) {	967 while (unicode_cache_->IsIdentifierPart(c0_)) {

995 if (c0_ == '\\') {	968 if (c0_ == '\\') {

996 uc32 c = ScanIdentifierUnicodeEscape();	969 uc32 c = ScanIdentifierUnicodeEscape();

997 // Only allow legal identifier part characters.	970 // Only allow legal identifier part characters.

998 if (c < 0 \|\|	971 if (c < 0 \|\|

999 c == '\\' \|\|	972 c == '\\' \|\|

1000 !unicode_cache_->IsIdentifierPart(c)) {	973 !unicode_cache_->IsIdentifierPart(c)) {

1001 return Token::ILLEGAL;	974 return Token::ILLEGAL;

1002 }	975 }

1003 AddLiteralChar(c);	976 AddLiteralChar(c);

1004 } else {	977 } else {

1005 AddLiteralChar(c0_);	978 AddLiteralChar(c0_);

1006 Advance();	979 Advance();

1007 }	980 }

1008 }	981 }

1009 literal->Complete();	982 literal->Complete();

1010	983

1011 return Token::IDENTIFIER;	984 return Token::IDENTIFIER;

1012 }	985 }

1013	986

1014	987

1015 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {	988 bool Scanner::ScanRegExpPattern(bool seen_equal) {

1016 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags	989 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags

1017 bool in_character_class = false;	990 bool in_character_class = false;

1018	991

1019 // Previous token is either '/' or '/=', in the second case, the	992 // Previous token is either '/' or '/=', in the second case, the

1020 // pattern starts at =.	993 // pattern starts at =.

1021 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);	994 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);

1022 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);	995 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);

1023	996

1024 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,	997 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

1025 // the scanner should pass uninterpreted bodies to the RegExp	998 // the scanner should pass uninterpreted bodies to the RegExp

(...skipping 26 matching lines...) Expand all Loading...
1052 }	1025 }

1053 }	1026 }

1054 Advance(); // consume '/'	1027 Advance(); // consume '/'

1055	1028

1056 literal.Complete();	1029 literal.Complete();

1057	1030

1058 return true;	1031 return true;

1059 }	1032 }

1060	1033

1061	1034

1062 bool JavaScriptScanner::ScanLiteralUnicodeEscape() {	1035 bool Scanner::ScanLiteralUnicodeEscape() {

1063 ASSERT(c0_ == '\\');	1036 ASSERT(c0_ == '\\');

1064 uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0};	1037 uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0};

1065 Advance();	1038 Advance();

1066 int i = 1;	1039 int i = 1;

1067 if (c0_ == 'u') {	1040 if (c0_ == 'u') {

1068 i++;	1041 i++;

1069 while (i < 6) {	1042 while (i < 6) {

1070 Advance();	1043 Advance();

1071 if (!IsHexDigit(c0_)) break;	1044 if (!IsHexDigit(c0_)) break;

1072 chars_read[i] = c0_;	1045 chars_read[i] = c0_;

1073 i++;	1046 i++;

1074 }	1047 }

1075 }	1048 }

1076 if (i < 6) {	1049 if (i < 6) {

1077 // Incomplete escape. Undo all advances and return false.	1050 // Incomplete escape. Undo all advances and return false.

1078 while (i > 0) {	1051 while (i > 0) {

1079 i--;	1052 i--;

1080 PushBack(chars_read[i]);	1053 PushBack(chars_read[i]);

1081 }	1054 }

1082 return false;	1055 return false;

1083 }	1056 }

1084 // Complete escape. Add all chars to current literal buffer.	1057 // Complete escape. Add all chars to current literal buffer.

1085 for (int i = 0; i < 6; i++) {	1058 for (int i = 0; i < 6; i++) {

1086 AddLiteralChar(chars_read[i]);	1059 AddLiteralChar(chars_read[i]);

1087 }	1060 }

1088 return true;	1061 return true;

1089 }	1062 }

1090	1063

1091	1064

1092 bool JavaScriptScanner::ScanRegExpFlags() {	1065 bool Scanner::ScanRegExpFlags() {

1093 // Scan regular expression flags.	1066 // Scan regular expression flags.

1094 LiteralScope literal(this);	1067 LiteralScope literal(this);

1095 while (unicode_cache_->IsIdentifierPart(c0_)) {	1068 while (unicode_cache_->IsIdentifierPart(c0_)) {

1096 if (c0_ != '\\') {	1069 if (c0_ != '\\') {

1097 AddLiteralCharAdvance();	1070 AddLiteralCharAdvance();

1098 } else {	1071 } else {

1099 if (!ScanLiteralUnicodeEscape()) {	1072 if (!ScanLiteralUnicodeEscape()) {

1100 break;	1073 break;

1101 }	1074 }

1102 }	1075 }

1103 }	1076 }

1104 literal.Complete();	1077 literal.Complete();

1105	1078

1106 next_.location.end_pos = source_pos() - 1;	1079 next_.location.end_pos = source_pos() - 1;

1107 return true;	1080 return true;

1108 }	1081 }

1109	1082

1110 } } // namespace v8::internal	1083 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »