Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(176)

Side by Side Diff: src/scanner.cc

Issue 8384003: Merged Scanner and JavaScriptScanner. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 9 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/scanner.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 18 matching lines...) Expand all
29 29
30 #include "scanner.h" 30 #include "scanner.h"
31 31
32 #include "../include/v8stdint.h" 32 #include "../include/v8stdint.h"
33 #include "char-predicates-inl.h" 33 #include "char-predicates-inl.h"
34 34
35 namespace v8 { 35 namespace v8 {
36 namespace internal { 36 namespace internal {
37 37
38 // ---------------------------------------------------------------------------- 38 // ----------------------------------------------------------------------------
39 // Scanner::LiteralScope
40
41 Scanner::LiteralScope::LiteralScope(Scanner* self)
42 : scanner_(self), complete_(false) {
43 self->StartLiteral();
44 }
45
46
47 Scanner::LiteralScope::~LiteralScope() {
48 if (!complete_) scanner_->DropLiteral();
49 }
50
51
52 void Scanner::LiteralScope::Complete() {
53 scanner_->TerminateLiteral();
54 complete_ = true;
55 }
56
57 // ----------------------------------------------------------------------------
58 // Scanner 39 // Scanner
59 40
60 Scanner::Scanner(UnicodeCache* unicode_cache) 41 Scanner::Scanner(UnicodeCache* unicode_cache)
61 : unicode_cache_(unicode_cache) { } 42 : unicode_cache_(unicode_cache),
43 octal_pos_(Location::invalid()),
44 harmony_scoping_(false) { }
45
46
47 void Scanner::Initialize(UC16CharacterStream* source) {
48 source_ = source;
49 // Need to capture identifiers in order to recognize "get" and "set"
50 // in object literals.
51 Init();
52 // Skip initial whitespace allowing HTML comment ends just like
53 // after a newline and scan first token.
54 has_line_terminator_before_next_ = true;
55 SkipWhiteSpace();
56 Scan();
57 }
62 58
63 59
64 uc32 Scanner::ScanHexNumber(int expected_length) { 60 uc32 Scanner::ScanHexNumber(int expected_length) {
65 ASSERT(expected_length <= 4); // prevent overflow 61 ASSERT(expected_length <= 4); // prevent overflow
66 62
67 uc32 digits[4] = { 0, 0, 0, 0 }; 63 uc32 digits[4] = { 0, 0, 0, 0 };
68 uc32 x = 0; 64 uc32 x = 0;
69 for (int i = 0; i < expected_length; i++) { 65 for (int i = 0; i < expected_length; i++) {
70 digits[i] = c0_; 66 digits[i] = c0_;
71 int d = HexValue(c0_); 67 int d = HexValue(c0_);
72 if (d < 0) { 68 if (d < 0) {
73 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes 69 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes
74 // should be illegal, but other JS VMs just return the 70 // should be illegal, but other JS VMs just return the
75 // non-escaped version of the original character. 71 // non-escaped version of the original character.
76 72
77 // Push back digits that we have advanced past. 73 // Push back digits that we have advanced past.
78 for (int j = i-1; j >= 0; j--) { 74 for (int j = i-1; j >= 0; j--) {
79 PushBack(digits[j]); 75 PushBack(digits[j]);
80 } 76 }
81 return -1; 77 return -1;
82 } 78 }
83 x = x * 16 + d; 79 x = x * 16 + d;
84 Advance(); 80 Advance();
85 } 81 }
86 82
87 return x; 83 return x;
88 } 84 }
89 85
90 86
91
92 // ----------------------------------------------------------------------------
93 // JavaScriptScanner
94
95 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants)
96 : Scanner(scanner_contants),
97 octal_pos_(Location::invalid()),
98 harmony_scoping_(false) { }
99
100
101 void JavaScriptScanner::Initialize(UC16CharacterStream* source) {
102 source_ = source;
103 // Need to capture identifiers in order to recognize "get" and "set"
104 // in object literals.
105 Init();
106 // Skip initial whitespace allowing HTML comment ends just like
107 // after a newline and scan first token.
108 has_line_terminator_before_next_ = true;
109 SkipWhiteSpace();
110 Scan();
111 }
112
113
114 // Ensure that tokens can be stored in a byte. 87 // Ensure that tokens can be stored in a byte.
115 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); 88 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
116 89
117 // Table of one-character tokens, by character (0x00..0x7f only). 90 // Table of one-character tokens, by character (0x00..0x7f only).
118 static const byte one_char_tokens[] = { 91 static const byte one_char_tokens[] = {
119 Token::ILLEGAL, 92 Token::ILLEGAL,
120 Token::ILLEGAL, 93 Token::ILLEGAL,
121 Token::ILLEGAL, 94 Token::ILLEGAL,
122 Token::ILLEGAL, 95 Token::ILLEGAL,
123 Token::ILLEGAL, 96 Token::ILLEGAL,
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
240 Token::ILLEGAL, 213 Token::ILLEGAL,
241 Token::ILLEGAL, 214 Token::ILLEGAL,
242 Token::LBRACE, // 0x7b 215 Token::LBRACE, // 0x7b
243 Token::ILLEGAL, 216 Token::ILLEGAL,
244 Token::RBRACE, // 0x7d 217 Token::RBRACE, // 0x7d
245 Token::BIT_NOT, // 0x7e 218 Token::BIT_NOT, // 0x7e
246 Token::ILLEGAL 219 Token::ILLEGAL
247 }; 220 };
248 221
249 222
250 Token::Value JavaScriptScanner::Next() { 223 Token::Value Scanner::Next() {
251 current_ = next_; 224 current_ = next_;
252 has_line_terminator_before_next_ = false; 225 has_line_terminator_before_next_ = false;
253 has_multiline_comment_before_next_ = false; 226 has_multiline_comment_before_next_ = false;
254 if (static_cast<unsigned>(c0_) <= 0x7f) { 227 if (static_cast<unsigned>(c0_) <= 0x7f) {
255 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); 228 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);
256 if (token != Token::ILLEGAL) { 229 if (token != Token::ILLEGAL) {
257 int pos = source_pos(); 230 int pos = source_pos();
258 next_.token = token; 231 next_.token = token;
259 next_.location.beg_pos = pos; 232 next_.location.beg_pos = pos;
260 next_.location.end_pos = pos + 1; 233 next_.location.end_pos = pos + 1;
(...skipping 11 matching lines...) Expand all
272 // Unicode character; this implies that in a Unicode context the 245 // Unicode character; this implies that in a Unicode context the
273 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF 246 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
274 // character expressed in little-endian byte order (since it could 247 // character expressed in little-endian byte order (since it could
275 // not be a U+FFFE character expressed in big-endian byte 248 // not be a U+FFFE character expressed in big-endian byte
276 // order). Nevertheless, we check for it to be compatible with 249 // order). Nevertheless, we check for it to be compatible with
277 // Spidermonkey. 250 // Spidermonkey.
278 return c == 0xFEFF || c == 0xFFFE; 251 return c == 0xFEFF || c == 0xFFFE;
279 } 252 }
280 253
281 254
282 bool JavaScriptScanner::SkipWhiteSpace() { 255 bool Scanner::SkipWhiteSpace() {
283 int start_position = source_pos(); 256 int start_position = source_pos();
284 257
285 while (true) { 258 while (true) {
286 // We treat byte-order marks (BOMs) as whitespace for better 259 // We treat byte-order marks (BOMs) as whitespace for better
287 // compatibility with Spidermonkey and other JavaScript engines. 260 // compatibility with Spidermonkey and other JavaScript engines.
288 while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) { 261 while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) {
289 // IsWhiteSpace() includes line terminators! 262 // IsWhiteSpace() includes line terminators!
290 if (unicode_cache_->IsLineTerminator(c0_)) { 263 if (unicode_cache_->IsLineTerminator(c0_)) {
291 // Ignore line terminators, but remember them. This is necessary 264 // Ignore line terminators, but remember them. This is necessary
292 // for automatic semicolon insertion. 265 // for automatic semicolon insertion.
(...skipping 19 matching lines...) Expand all
312 PushBack('-'); // undo Advance() 285 PushBack('-'); // undo Advance()
313 } 286 }
314 PushBack('-'); // undo Advance() 287 PushBack('-'); // undo Advance()
315 } 288 }
316 // Return whether or not we skipped any characters. 289 // Return whether or not we skipped any characters.
317 return source_pos() != start_position; 290 return source_pos() != start_position;
318 } 291 }
319 } 292 }
320 293
321 294
322 Token::Value JavaScriptScanner::SkipSingleLineComment() { 295 Token::Value Scanner::SkipSingleLineComment() {
323 Advance(); 296 Advance();
324 297
325 // The line terminator at the end of the line is not considered 298 // The line terminator at the end of the line is not considered
326 // to be part of the single-line comment; it is recognized 299 // to be part of the single-line comment; it is recognized
327 // separately by the lexical grammar and becomes part of the 300 // separately by the lexical grammar and becomes part of the
328 // stream of input elements for the syntactic grammar (see 301 // stream of input elements for the syntactic grammar (see
329 // ECMA-262, section 7.4). 302 // ECMA-262, section 7.4).
330 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { 303 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {
331 Advance(); 304 Advance();
332 } 305 }
333 306
334 return Token::WHITESPACE; 307 return Token::WHITESPACE;
335 } 308 }
336 309
337 310
338 Token::Value JavaScriptScanner::SkipMultiLineComment() { 311 Token::Value Scanner::SkipMultiLineComment() {
339 ASSERT(c0_ == '*'); 312 ASSERT(c0_ == '*');
340 Advance(); 313 Advance();
341 314
342 while (c0_ >= 0) { 315 while (c0_ >= 0) {
343 uc32 ch = c0_; 316 uc32 ch = c0_;
344 Advance(); 317 Advance();
345 if (unicode_cache_->IsLineTerminator(ch)) { 318 if (unicode_cache_->IsLineTerminator(ch)) {
346 // Following ECMA-262, section 7.4, a comment containing 319 // Following ECMA-262, section 7.4, a comment containing
347 // a newline will make the comment count as a line-terminator. 320 // a newline will make the comment count as a line-terminator.
348 has_multiline_comment_before_next_ = true; 321 has_multiline_comment_before_next_ = true;
349 } 322 }
350 // If we have reached the end of the multi-line comment, we 323 // If we have reached the end of the multi-line comment, we
351 // consume the '/' and insert a whitespace. This way all 324 // consume the '/' and insert a whitespace. This way all
352 // multi-line comments are treated as whitespace. 325 // multi-line comments are treated as whitespace.
353 if (ch == '*' && c0_ == '/') { 326 if (ch == '*' && c0_ == '/') {
354 c0_ = ' '; 327 c0_ = ' ';
355 return Token::WHITESPACE; 328 return Token::WHITESPACE;
356 } 329 }
357 } 330 }
358 331
359 // Unterminated multi-line comment. 332 // Unterminated multi-line comment.
360 return Token::ILLEGAL; 333 return Token::ILLEGAL;
361 } 334 }
362 335
363 336
364 Token::Value JavaScriptScanner::ScanHtmlComment() { 337 Token::Value Scanner::ScanHtmlComment() {
365 // Check for <!-- comments. 338 // Check for <!-- comments.
366 ASSERT(c0_ == '!'); 339 ASSERT(c0_ == '!');
367 Advance(); 340 Advance();
368 if (c0_ == '-') { 341 if (c0_ == '-') {
369 Advance(); 342 Advance();
370 if (c0_ == '-') return SkipSingleLineComment(); 343 if (c0_ == '-') return SkipSingleLineComment();
371 PushBack('-'); // undo Advance() 344 PushBack('-'); // undo Advance()
372 } 345 }
373 PushBack('!'); // undo Advance() 346 PushBack('!'); // undo Advance()
374 ASSERT(c0_ == '!'); 347 ASSERT(c0_ == '!');
375 return Token::LT; 348 return Token::LT;
376 } 349 }
377 350
378 351
379 void JavaScriptScanner::Scan() { 352 void Scanner::Scan() {
380 next_.literal_chars = NULL; 353 next_.literal_chars = NULL;
381 Token::Value token; 354 Token::Value token;
382 do { 355 do {
383 // Remember the position of the next token 356 // Remember the position of the next token
384 next_.location.beg_pos = source_pos(); 357 next_.location.beg_pos = source_pos();
385 358
386 switch (c0_) { 359 switch (c0_) {
387 case ' ': 360 case ' ':
388 case '\t': 361 case '\t':
389 Advance(); 362 Advance();
(...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after
609 582
610 // Continue scanning for tokens as long as we're just skipping 583 // Continue scanning for tokens as long as we're just skipping
611 // whitespace. 584 // whitespace.
612 } while (token == Token::WHITESPACE); 585 } while (token == Token::WHITESPACE);
613 586
614 next_.location.end_pos = source_pos(); 587 next_.location.end_pos = source_pos();
615 next_.token = token; 588 next_.token = token;
616 } 589 }
617 590
618 591
619 void JavaScriptScanner::SeekForward(int pos) { 592 void Scanner::SeekForward(int pos) {
620 // After this call, we will have the token at the given position as 593 // After this call, we will have the token at the given position as
621 // the "next" token. The "current" token will be invalid. 594 // the "next" token. The "current" token will be invalid.
622 if (pos == next_.location.beg_pos) return; 595 if (pos == next_.location.beg_pos) return;
623 int current_pos = source_pos(); 596 int current_pos = source_pos();
624 ASSERT_EQ(next_.location.end_pos, current_pos); 597 ASSERT_EQ(next_.location.end_pos, current_pos);
625 // Positions inside the lookahead token aren't supported. 598 // Positions inside the lookahead token aren't supported.
626 ASSERT(pos >= current_pos); 599 ASSERT(pos >= current_pos);
627 if (pos != current_pos) { 600 if (pos != current_pos) {
628 source_->SeekForward(pos - source_->pos()); 601 source_->SeekForward(pos - source_->pos());
629 Advance(); 602 Advance();
630 // This function is only called to seek to the location 603 // This function is only called to seek to the location
631 // of the end of a function (at the "}" token). It doesn't matter 604 // of the end of a function (at the "}" token). It doesn't matter
632 // whether there was a line terminator in the part we skip. 605 // whether there was a line terminator in the part we skip.
633 has_line_terminator_before_next_ = false; 606 has_line_terminator_before_next_ = false;
634 has_multiline_comment_before_next_ = false; 607 has_multiline_comment_before_next_ = false;
635 } 608 }
636 Scan(); 609 Scan();
637 } 610 }
638 611
639 612
640 void JavaScriptScanner::ScanEscape() { 613 void Scanner::ScanEscape() {
641 uc32 c = c0_; 614 uc32 c = c0_;
642 Advance(); 615 Advance();
643 616
644 // Skip escaped newlines. 617 // Skip escaped newlines.
645 if (unicode_cache_->IsLineTerminator(c)) { 618 if (unicode_cache_->IsLineTerminator(c)) {
646 // Allow CR+LF newlines in multiline string literals. 619 // Allow CR+LF newlines in multiline string literals.
647 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); 620 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
648 // Allow LF+CR newlines in multiline string literals. 621 // Allow LF+CR newlines in multiline string literals.
649 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); 622 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
650 return; 623 return;
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
682 655
683 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these 656 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
684 // should be illegal, but they are commonly handled 657 // should be illegal, but they are commonly handled
685 // as non-escaped characters by JS VMs. 658 // as non-escaped characters by JS VMs.
686 AddLiteralChar(c); 659 AddLiteralChar(c);
687 } 660 }
688 661
689 662
690 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of 663 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
691 // ECMA-262. Other JS VMs support them. 664 // ECMA-262. Other JS VMs support them.
692 uc32 JavaScriptScanner::ScanOctalEscape(uc32 c, int length) { 665 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
693 uc32 x = c - '0'; 666 uc32 x = c - '0';
694 int i = 0; 667 int i = 0;
695 for (; i < length; i++) { 668 for (; i < length; i++) {
696 int d = c0_ - '0'; 669 int d = c0_ - '0';
697 if (d < 0 || d > 7) break; 670 if (d < 0 || d > 7) break;
698 int nx = x * 8 + d; 671 int nx = x * 8 + d;
699 if (nx >= 256) break; 672 if (nx >= 256) break;
700 x = nx; 673 x = nx;
701 Advance(); 674 Advance();
702 } 675 }
703 // Anything except '\0' is an octal escape sequence, illegal in strict mode. 676 // Anything except '\0' is an octal escape sequence, illegal in strict mode.
704 // Remember the position of octal escape sequences so that an error 677 // Remember the position of octal escape sequences so that an error
705 // can be reported later (in strict mode). 678 // can be reported later (in strict mode).
706 // We don't report the error immediately, because the octal escape can 679 // We don't report the error immediately, because the octal escape can
707 // occur before the "use strict" directive. 680 // occur before the "use strict" directive.
708 if (c != '0' || i > 0) { 681 if (c != '0' || i > 0) {
709 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); 682 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
710 } 683 }
711 return x; 684 return x;
712 } 685 }
713 686
714 687
715 Token::Value JavaScriptScanner::ScanString() { 688 Token::Value Scanner::ScanString() {
716 uc32 quote = c0_; 689 uc32 quote = c0_;
717 Advance(); // consume quote 690 Advance(); // consume quote
718 691
719 LiteralScope literal(this); 692 LiteralScope literal(this);
720 while (c0_ != quote && c0_ >= 0 693 while (c0_ != quote && c0_ >= 0
721 && !unicode_cache_->IsLineTerminator(c0_)) { 694 && !unicode_cache_->IsLineTerminator(c0_)) {
722 uc32 c = c0_; 695 uc32 c = c0_;
723 Advance(); 696 Advance();
724 if (c == '\\') { 697 if (c == '\\') {
725 if (c0_ < 0) return Token::ILLEGAL; 698 if (c0_ < 0) return Token::ILLEGAL;
726 ScanEscape(); 699 ScanEscape();
727 } else { 700 } else {
728 AddLiteralChar(c); 701 AddLiteralChar(c);
729 } 702 }
730 } 703 }
731 if (c0_ != quote) return Token::ILLEGAL; 704 if (c0_ != quote) return Token::ILLEGAL;
732 literal.Complete(); 705 literal.Complete();
733 706
734 Advance(); // consume quote 707 Advance(); // consume quote
735 return Token::STRING; 708 return Token::STRING;
736 } 709 }
737 710
738 711
739 void JavaScriptScanner::ScanDecimalDigits() { 712 void Scanner::ScanDecimalDigits() {
740 while (IsDecimalDigit(c0_)) 713 while (IsDecimalDigit(c0_))
741 AddLiteralCharAdvance(); 714 AddLiteralCharAdvance();
742 } 715 }
743 716
744 717
745 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { 718 Token::Value Scanner::ScanNumber(bool seen_period) {
746 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction 719 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
747 720
748 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; 721 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
749 722
750 LiteralScope literal(this); 723 LiteralScope literal(this);
751 if (seen_period) { 724 if (seen_period) {
752 // we have already seen a decimal point of the float 725 // we have already seen a decimal point of the float
753 AddLiteralChar('.'); 726 AddLiteralChar('.');
754 ScanDecimalDigits(); // we know we have at least one digit 727 ScanDecimalDigits(); // we know we have at least one digit
755 728
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
820 // if the value is 0). 793 // if the value is 0).
821 if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) 794 if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_))
822 return Token::ILLEGAL; 795 return Token::ILLEGAL;
823 796
824 literal.Complete(); 797 literal.Complete();
825 798
826 return Token::NUMBER; 799 return Token::NUMBER;
827 } 800 }
828 801
829 802
830 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { 803 uc32 Scanner::ScanIdentifierUnicodeEscape() {
831 Advance(); 804 Advance();
832 if (c0_ != 'u') return -1; 805 if (c0_ != 'u') return -1;
833 Advance(); 806 Advance();
834 uc32 result = ScanHexNumber(4); 807 uc32 result = ScanHexNumber(4);
835 if (result < 0) PushBack('u'); 808 if (result < 0) PushBack('u');
836 return result; 809 return result;
837 } 810 }
838 811
839 812
840 // ---------------------------------------------------------------------------- 813 // ----------------------------------------------------------------------------
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
937 (keyword_length <= 9 || input[9] == keyword[9])) { \ 910 (keyword_length <= 9 || input[9] == keyword[9])) { \
938 return token; \ 911 return token; \
939 } \ 912 } \
940 } 913 }
941 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) 914 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
942 } 915 }
943 return Token::IDENTIFIER; 916 return Token::IDENTIFIER;
944 } 917 }
945 918
946 919
947 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { 920 Token::Value Scanner::ScanIdentifierOrKeyword() {
948 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); 921 ASSERT(unicode_cache_->IsIdentifierStart(c0_));
949 LiteralScope literal(this); 922 LiteralScope literal(this);
950 // Scan identifier start character. 923 // Scan identifier start character.
951 if (c0_ == '\\') { 924 if (c0_ == '\\') {
952 uc32 c = ScanIdentifierUnicodeEscape(); 925 uc32 c = ScanIdentifierUnicodeEscape();
953 // Only allow legal identifier start characters. 926 // Only allow legal identifier start characters.
954 if (c < 0 || 927 if (c < 0 ||
955 c == '\\' || // No recursive escapes. 928 c == '\\' || // No recursive escapes.
956 !unicode_cache_->IsIdentifierStart(c)) { 929 !unicode_cache_->IsIdentifierStart(c)) {
957 return Token::ILLEGAL; 930 return Token::ILLEGAL;
(...skipping 24 matching lines...) Expand all
982 Vector<const char> chars = next_.literal_chars->ascii_literal(); 955 Vector<const char> chars = next_.literal_chars->ascii_literal();
983 return KeywordOrIdentifierToken(chars.start(), 956 return KeywordOrIdentifierToken(chars.start(),
984 chars.length(), 957 chars.length(),
985 harmony_scoping_); 958 harmony_scoping_);
986 } 959 }
987 960
988 return Token::IDENTIFIER; 961 return Token::IDENTIFIER;
989 } 962 }
990 963
991 964
992 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { 965 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {
993 // Scan the rest of the identifier characters. 966 // Scan the rest of the identifier characters.
994 while (unicode_cache_->IsIdentifierPart(c0_)) { 967 while (unicode_cache_->IsIdentifierPart(c0_)) {
995 if (c0_ == '\\') { 968 if (c0_ == '\\') {
996 uc32 c = ScanIdentifierUnicodeEscape(); 969 uc32 c = ScanIdentifierUnicodeEscape();
997 // Only allow legal identifier part characters. 970 // Only allow legal identifier part characters.
998 if (c < 0 || 971 if (c < 0 ||
999 c == '\\' || 972 c == '\\' ||
1000 !unicode_cache_->IsIdentifierPart(c)) { 973 !unicode_cache_->IsIdentifierPart(c)) {
1001 return Token::ILLEGAL; 974 return Token::ILLEGAL;
1002 } 975 }
1003 AddLiteralChar(c); 976 AddLiteralChar(c);
1004 } else { 977 } else {
1005 AddLiteralChar(c0_); 978 AddLiteralChar(c0_);
1006 Advance(); 979 Advance();
1007 } 980 }
1008 } 981 }
1009 literal->Complete(); 982 literal->Complete();
1010 983
1011 return Token::IDENTIFIER; 984 return Token::IDENTIFIER;
1012 } 985 }
1013 986
1014 987
1015 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { 988 bool Scanner::ScanRegExpPattern(bool seen_equal) {
1016 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags 989 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
1017 bool in_character_class = false; 990 bool in_character_class = false;
1018 991
1019 // Previous token is either '/' or '/=', in the second case, the 992 // Previous token is either '/' or '/=', in the second case, the
1020 // pattern starts at =. 993 // pattern starts at =.
1021 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); 994 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
1022 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); 995 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
1023 996
1024 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 997 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1025 // the scanner should pass uninterpreted bodies to the RegExp 998 // the scanner should pass uninterpreted bodies to the RegExp
(...skipping 26 matching lines...) Expand all
1052 } 1025 }
1053 } 1026 }
1054 Advance(); // consume '/' 1027 Advance(); // consume '/'
1055 1028
1056 literal.Complete(); 1029 literal.Complete();
1057 1030
1058 return true; 1031 return true;
1059 } 1032 }
1060 1033
1061 1034
1062 bool JavaScriptScanner::ScanLiteralUnicodeEscape() { 1035 bool Scanner::ScanLiteralUnicodeEscape() {
1063 ASSERT(c0_ == '\\'); 1036 ASSERT(c0_ == '\\');
1064 uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0}; 1037 uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0};
1065 Advance(); 1038 Advance();
1066 int i = 1; 1039 int i = 1;
1067 if (c0_ == 'u') { 1040 if (c0_ == 'u') {
1068 i++; 1041 i++;
1069 while (i < 6) { 1042 while (i < 6) {
1070 Advance(); 1043 Advance();
1071 if (!IsHexDigit(c0_)) break; 1044 if (!IsHexDigit(c0_)) break;
1072 chars_read[i] = c0_; 1045 chars_read[i] = c0_;
1073 i++; 1046 i++;
1074 } 1047 }
1075 } 1048 }
1076 if (i < 6) { 1049 if (i < 6) {
1077 // Incomplete escape. Undo all advances and return false. 1050 // Incomplete escape. Undo all advances and return false.
1078 while (i > 0) { 1051 while (i > 0) {
1079 i--; 1052 i--;
1080 PushBack(chars_read[i]); 1053 PushBack(chars_read[i]);
1081 } 1054 }
1082 return false; 1055 return false;
1083 } 1056 }
1084 // Complete escape. Add all chars to current literal buffer. 1057 // Complete escape. Add all chars to current literal buffer.
1085 for (int i = 0; i < 6; i++) { 1058 for (int i = 0; i < 6; i++) {
1086 AddLiteralChar(chars_read[i]); 1059 AddLiteralChar(chars_read[i]);
1087 } 1060 }
1088 return true; 1061 return true;
1089 } 1062 }
1090 1063
1091 1064
1092 bool JavaScriptScanner::ScanRegExpFlags() { 1065 bool Scanner::ScanRegExpFlags() {
1093 // Scan regular expression flags. 1066 // Scan regular expression flags.
1094 LiteralScope literal(this); 1067 LiteralScope literal(this);
1095 while (unicode_cache_->IsIdentifierPart(c0_)) { 1068 while (unicode_cache_->IsIdentifierPart(c0_)) {
1096 if (c0_ != '\\') { 1069 if (c0_ != '\\') {
1097 AddLiteralCharAdvance(); 1070 AddLiteralCharAdvance();
1098 } else { 1071 } else {
1099 if (!ScanLiteralUnicodeEscape()) { 1072 if (!ScanLiteralUnicodeEscape()) {
1100 break; 1073 break;
1101 } 1074 }
1102 } 1075 }
1103 } 1076 }
1104 literal.Complete(); 1077 literal.Complete();
1105 1078
1106 next_.location.end_pos = source_pos() - 1; 1079 next_.location.end_pos = source_pos() - 1;
1107 return true; 1080 return true;
1108 } 1081 }
1109 1082
1110 } } // namespace v8::internal 1083 } } // namespace v8::internal
OLD | NEW
« no previous file with comments | « src/scanner.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698