OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 18 matching lines...) Expand all Loading... |
29 | 29 |
30 #include "scanner.h" | 30 #include "scanner.h" |
31 | 31 |
32 #include "../include/v8stdint.h" | 32 #include "../include/v8stdint.h" |
33 #include "char-predicates-inl.h" | 33 #include "char-predicates-inl.h" |
34 | 34 |
35 namespace v8 { | 35 namespace v8 { |
36 namespace internal { | 36 namespace internal { |
37 | 37 |
38 // ---------------------------------------------------------------------------- | 38 // ---------------------------------------------------------------------------- |
39 // Scanner::LiteralScope | |
40 | |
// (Present on the OLD side of this diff only.)
// Constructor: remembers the scanner in scanner_, marks the scope as not yet
// complete, and calls StartLiteral() on the scanner.
41 Scanner::LiteralScope::LiteralScope(Scanner* self) | |
42 : scanner_(self), complete_(false) { | |
43 self->StartLiteral(); | |
44 } | |
45 | |
46 | |
// (Present on the OLD side of this diff only.)
// Destructor: if complete_ is still false, calls DropLiteral() on the scanner.
47 Scanner::LiteralScope::~LiteralScope() { | |
48 if (!complete_) scanner_->DropLiteral(); | |
49 } | |
50 | |
51 | |
// (Present on the OLD side of this diff only.)
// Calls TerminateLiteral() on the scanner and sets complete_ to true, which
// prevents the destructor from calling DropLiteral().
52 void Scanner::LiteralScope::Complete() { | |
53 scanner_->TerminateLiteral(); | |
54 complete_ = true; | |
55 } | |
56 | |
57 // ---------------------------------------------------------------------------- | |
58 // Scanner | 39 // Scanner |
59 | 40 |
// Constructor: stores the given UnicodeCache pointer in unicode_cache_.
// On the NEW side it additionally initializes octal_pos_ to
// Location::invalid() and harmony_scoping_ to false.
60 Scanner::Scanner(UnicodeCache* unicode_cache) | 41 Scanner::Scanner(UnicodeCache* unicode_cache) |
61 : unicode_cache_(unicode_cache) { } | 42 : unicode_cache_(unicode_cache), |
| 43 octal_pos_(Location::invalid()), |
| 44 harmony_scoping_(false) { } |
| 45 |
| 46 |
// (Present on the NEW side of this diff only.)
// Attaches the scanner to the character stream `source`, calls Init(), and
// then primes the scanner: sets has_line_terminator_before_next_ to true,
// skips leading whitespace, and calls Scan() to scan the first token.
| 47 void Scanner::Initialize(UC16CharacterStream* source) { |
| 48 source_ = source; |
| 49 // Need to capture identifiers in order to recognize "get" and "set" |
| 50 // in object literals. |
| 51 Init(); |
| 52 // Skip initial whitespace allowing HTML comment ends just like |
| 53 // after a newline and scan first token. |
| 54 has_line_terminator_before_next_ = true; |
| 55 SkipWhiteSpace(); |
| 56 Scan(); |
| 57 } |
62 | 58 |
63 | 59 |
// Reads exactly `expected_length` hex digits, starting with the current
// character c0_, and returns their combined value (most significant digit
// first). `expected_length` must be at most 4 (asserted).
// If a character for which HexValue() is negative is encountered, the digits
// consumed so far are handed to PushBack() in reverse order and -1 is
// returned.
64 uc32 Scanner::ScanHexNumber(int expected_length) { | 60 uc32 Scanner::ScanHexNumber(int expected_length) { |
65 ASSERT(expected_length <= 4); // prevent overflow | 61 ASSERT(expected_length <= 4); // prevent overflow |
66 | 62 |
// digits[] records each character examined so it can be pushed back on failure.
67 uc32 digits[4] = { 0, 0, 0, 0 }; | 63 uc32 digits[4] = { 0, 0, 0, 0 }; |
68 uc32 x = 0; | 64 uc32 x = 0; |
69 for (int i = 0; i < expected_length; i++) { | 65 for (int i = 0; i < expected_length; i++) { |
70 digits[i] = c0_; | 66 digits[i] = c0_; |
71 int d = HexValue(c0_); | 67 int d = HexValue(c0_); |
72 if (d < 0) { | 68 if (d < 0) { |
73 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes | 69 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes |
74 // should be illegal, but other JS VMs just return the | 70 // should be illegal, but other JS VMs just return the |
75 // non-escaped version of the original character. | 71 // non-escaped version of the original character. |
76 | 72 |
77 // Push back digits that we have advanced past. | 73 // Push back digits that we have advanced past. |
78 for (int j = i-1; j >= 0; j--) { | 74 for (int j = i-1; j >= 0; j--) { |
79 PushBack(digits[j]); | 75 PushBack(digits[j]); |
80 } | 76 } |
81 return -1; | 77 return -1; |
82 } | 78 } |
83 x = x * 16 + d; | 79 x = x * 16 + d; |
84 Advance(); | 80 Advance(); |
85 } | 81 } |
86 | 82 |
87 return x; | 83 return x; |
88 } | 84 } |
89 | 85 |
90 | 86 |
91 | |
92 // ---------------------------------------------------------------------------- | |
93 // JavaScriptScanner | |
94 | |
// (Present on the OLD side of this diff only.)
// Constructor: forwards the UnicodeCache pointer to the Scanner base
// constructor, initializes octal_pos_ to Location::invalid() and
// harmony_scoping_ to false.
95 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants) | |
96 : Scanner(scanner_contants), | |
97 octal_pos_(Location::invalid()), | |
98 harmony_scoping_(false) { } | |
99 | |
100 | |
// (Present on the OLD side of this diff only.)
// Attaches the scanner to the character stream `source`, calls Init(), and
// then primes the scanner: sets has_line_terminator_before_next_ to true,
// skips leading whitespace, and calls Scan() to scan the first token.
101 void JavaScriptScanner::Initialize(UC16CharacterStream* source) { | |
102 source_ = source; | |
103 // Need to capture identifiers in order to recognize "get" and "set" | |
104 // in object literals. | |
105 Init(); | |
106 // Skip initial whitespace allowing HTML comment ends just like | |
107 // after a newline and scan first token. | |
108 has_line_terminator_before_next_ = true; | |
109 SkipWhiteSpace(); | |
110 Scan(); | |
111 } | |
112 | |
113 | |
114 // Ensure that tokens can be stored in a byte. | 87 // Ensure that tokens can be stored in a byte. |
115 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | 88 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); |
116 | 89 |
117 // Table of one-character tokens, by character (0x00..0x7f only). | 90 // Table of one-character tokens, by character (0x00..0x7f only). |
118 static const byte one_char_tokens[] = { | 91 static const byte one_char_tokens[] = { |
119 Token::ILLEGAL, | 92 Token::ILLEGAL, |
120 Token::ILLEGAL, | 93 Token::ILLEGAL, |
121 Token::ILLEGAL, | 94 Token::ILLEGAL, |
122 Token::ILLEGAL, | 95 Token::ILLEGAL, |
123 Token::ILLEGAL, | 96 Token::ILLEGAL, |
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
240 Token::ILLEGAL, | 213 Token::ILLEGAL, |
241 Token::ILLEGAL, | 214 Token::ILLEGAL, |
242 Token::LBRACE, // 0x7b | 215 Token::LBRACE, // 0x7b |
243 Token::ILLEGAL, | 216 Token::ILLEGAL, |
244 Token::RBRACE, // 0x7d | 217 Token::RBRACE, // 0x7d |
245 Token::BIT_NOT, // 0x7e | 218 Token::BIT_NOT, // 0x7e |
246 Token::ILLEGAL | 219 Token::ILLEGAL |
247 }; | 220 }; |
248 | 221 |
249 | 222 |
250 Token::Value JavaScriptScanner::Next() { | 223 Token::Value Scanner::Next() { |
251 current_ = next_; | 224 current_ = next_; |
252 has_line_terminator_before_next_ = false; | 225 has_line_terminator_before_next_ = false; |
253 has_multiline_comment_before_next_ = false; | 226 has_multiline_comment_before_next_ = false; |
254 if (static_cast<unsigned>(c0_) <= 0x7f) { | 227 if (static_cast<unsigned>(c0_) <= 0x7f) { |
255 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); | 228 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); |
256 if (token != Token::ILLEGAL) { | 229 if (token != Token::ILLEGAL) { |
257 int pos = source_pos(); | 230 int pos = source_pos(); |
258 next_.token = token; | 231 next_.token = token; |
259 next_.location.beg_pos = pos; | 232 next_.location.beg_pos = pos; |
260 next_.location.end_pos = pos + 1; | 233 next_.location.end_pos = pos + 1; |
(...skipping 11 matching lines...) Expand all Loading... |
272 // Unicode character; this implies that in a Unicode context the | 245 // Unicode character; this implies that in a Unicode context the |
273 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 246 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
274 // character expressed in little-endian byte order (since it could | 247 // character expressed in little-endian byte order (since it could |
275 // not be a U+FFFE character expressed in big-endian byte | 248 // not be a U+FFFE character expressed in big-endian byte |
276 // order). Nevertheless, we check for it to be compatible with | 249 // order). Nevertheless, we check for it to be compatible with |
277 // Spidermonkey. | 250 // Spidermonkey. |
278 return c == 0xFEFF || c == 0xFFFE; | 251 return c == 0xFEFF || c == 0xFFFE; |
279 } | 252 } |
280 | 253 |
281 | 254 |
282 bool JavaScriptScanner::SkipWhiteSpace() { | 255 bool Scanner::SkipWhiteSpace() { |
283 int start_position = source_pos(); | 256 int start_position = source_pos(); |
284 | 257 |
285 while (true) { | 258 while (true) { |
286 // We treat byte-order marks (BOMs) as whitespace for better | 259 // We treat byte-order marks (BOMs) as whitespace for better |
287 // compatibility with Spidermonkey and other JavaScript engines. | 260 // compatibility with Spidermonkey and other JavaScript engines. |
288 while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) { | 261 while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) { |
289 // IsWhiteSpace() includes line terminators! | 262 // IsWhiteSpace() includes line terminators! |
290 if (unicode_cache_->IsLineTerminator(c0_)) { | 263 if (unicode_cache_->IsLineTerminator(c0_)) { |
291 // Ignore line terminators, but remember them. This is necessary | 264 // Ignore line terminators, but remember them. This is necessary |
292 // for automatic semicolon insertion. | 265 // for automatic semicolon insertion. |
(...skipping 19 matching lines...) Expand all Loading... |
312 PushBack('-'); // undo Advance() | 285 PushBack('-'); // undo Advance() |
313 } | 286 } |
314 PushBack('-'); // undo Advance() | 287 PushBack('-'); // undo Advance() |
315 } | 288 } |
316 // Return whether or not we skipped any characters. | 289 // Return whether or not we skipped any characters. |
317 return source_pos() != start_position; | 290 return source_pos() != start_position; |
318 } | 291 } |
319 } | 292 } |
320 | 293 |
321 | 294 |
// Skips the remainder of a single-line comment and returns Token::WHITESPACE.
// Advances once unconditionally, then keeps advancing until c0_ is negative
// or is a line terminator; the line terminator itself is not consumed.
// NOTE(review): a negative c0_ presumably marks end of input - confirm.
322 Token::Value JavaScriptScanner::SkipSingleLineComment() { | 295 Token::Value Scanner::SkipSingleLineComment() { |
323 Advance(); | 296 Advance(); |
324 | 297 |
325 // The line terminator at the end of the line is not considered | 298 // The line terminator at the end of the line is not considered |
326 // to be part of the single-line comment; it is recognized | 299 // to be part of the single-line comment; it is recognized |
327 // separately by the lexical grammar and becomes part of the | 300 // separately by the lexical grammar and becomes part of the |
328 // stream of input elements for the syntactic grammar (see | 301 // stream of input elements for the syntactic grammar (see |
329 // ECMA-262, section 7.4). | 302 // ECMA-262, section 7.4). |
330 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 303 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { |
331 Advance(); | 304 Advance(); |
332 } | 305 } |
333 | 306 |
334 return Token::WHITESPACE; | 307 return Token::WHITESPACE; |
335 } | 308 } |
336 | 309 |
337 | 310 |
// Skips a multi-line comment. Must be entered with c0_ == '*' (asserted).
// Returns Token::WHITESPACE once "*/" is found, or Token::ILLEGAL if c0_
// becomes negative before the comment is terminated.
// Sets has_multiline_comment_before_next_ if the comment contains a line
// terminator.
338 Token::Value JavaScriptScanner::SkipMultiLineComment() { | 311 Token::Value Scanner::SkipMultiLineComment() { |
339 ASSERT(c0_ == '*'); | 312 ASSERT(c0_ == '*'); |
340 Advance(); | 313 Advance(); |
341 | 314 |
342 while (c0_ >= 0) { | 315 while (c0_ >= 0) { |
// ch is the character just consumed; c0_ is the one following it.
343 uc32 ch = c0_; | 316 uc32 ch = c0_; |
344 Advance(); | 317 Advance(); |
345 if (unicode_cache_->IsLineTerminator(ch)) { | 318 if (unicode_cache_->IsLineTerminator(ch)) { |
346 // Following ECMA-262, section 7.4, a comment containing | 319 // Following ECMA-262, section 7.4, a comment containing |
347 // a newline will make the comment count as a line-terminator. | 320 // a newline will make the comment count as a line-terminator. |
348 has_multiline_comment_before_next_ = true; | 321 has_multiline_comment_before_next_ = true; |
349 } | 322 } |
350 // If we have reached the end of the multi-line comment, we | 323 // If we have reached the end of the multi-line comment, we |
351 // consume the '/' and insert a whitespace. This way all | 324 // consume the '/' and insert a whitespace. This way all |
352 // multi-line comments are treated as whitespace. | 325 // multi-line comments are treated as whitespace. |
353 if (ch == '*' && c0_ == '/') { | 326 if (ch == '*' && c0_ == '/') { |
// The closing '/' is replaced in place by a space rather than advanced past.
354 c0_ = ' '; | 327 c0_ = ' '; |
355 return Token::WHITESPACE; | 328 return Token::WHITESPACE; |
356 } | 329 } |
357 } | 330 } |
358 | 331 |
359 // Unterminated multi-line comment. | 332 // Unterminated multi-line comment. |
360 return Token::ILLEGAL; | 333 return Token::ILLEGAL; |
361 } | 334 } |
362 | 335 |
363 | 336 |
// Handles a possible HTML-style "<!--" comment opener. Must be entered with
// c0_ == '!' (asserted).
// If the '!' is followed by "--", the rest of the line is skipped via
// SkipSingleLineComment() and its result is returned. Otherwise the
// characters advanced past are pushed back, leaving c0_ == '!', and
// Token::LT is returned.
364 Token::Value JavaScriptScanner::ScanHtmlComment() { | 337 Token::Value Scanner::ScanHtmlComment() { |
365 // Check for <!-- comments. | 338 // Check for <!-- comments. |
366 ASSERT(c0_ == '!'); | 339 ASSERT(c0_ == '!'); |
367 Advance(); | 340 Advance(); |
368 if (c0_ == '-') { | 341 if (c0_ == '-') { |
369 Advance(); | 342 Advance(); |
370 if (c0_ == '-') return SkipSingleLineComment(); | 343 if (c0_ == '-') return SkipSingleLineComment(); |
371 PushBack('-'); // undo Advance() | 344 PushBack('-'); // undo Advance() |
372 } | 345 } |
373 PushBack('!'); // undo Advance() | 346 PushBack('!'); // undo Advance() |
374 ASSERT(c0_ == '!'); | 347 ASSERT(c0_ == '!'); |
375 return Token::LT; | 348 return Token::LT; |
376 } | 349 } |
377 | 350 |
378 | 351 |
379 void JavaScriptScanner::Scan() { | 352 void Scanner::Scan() { |
380 next_.literal_chars = NULL; | 353 next_.literal_chars = NULL; |
381 Token::Value token; | 354 Token::Value token; |
382 do { | 355 do { |
383 // Remember the position of the next token | 356 // Remember the position of the next token |
384 next_.location.beg_pos = source_pos(); | 357 next_.location.beg_pos = source_pos(); |
385 | 358 |
386 switch (c0_) { | 359 switch (c0_) { |
387 case ' ': | 360 case ' ': |
388 case '\t': | 361 case '\t': |
389 Advance(); | 362 Advance(); |
(...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
609 | 582 |
610 // Continue scanning for tokens as long as we're just skipping | 583 // Continue scanning for tokens as long as we're just skipping |
611 // whitespace. | 584 // whitespace. |
612 } while (token == Token::WHITESPACE); | 585 } while (token == Token::WHITESPACE); |
613 | 586 |
614 next_.location.end_pos = source_pos(); | 587 next_.location.end_pos = source_pos(); |
615 next_.token = token; | 588 next_.token = token; |
616 } | 589 } |
617 | 590 |
618 | 591 |
// Repositions the scanner so that the token starting at source position `pos`
// becomes the "next" token. Does nothing if the next token already begins at
// `pos`. `pos` must not lie before the current source position (asserted).
// When an actual seek happens, the line-terminator and multiline-comment
// flags are cleared before rescanning.
619 void JavaScriptScanner::SeekForward(int pos) { | 592 void Scanner::SeekForward(int pos) { |
620 // After this call, we will have the token at the given position as | 593 // After this call, we will have the token at the given position as |
621 // the "next" token. The "current" token will be invalid. | 594 // the "next" token. The "current" token will be invalid. |
622 if (pos == next_.location.beg_pos) return; | 595 if (pos == next_.location.beg_pos) return; |
623 int current_pos = source_pos(); | 596 int current_pos = source_pos(); |
624 ASSERT_EQ(next_.location.end_pos, current_pos); | 597 ASSERT_EQ(next_.location.end_pos, current_pos); |
625 // Positions inside the lookahead token aren't supported. | 598 // Positions inside the lookahead token aren't supported. |
626 ASSERT(pos >= current_pos); | 599 ASSERT(pos >= current_pos); |
627 if (pos != current_pos) { | 600 if (pos != current_pos) { |
// Move the underlying stream forward, then call Advance() afterwards.
628 source_->SeekForward(pos - source_->pos()); | 601 source_->SeekForward(pos - source_->pos()); |
629 Advance(); | 602 Advance(); |
630 // This function is only called to seek to the location | 603 // This function is only called to seek to the location |
631 // of the end of a function (at the "}" token). It doesn't matter | 604 // of the end of a function (at the "}" token). It doesn't matter |
632 // whether there was a line terminator in the part we skip. | 605 // whether there was a line terminator in the part we skip. |
633 has_line_terminator_before_next_ = false; | 606 has_line_terminator_before_next_ = false; |
634 has_multiline_comment_before_next_ = false; | 607 has_multiline_comment_before_next_ = false; |
635 } | 608 } |
636 Scan(); | 609 Scan(); |
637 } | 610 } |
638 | 611 |
639 | 612 |
640 void JavaScriptScanner::ScanEscape() { | 613 void Scanner::ScanEscape() { |
641 uc32 c = c0_; | 614 uc32 c = c0_; |
642 Advance(); | 615 Advance(); |
643 | 616 |
644 // Skip escaped newlines. | 617 // Skip escaped newlines. |
645 if (unicode_cache_->IsLineTerminator(c)) { | 618 if (unicode_cache_->IsLineTerminator(c)) { |
646 // Allow CR+LF newlines in multiline string literals. | 619 // Allow CR+LF newlines in multiline string literals. |
647 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 620 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
648 // Allow LF+CR newlines in multiline string literals. | 621 // Allow LF+CR newlines in multiline string literals. |
649 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 622 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); |
650 return; | 623 return; |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
682 | 655 |
683 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these | 656 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these |
684 // should be illegal, but they are commonly handled | 657 // should be illegal, but they are commonly handled |
685 // as non-escaped characters by JS VMs. | 658 // as non-escaped characters by JS VMs. |
686 AddLiteralChar(c); | 659 AddLiteralChar(c); |
687 } | 660 } |
688 | 661 |
689 | 662 |
690 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of | 663 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of |
691 // ECMA-262. Other JS VMs support them. | 664 // ECMA-262. Other JS VMs support them. |
//
// `c` is the first octal digit (its value is c - '0'); up to `length` further
// octal digits are then read from c0_. Scanning stops early at a character
// outside '0'..'7' or when adding the next digit would make the value 256 or
// more. Returns the accumulated value. Unless the escape is exactly '\0'
// (c == '0' and no further digits consumed), its location is stored in
// octal_pos_.
692 uc32 JavaScriptScanner::ScanOctalEscape(uc32 c, int length) { | 665 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { |
693 uc32 x = c - '0'; | 666 uc32 x = c - '0'; |
// i counts the additional digits consumed after c.
694 int i = 0; | 667 int i = 0; |
695 for (; i < length; i++) { | 668 for (; i < length; i++) { |
696 int d = c0_ - '0'; | 669 int d = c0_ - '0'; |
697 if (d < 0 || d > 7) break; | 670 if (d < 0 || d > 7) break; |
698 int nx = x * 8 + d; | 671 int nx = x * 8 + d; |
699 if (nx >= 256) break; | 672 if (nx >= 256) break; |
700 x = nx; | 673 x = nx; |
701 Advance(); | 674 Advance(); |
702 } | 675 } |
703 // Anything except '\0' is an octal escape sequence, illegal in strict mode. | 676 // Anything except '\0' is an octal escape sequence, illegal in strict mode. |
704 // Remember the position of octal escape sequences so that an error | 677 // Remember the position of octal escape sequences so that an error |
705 // can be reported later (in strict mode). | 678 // can be reported later (in strict mode). |
706 // We don't report the error immediately, because the octal escape can | 679 // We don't report the error immediately, because the octal escape can |
707 // occur before the "use strict" directive. | 680 // occur before the "use strict" directive. |
708 if (c != '0' || i > 0) { | 681 if (c != '0' || i > 0) { |
709 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); | 682 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); |
710 } | 683 } |
711 return x; | 684 return x; |
712 } | 685 } |
713 | 686 |
714 | 687 |
// Scans a string literal whose opening quote character is the current c0_.
// Characters are collected into the current literal until the matching quote,
// a negative c0_, or a line terminator is reached; a backslash delegates to
// ScanEscape(). Returns Token::STRING on success, or Token::ILLEGAL if a
// backslash is immediately followed by a negative c0_ or the loop ends on
// anything other than the matching quote.
715 Token::Value JavaScriptScanner::ScanString() { | 688 Token::Value Scanner::ScanString() { |
716 uc32 quote = c0_; | 689 uc32 quote = c0_; |
717 Advance(); // consume quote | 690 Advance(); // consume quote |
718 | 691 |
719 LiteralScope literal(this); | 692 LiteralScope literal(this); |
720 while (c0_ != quote && c0_ >= 0 | 693 while (c0_ != quote && c0_ >= 0 |
721 && !unicode_cache_->IsLineTerminator(c0_)) { | 694 && !unicode_cache_->IsLineTerminator(c0_)) { |
722 uc32 c = c0_; | 695 uc32 c = c0_; |
723 Advance(); | 696 Advance(); |
724 if (c == '\\') { | 697 if (c == '\\') { |
725 if (c0_ < 0) return Token::ILLEGAL; | 698 if (c0_ < 0) return Token::ILLEGAL; |
726 ScanEscape(); | 699 ScanEscape(); |
727 } else { | 700 } else { |
728 AddLiteralChar(c); | 701 AddLiteralChar(c); |
729 } | 702 } |
730 } | 703 } |
731 if (c0_ != quote) return Token::ILLEGAL; | 704 if (c0_ != quote) return Token::ILLEGAL; |
732 literal.Complete(); | 705 literal.Complete(); |
733 | 706 |
734 Advance(); // consume quote | 707 Advance(); // consume quote |
735 return Token::STRING; | 708 return Token::STRING; |
736 } | 709 } |
737 | 710 |
738 | 711 |
// Calls AddLiteralCharAdvance() repeatedly for as long as c0_ is a decimal
// digit. Does nothing if c0_ is not a decimal digit on entry.
739 void JavaScriptScanner::ScanDecimalDigits() { | 712 void Scanner::ScanDecimalDigits() { |
740 while (IsDecimalDigit(c0_)) | 713 while (IsDecimalDigit(c0_)) |
741 AddLiteralCharAdvance(); | 714 AddLiteralCharAdvance(); |
742 } | 715 } |
743 | 716 |
744 | 717 |
745 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { | 718 Token::Value Scanner::ScanNumber(bool seen_period) { |
746 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 719 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
747 | 720 |
748 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; | 721 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; |
749 | 722 |
750 LiteralScope literal(this); | 723 LiteralScope literal(this); |
751 if (seen_period) { | 724 if (seen_period) { |
752 // we have already seen a decimal point of the float | 725 // we have already seen a decimal point of the float |
753 AddLiteralChar('.'); | 726 AddLiteralChar('.'); |
754 ScanDecimalDigits(); // we know we have at least one digit | 727 ScanDecimalDigits(); // we know we have at least one digit |
755 | 728 |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
820 // if the value is 0). | 793 // if the value is 0). |
821 if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) | 794 if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) |
822 return Token::ILLEGAL; | 795 return Token::ILLEGAL; |
823 | 796 |
824 literal.Complete(); | 797 literal.Complete(); |
825 | 798 |
826 return Token::NUMBER; | 799 return Token::NUMBER; |
827 } | 800 } |
828 | 801 |
829 | 802 |
// Scans a unicode escape inside an identifier. Advances once (callers in this
// file invoke it when c0_ == '\\'), requires the next character to be 'u',
// and then reads four hex digits via ScanHexNumber(4).
// Returns the scanned value, or -1 if the 'u' is missing or the hex digits
// are invalid; in the latter case 'u' is pushed back as well.
830 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { | 803 uc32 Scanner::ScanIdentifierUnicodeEscape() { |
831 Advance(); | 804 Advance(); |
832 if (c0_ != 'u') return -1; | 805 if (c0_ != 'u') return -1; |
833 Advance(); | 806 Advance(); |
834 uc32 result = ScanHexNumber(4); | 807 uc32 result = ScanHexNumber(4); |
835 if (result < 0) PushBack('u'); | 808 if (result < 0) PushBack('u'); |
836 return result; | 809 return result; |
837 } | 810 } |
838 | 811 |
839 | 812 |
840 // ---------------------------------------------------------------------------- | 813 // ---------------------------------------------------------------------------- |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
937 (keyword_length <= 9 || input[9] == keyword[9])) { \ | 910 (keyword_length <= 9 || input[9] == keyword[9])) { \ |
938 return token; \ | 911 return token; \ |
939 } \ | 912 } \ |
940 } | 913 } |
941 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) | 914 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) |
942 } | 915 } |
943 return Token::IDENTIFIER; | 916 return Token::IDENTIFIER; |
944 } | 917 } |
945 | 918 |
946 | 919 |
947 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { | 920 Token::Value Scanner::ScanIdentifierOrKeyword() { |
948 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); | 921 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); |
949 LiteralScope literal(this); | 922 LiteralScope literal(this); |
950 // Scan identifier start character. | 923 // Scan identifier start character. |
951 if (c0_ == '\\') { | 924 if (c0_ == '\\') { |
952 uc32 c = ScanIdentifierUnicodeEscape(); | 925 uc32 c = ScanIdentifierUnicodeEscape(); |
953 // Only allow legal identifier start characters. | 926 // Only allow legal identifier start characters. |
954 if (c < 0 || | 927 if (c < 0 || |
955 c == '\\' || // No recursive escapes. | 928 c == '\\' || // No recursive escapes. |
956 !unicode_cache_->IsIdentifierStart(c)) { | 929 !unicode_cache_->IsIdentifierStart(c)) { |
957 return Token::ILLEGAL; | 930 return Token::ILLEGAL; |
(...skipping 24 matching lines...) Expand all Loading... |
982 Vector<const char> chars = next_.literal_chars->ascii_literal(); | 955 Vector<const char> chars = next_.literal_chars->ascii_literal(); |
983 return KeywordOrIdentifierToken(chars.start(), | 956 return KeywordOrIdentifierToken(chars.start(), |
984 chars.length(), | 957 chars.length(), |
985 harmony_scoping_); | 958 harmony_scoping_); |
986 } | 959 } |
987 | 960 |
988 return Token::IDENTIFIER; | 961 return Token::IDENTIFIER; |
989 } | 962 } |
990 | 963 |
991 | 964 |
// Scans the remaining characters of an identifier into the current literal
// and then completes `literal`. Unicode escapes are decoded via
// ScanIdentifierUnicodeEscape() and must yield a valid identifier-part
// character other than a backslash.
// Returns Token::IDENTIFIER on success, or Token::ILLEGAL on an invalid
// escape (in which case `literal` is not completed here).
992 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { | 965 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { |
993 // Scan the rest of the identifier characters. | 966 // Scan the rest of the identifier characters. |
994 while (unicode_cache_->IsIdentifierPart(c0_)) { | 967 while (unicode_cache_->IsIdentifierPart(c0_)) { |
995 if (c0_ == '\\') { | 968 if (c0_ == '\\') { |
996 uc32 c = ScanIdentifierUnicodeEscape(); | 969 uc32 c = ScanIdentifierUnicodeEscape(); |
997 // Only allow legal identifier part characters. | 970 // Only allow legal identifier part characters. |
998 if (c < 0 || | 971 if (c < 0 || |
999 c == '\\' || | 972 c == '\\' || |
1000 !unicode_cache_->IsIdentifierPart(c)) { | 973 !unicode_cache_->IsIdentifierPart(c)) { |
1001 return Token::ILLEGAL; | 974 return Token::ILLEGAL; |
1002 } | 975 } |
1003 AddLiteralChar(c); | 976 AddLiteralChar(c); |
1004 } else { | 977 } else { |
1005 AddLiteralChar(c0_); | 978 AddLiteralChar(c0_); |
1006 Advance(); | 979 Advance(); |
1007 } | 980 } |
1008 } | 981 } |
1009 literal->Complete(); | 982 literal->Complete(); |
1010 | 983 |
1011 return Token::IDENTIFIER; | 984 return Token::IDENTIFIER; |
1012 } | 985 } |
1013 | 986 |
1014 | 987 |
1015 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { | 988 bool Scanner::ScanRegExpPattern(bool seen_equal) { |
1016 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 989 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
1017 bool in_character_class = false; | 990 bool in_character_class = false; |
1018 | 991 |
1019 // Previous token is either '/' or '/=', in the second case, the | 992 // Previous token is either '/' or '/=', in the second case, the |
1020 // pattern starts at =. | 993 // pattern starts at =. |
1021 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 994 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
1022 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 995 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
1023 | 996 |
1024 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 997 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
1025 // the scanner should pass uninterpreted bodies to the RegExp | 998 // the scanner should pass uninterpreted bodies to the RegExp |
(...skipping 26 matching lines...) Expand all Loading... |
1052 } | 1025 } |
1053 } | 1026 } |
1054 Advance(); // consume '/' | 1027 Advance(); // consume '/' |
1055 | 1028 |
1056 literal.Complete(); | 1029 literal.Complete(); |
1057 | 1030 |
1058 return true; | 1031 return true; |
1059 } | 1032 } |
1060 | 1033 |
1061 | 1034 |
// Scans a "\uXXXX" escape and, if it is complete, appends its six characters
// verbatim (backslash, 'u' and the four hex digits - not the decoded value)
// to the current literal. Must be entered with c0_ == '\\' (asserted).
// Returns true when all six characters were read; otherwise pushes back the
// characters read so far and returns false.
1062 bool JavaScriptScanner::ScanLiteralUnicodeEscape() { | 1035 bool Scanner::ScanLiteralUnicodeEscape() { |
1063 ASSERT(c0_ == '\\'); | 1036 ASSERT(c0_ == '\\'); |
1064 uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0}; | 1037 uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0}; |
1065 Advance(); | 1038 Advance(); |
// i is the number of entries of chars_read recognized so far.
1066 int i = 1; | 1039 int i = 1; |
1067 if (c0_ == 'u') { | 1040 if (c0_ == 'u') { |
1068 i++; | 1041 i++; |
1069 while (i < 6) { | 1042 while (i < 6) { |
1070 Advance(); | 1043 Advance(); |
1071 if (!IsHexDigit(c0_)) break; | 1044 if (!IsHexDigit(c0_)) break; |
1072 chars_read[i] = c0_; | 1045 chars_read[i] = c0_; |
1073 i++; | 1046 i++; |
1074 } | 1047 } |
1075 } | 1048 } |
1076 if (i < 6) { | 1049 if (i < 6) { |
1077 // Incomplete escape. Undo all advances and return false. | 1050 // Incomplete escape. Undo all advances and return false. |
1078 while (i > 0) { | 1051 while (i > 0) { |
1079 i--; | 1052 i--; |
1080 PushBack(chars_read[i]); | 1053 PushBack(chars_read[i]); |
1081 } | 1054 } |
1082 return false; | 1055 return false; |
1083 } | 1056 } |
1084 // Complete escape. Add all chars to current literal buffer. | 1057 // Complete escape. Add all chars to current literal buffer. |
1085 for (int i = 0; i < 6; i++) { | 1058 for (int i = 0; i < 6; i++) { |
1086 AddLiteralChar(chars_read[i]); | 1059 AddLiteralChar(chars_read[i]); |
1087 } | 1060 } |
1088 return true; | 1061 return true; |
1089 } | 1062 } |
1090 | 1063 |
1091 | 1064 |
// Scans regular-expression flags into a new literal: every identifier-part
// character is appended, and a backslash is handled through
// ScanLiteralUnicodeEscape(); scanning stops at the first incomplete escape.
// Afterwards the literal is completed and next_.location.end_pos is set to
// source_pos() - 1. Always returns true.
1092 bool JavaScriptScanner::ScanRegExpFlags() { | 1065 bool Scanner::ScanRegExpFlags() { |
1093 // Scan regular expression flags. | 1066 // Scan regular expression flags. |
1094 LiteralScope literal(this); | 1067 LiteralScope literal(this); |
1095 while (unicode_cache_->IsIdentifierPart(c0_)) { | 1068 while (unicode_cache_->IsIdentifierPart(c0_)) { |
1096 if (c0_ != '\\') { | 1069 if (c0_ != '\\') { |
1097 AddLiteralCharAdvance(); | 1070 AddLiteralCharAdvance(); |
1098 } else { | 1071 } else { |
1099 if (!ScanLiteralUnicodeEscape()) { | 1072 if (!ScanLiteralUnicodeEscape()) { |
1100 break; | 1073 break; |
1101 } | 1074 } |
1102 } | 1075 } |
1103 } | 1076 } |
1104 literal.Complete(); | 1077 literal.Complete(); |
1105 | 1078 |
1106 next_.location.end_pos = source_pos() - 1; | 1079 next_.location.end_pos = source_pos() - 1; |
1107 return true; | 1080 return true; |
1108 } | 1081 } |
1109 | 1082 |
1110 } } // namespace v8::internal | 1083 } } // namespace v8::internal |
OLD | NEW |