| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 18 matching lines...) Expand all Loading... |
| 29 | 29 |
| 30 #include "scanner.h" | 30 #include "scanner.h" |
| 31 | 31 |
| 32 #include "../include/v8stdint.h" | 32 #include "../include/v8stdint.h" |
| 33 #include "char-predicates-inl.h" | 33 #include "char-predicates-inl.h" |
| 34 | 34 |
| 35 namespace v8 { | 35 namespace v8 { |
| 36 namespace internal { | 36 namespace internal { |
| 37 | 37 |
| 38 // ---------------------------------------------------------------------------- | 38 // ---------------------------------------------------------------------------- |
| 39 // Scanner::LiteralScope | |
| 40 | |
| 41 Scanner::LiteralScope::LiteralScope(Scanner* self) | |
| 42 : scanner_(self), complete_(false) { | |
| 43 self->StartLiteral(); | |
| 44 } | |
| 45 | |
| 46 | |
| 47 Scanner::LiteralScope::~LiteralScope() { | |
| 48 if (!complete_) scanner_->DropLiteral(); | |
| 49 } | |
| 50 | |
| 51 | |
| 52 void Scanner::LiteralScope::Complete() { | |
| 53 scanner_->TerminateLiteral(); | |
| 54 complete_ = true; | |
| 55 } | |
| 56 | |
| 57 // ---------------------------------------------------------------------------- | |
| 58 // Scanner | 39 // Scanner |
| 59 | 40 |
| 60 Scanner::Scanner(UnicodeCache* unicode_cache) | 41 Scanner::Scanner(UnicodeCache* unicode_cache) |
| 61 : unicode_cache_(unicode_cache) { } | 42 : unicode_cache_(unicode_cache), |
| 43 octal_pos_(Location::invalid()), |
| 44 harmony_scoping_(false) { } |
| 45 |
| 46 |
| 47 void Scanner::Initialize(UC16CharacterStream* source) { |
| 48 source_ = source; |
| 49 // Need to capture identifiers in order to recognize "get" and "set" |
| 50 // in object literals. |
| 51 Init(); |
| 52 // Skip initial whitespace allowing HTML comment ends just like |
| 53 // after a newline and scan first token. |
| 54 has_line_terminator_before_next_ = true; |
| 55 SkipWhiteSpace(); |
| 56 Scan(); |
| 57 } |
| 62 | 58 |
| 63 | 59 |
| 64 uc32 Scanner::ScanHexNumber(int expected_length) { | 60 uc32 Scanner::ScanHexNumber(int expected_length) { |
| 65 ASSERT(expected_length <= 4); // prevent overflow | 61 ASSERT(expected_length <= 4); // prevent overflow |
| 66 | 62 |
| 67 uc32 digits[4] = { 0, 0, 0, 0 }; | 63 uc32 digits[4] = { 0, 0, 0, 0 }; |
| 68 uc32 x = 0; | 64 uc32 x = 0; |
| 69 for (int i = 0; i < expected_length; i++) { | 65 for (int i = 0; i < expected_length; i++) { |
| 70 digits[i] = c0_; | 66 digits[i] = c0_; |
| 71 int d = HexValue(c0_); | 67 int d = HexValue(c0_); |
| 72 if (d < 0) { | 68 if (d < 0) { |
| 73 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes | 69 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes |
| 74 // should be illegal, but other JS VMs just return the | 70 // should be illegal, but other JS VMs just return the |
| 75 // non-escaped version of the original character. | 71 // non-escaped version of the original character. |
| 76 | 72 |
| 77 // Push back digits that we have advanced past. | 73 // Push back digits that we have advanced past. |
| 78 for (int j = i-1; j >= 0; j--) { | 74 for (int j = i-1; j >= 0; j--) { |
| 79 PushBack(digits[j]); | 75 PushBack(digits[j]); |
| 80 } | 76 } |
| 81 return -1; | 77 return -1; |
| 82 } | 78 } |
| 83 x = x * 16 + d; | 79 x = x * 16 + d; |
| 84 Advance(); | 80 Advance(); |
| 85 } | 81 } |
| 86 | 82 |
| 87 return x; | 83 return x; |
| 88 } | 84 } |
| 89 | 85 |
| 90 | 86 |
| 91 | |
| 92 // ---------------------------------------------------------------------------- | |
| 93 // JavaScriptScanner | |
| 94 | |
| 95 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants) | |
| 96 : Scanner(scanner_contants), | |
| 97 octal_pos_(Location::invalid()), | |
| 98 harmony_scoping_(false) { } | |
| 99 | |
| 100 | |
| 101 void JavaScriptScanner::Initialize(UC16CharacterStream* source) { | |
| 102 source_ = source; | |
| 103 // Need to capture identifiers in order to recognize "get" and "set" | |
| 104 // in object literals. | |
| 105 Init(); | |
| 106 // Skip initial whitespace allowing HTML comment ends just like | |
| 107 // after a newline and scan first token. | |
| 108 has_line_terminator_before_next_ = true; | |
| 109 SkipWhiteSpace(); | |
| 110 Scan(); | |
| 111 } | |
| 112 | |
| 113 | |
| 114 // Ensure that tokens can be stored in a byte. | 87 // Ensure that tokens can be stored in a byte. |
| 115 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | 88 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); |
| 116 | 89 |
| 117 // Table of one-character tokens, by character (0x00..0x7f only). | 90 // Table of one-character tokens, by character (0x00..0x7f only). |
| 118 static const byte one_char_tokens[] = { | 91 static const byte one_char_tokens[] = { |
| 119 Token::ILLEGAL, | 92 Token::ILLEGAL, |
| 120 Token::ILLEGAL, | 93 Token::ILLEGAL, |
| 121 Token::ILLEGAL, | 94 Token::ILLEGAL, |
| 122 Token::ILLEGAL, | 95 Token::ILLEGAL, |
| 123 Token::ILLEGAL, | 96 Token::ILLEGAL, |
| (...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 240 Token::ILLEGAL, | 213 Token::ILLEGAL, |
| 241 Token::ILLEGAL, | 214 Token::ILLEGAL, |
| 242 Token::LBRACE, // 0x7b | 215 Token::LBRACE, // 0x7b |
| 243 Token::ILLEGAL, | 216 Token::ILLEGAL, |
| 244 Token::RBRACE, // 0x7d | 217 Token::RBRACE, // 0x7d |
| 245 Token::BIT_NOT, // 0x7e | 218 Token::BIT_NOT, // 0x7e |
| 246 Token::ILLEGAL | 219 Token::ILLEGAL |
| 247 }; | 220 }; |
| 248 | 221 |
| 249 | 222 |
| 250 Token::Value JavaScriptScanner::Next() { | 223 Token::Value Scanner::Next() { |
| 251 current_ = next_; | 224 current_ = next_; |
| 252 has_line_terminator_before_next_ = false; | 225 has_line_terminator_before_next_ = false; |
| 253 has_multiline_comment_before_next_ = false; | 226 has_multiline_comment_before_next_ = false; |
| 254 if (static_cast<unsigned>(c0_) <= 0x7f) { | 227 if (static_cast<unsigned>(c0_) <= 0x7f) { |
| 255 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); | 228 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); |
| 256 if (token != Token::ILLEGAL) { | 229 if (token != Token::ILLEGAL) { |
| 257 int pos = source_pos(); | 230 int pos = source_pos(); |
| 258 next_.token = token; | 231 next_.token = token; |
| 259 next_.location.beg_pos = pos; | 232 next_.location.beg_pos = pos; |
| 260 next_.location.end_pos = pos + 1; | 233 next_.location.end_pos = pos + 1; |
| (...skipping 11 matching lines...) Expand all Loading... |
| 272 // Unicode character; this implies that in a Unicode context the | 245 // Unicode character; this implies that in a Unicode context the |
| 273 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 246 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
| 274 // character expressed in little-endian byte order (since it could | 247 // character expressed in little-endian byte order (since it could |
| 275 // not be a U+FFFE character expressed in big-endian byte | 248 // not be a U+FFFE character expressed in big-endian byte |
| 276 // order). Nevertheless, we check for it to be compatible with | 249 // order). Nevertheless, we check for it to be compatible with |
| 277 // Spidermonkey. | 250 // Spidermonkey. |
| 278 return c == 0xFEFF || c == 0xFFFE; | 251 return c == 0xFEFF || c == 0xFFFE; |
| 279 } | 252 } |
| 280 | 253 |
| 281 | 254 |
| 282 bool JavaScriptScanner::SkipWhiteSpace() { | 255 bool Scanner::SkipWhiteSpace() { |
| 283 int start_position = source_pos(); | 256 int start_position = source_pos(); |
| 284 | 257 |
| 285 while (true) { | 258 while (true) { |
| 286 // We treat byte-order marks (BOMs) as whitespace for better | 259 // We treat byte-order marks (BOMs) as whitespace for better |
| 287 // compatibility with Spidermonkey and other JavaScript engines. | 260 // compatibility with Spidermonkey and other JavaScript engines. |
| 288 while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) { | 261 while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) { |
| 289 // IsWhiteSpace() includes line terminators! | 262 // IsWhiteSpace() includes line terminators! |
| 290 if (unicode_cache_->IsLineTerminator(c0_)) { | 263 if (unicode_cache_->IsLineTerminator(c0_)) { |
| 291 // Ignore line terminators, but remember them. This is necessary | 264 // Ignore line terminators, but remember them. This is necessary |
| 292 // for automatic semicolon insertion. | 265 // for automatic semicolon insertion. |
| (...skipping 19 matching lines...) Expand all Loading... |
| 312 PushBack('-'); // undo Advance() | 285 PushBack('-'); // undo Advance() |
| 313 } | 286 } |
| 314 PushBack('-'); // undo Advance() | 287 PushBack('-'); // undo Advance() |
| 315 } | 288 } |
| 316 // Return whether or not we skipped any characters. | 289 // Return whether or not we skipped any characters. |
| 317 return source_pos() != start_position; | 290 return source_pos() != start_position; |
| 318 } | 291 } |
| 319 } | 292 } |
| 320 | 293 |
| 321 | 294 |
| 322 Token::Value JavaScriptScanner::SkipSingleLineComment() { | 295 Token::Value Scanner::SkipSingleLineComment() { |
| 323 Advance(); | 296 Advance(); |
| 324 | 297 |
| 325 // The line terminator at the end of the line is not considered | 298 // The line terminator at the end of the line is not considered |
| 326 // to be part of the single-line comment; it is recognized | 299 // to be part of the single-line comment; it is recognized |
| 327 // separately by the lexical grammar and becomes part of the | 300 // separately by the lexical grammar and becomes part of the |
| 328 // stream of input elements for the syntactic grammar (see | 301 // stream of input elements for the syntactic grammar (see |
| 329 // ECMA-262, section 7.4). | 302 // ECMA-262, section 7.4). |
| 330 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 303 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { |
| 331 Advance(); | 304 Advance(); |
| 332 } | 305 } |
| 333 | 306 |
| 334 return Token::WHITESPACE; | 307 return Token::WHITESPACE; |
| 335 } | 308 } |
| 336 | 309 |
| 337 | 310 |
| 338 Token::Value JavaScriptScanner::SkipMultiLineComment() { | 311 Token::Value Scanner::SkipMultiLineComment() { |
| 339 ASSERT(c0_ == '*'); | 312 ASSERT(c0_ == '*'); |
| 340 Advance(); | 313 Advance(); |
| 341 | 314 |
| 342 while (c0_ >= 0) { | 315 while (c0_ >= 0) { |
| 343 uc32 ch = c0_; | 316 uc32 ch = c0_; |
| 344 Advance(); | 317 Advance(); |
| 345 if (unicode_cache_->IsLineTerminator(ch)) { | 318 if (unicode_cache_->IsLineTerminator(ch)) { |
| 346 // Following ECMA-262, section 7.4, a comment containing | 319 // Following ECMA-262, section 7.4, a comment containing |
| 347 // a newline will make the comment count as a line-terminator. | 320 // a newline will make the comment count as a line-terminator. |
| 348 has_multiline_comment_before_next_ = true; | 321 has_multiline_comment_before_next_ = true; |
| 349 } | 322 } |
| 350 // If we have reached the end of the multi-line comment, we | 323 // If we have reached the end of the multi-line comment, we |
| 351 // consume the '/' and insert a whitespace. This way all | 324 // consume the '/' and insert a whitespace. This way all |
| 352 // multi-line comments are treated as whitespace. | 325 // multi-line comments are treated as whitespace. |
| 353 if (ch == '*' && c0_ == '/') { | 326 if (ch == '*' && c0_ == '/') { |
| 354 c0_ = ' '; | 327 c0_ = ' '; |
| 355 return Token::WHITESPACE; | 328 return Token::WHITESPACE; |
| 356 } | 329 } |
| 357 } | 330 } |
| 358 | 331 |
| 359 // Unterminated multi-line comment. | 332 // Unterminated multi-line comment. |
| 360 return Token::ILLEGAL; | 333 return Token::ILLEGAL; |
| 361 } | 334 } |
| 362 | 335 |
| 363 | 336 |
| 364 Token::Value JavaScriptScanner::ScanHtmlComment() { | 337 Token::Value Scanner::ScanHtmlComment() { |
| 365 // Check for <!-- comments. | 338 // Check for <!-- comments. |
| 366 ASSERT(c0_ == '!'); | 339 ASSERT(c0_ == '!'); |
| 367 Advance(); | 340 Advance(); |
| 368 if (c0_ == '-') { | 341 if (c0_ == '-') { |
| 369 Advance(); | 342 Advance(); |
| 370 if (c0_ == '-') return SkipSingleLineComment(); | 343 if (c0_ == '-') return SkipSingleLineComment(); |
| 371 PushBack('-'); // undo Advance() | 344 PushBack('-'); // undo Advance() |
| 372 } | 345 } |
| 373 PushBack('!'); // undo Advance() | 346 PushBack('!'); // undo Advance() |
| 374 ASSERT(c0_ == '!'); | 347 ASSERT(c0_ == '!'); |
| 375 return Token::LT; | 348 return Token::LT; |
| 376 } | 349 } |
| 377 | 350 |
| 378 | 351 |
| 379 void JavaScriptScanner::Scan() { | 352 void Scanner::Scan() { |
| 380 next_.literal_chars = NULL; | 353 next_.literal_chars = NULL; |
| 381 Token::Value token; | 354 Token::Value token; |
| 382 do { | 355 do { |
| 383 // Remember the position of the next token | 356 // Remember the position of the next token |
| 384 next_.location.beg_pos = source_pos(); | 357 next_.location.beg_pos = source_pos(); |
| 385 | 358 |
| 386 switch (c0_) { | 359 switch (c0_) { |
| 387 case ' ': | 360 case ' ': |
| 388 case '\t': | 361 case '\t': |
| 389 Advance(); | 362 Advance(); |
| (...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 609 | 582 |
| 610 // Continue scanning for tokens as long as we're just skipping | 583 // Continue scanning for tokens as long as we're just skipping |
| 611 // whitespace. | 584 // whitespace. |
| 612 } while (token == Token::WHITESPACE); | 585 } while (token == Token::WHITESPACE); |
| 613 | 586 |
| 614 next_.location.end_pos = source_pos(); | 587 next_.location.end_pos = source_pos(); |
| 615 next_.token = token; | 588 next_.token = token; |
| 616 } | 589 } |
| 617 | 590 |
| 618 | 591 |
| 619 void JavaScriptScanner::SeekForward(int pos) { | 592 void Scanner::SeekForward(int pos) { |
| 620 // After this call, we will have the token at the given position as | 593 // After this call, we will have the token at the given position as |
| 621 // the "next" token. The "current" token will be invalid. | 594 // the "next" token. The "current" token will be invalid. |
| 622 if (pos == next_.location.beg_pos) return; | 595 if (pos == next_.location.beg_pos) return; |
| 623 int current_pos = source_pos(); | 596 int current_pos = source_pos(); |
| 624 ASSERT_EQ(next_.location.end_pos, current_pos); | 597 ASSERT_EQ(next_.location.end_pos, current_pos); |
| 625 // Positions inside the lookahead token aren't supported. | 598 // Positions inside the lookahead token aren't supported. |
| 626 ASSERT(pos >= current_pos); | 599 ASSERT(pos >= current_pos); |
| 627 if (pos != current_pos) { | 600 if (pos != current_pos) { |
| 628 source_->SeekForward(pos - source_->pos()); | 601 source_->SeekForward(pos - source_->pos()); |
| 629 Advance(); | 602 Advance(); |
| 630 // This function is only called to seek to the location | 603 // This function is only called to seek to the location |
| 631 // of the end of a function (at the "}" token). It doesn't matter | 604 // of the end of a function (at the "}" token). It doesn't matter |
| 632 // whether there was a line terminator in the part we skip. | 605 // whether there was a line terminator in the part we skip. |
| 633 has_line_terminator_before_next_ = false; | 606 has_line_terminator_before_next_ = false; |
| 634 has_multiline_comment_before_next_ = false; | 607 has_multiline_comment_before_next_ = false; |
| 635 } | 608 } |
| 636 Scan(); | 609 Scan(); |
| 637 } | 610 } |
| 638 | 611 |
| 639 | 612 |
| 640 void JavaScriptScanner::ScanEscape() { | 613 void Scanner::ScanEscape() { |
| 641 uc32 c = c0_; | 614 uc32 c = c0_; |
| 642 Advance(); | 615 Advance(); |
| 643 | 616 |
| 644 // Skip escaped newlines. | 617 // Skip escaped newlines. |
| 645 if (unicode_cache_->IsLineTerminator(c)) { | 618 if (unicode_cache_->IsLineTerminator(c)) { |
| 646 // Allow CR+LF newlines in multiline string literals. | 619 // Allow CR+LF newlines in multiline string literals. |
| 647 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 620 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
| 648 // Allow LF+CR newlines in multiline string literals. | 621 // Allow LF+CR newlines in multiline string literals. |
| 649 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 622 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); |
| 650 return; | 623 return; |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 682 | 655 |
| 683 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these | 656 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these |
| 684 // should be illegal, but they are commonly handled | 657 // should be illegal, but they are commonly handled |
| 685 // as non-escaped characters by JS VMs. | 658 // as non-escaped characters by JS VMs. |
| 686 AddLiteralChar(c); | 659 AddLiteralChar(c); |
| 687 } | 660 } |
| 688 | 661 |
| 689 | 662 |
| 690 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of | 663 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of |
| 691 // ECMA-262. Other JS VMs support them. | 664 // ECMA-262. Other JS VMs support them. |
| 692 uc32 JavaScriptScanner::ScanOctalEscape(uc32 c, int length) { | 665 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { |
| 693 uc32 x = c - '0'; | 666 uc32 x = c - '0'; |
| 694 int i = 0; | 667 int i = 0; |
| 695 for (; i < length; i++) { | 668 for (; i < length; i++) { |
| 696 int d = c0_ - '0'; | 669 int d = c0_ - '0'; |
| 697 if (d < 0 || d > 7) break; | 670 if (d < 0 || d > 7) break; |
| 698 int nx = x * 8 + d; | 671 int nx = x * 8 + d; |
| 699 if (nx >= 256) break; | 672 if (nx >= 256) break; |
| 700 x = nx; | 673 x = nx; |
| 701 Advance(); | 674 Advance(); |
| 702 } | 675 } |
| 703 // Anything except '\0' is an octal escape sequence, illegal in strict mode. | 676 // Anything except '\0' is an octal escape sequence, illegal in strict mode. |
| 704 // Remember the position of octal escape sequences so that an error | 677 // Remember the position of octal escape sequences so that an error |
| 705 // can be reported later (in strict mode). | 678 // can be reported later (in strict mode). |
| 706 // We don't report the error immediately, because the octal escape can | 679 // We don't report the error immediately, because the octal escape can |
| 707 // occur before the "use strict" directive. | 680 // occur before the "use strict" directive. |
| 708 if (c != '0' || i > 0) { | 681 if (c != '0' || i > 0) { |
| 709 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); | 682 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); |
| 710 } | 683 } |
| 711 return x; | 684 return x; |
| 712 } | 685 } |
| 713 | 686 |
| 714 | 687 |
| 715 Token::Value JavaScriptScanner::ScanString() { | 688 Token::Value Scanner::ScanString() { |
| 716 uc32 quote = c0_; | 689 uc32 quote = c0_; |
| 717 Advance(); // consume quote | 690 Advance(); // consume quote |
| 718 | 691 |
| 719 LiteralScope literal(this); | 692 LiteralScope literal(this); |
| 720 while (c0_ != quote && c0_ >= 0 | 693 while (c0_ != quote && c0_ >= 0 |
| 721 && !unicode_cache_->IsLineTerminator(c0_)) { | 694 && !unicode_cache_->IsLineTerminator(c0_)) { |
| 722 uc32 c = c0_; | 695 uc32 c = c0_; |
| 723 Advance(); | 696 Advance(); |
| 724 if (c == '\\') { | 697 if (c == '\\') { |
| 725 if (c0_ < 0) return Token::ILLEGAL; | 698 if (c0_ < 0) return Token::ILLEGAL; |
| 726 ScanEscape(); | 699 ScanEscape(); |
| 727 } else { | 700 } else { |
| 728 AddLiteralChar(c); | 701 AddLiteralChar(c); |
| 729 } | 702 } |
| 730 } | 703 } |
| 731 if (c0_ != quote) return Token::ILLEGAL; | 704 if (c0_ != quote) return Token::ILLEGAL; |
| 732 literal.Complete(); | 705 literal.Complete(); |
| 733 | 706 |
| 734 Advance(); // consume quote | 707 Advance(); // consume quote |
| 735 return Token::STRING; | 708 return Token::STRING; |
| 736 } | 709 } |
| 737 | 710 |
| 738 | 711 |
| 739 void JavaScriptScanner::ScanDecimalDigits() { | 712 void Scanner::ScanDecimalDigits() { |
| 740 while (IsDecimalDigit(c0_)) | 713 while (IsDecimalDigit(c0_)) |
| 741 AddLiteralCharAdvance(); | 714 AddLiteralCharAdvance(); |
| 742 } | 715 } |
| 743 | 716 |
| 744 | 717 |
| 745 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { | 718 Token::Value Scanner::ScanNumber(bool seen_period) { |
| 746 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 719 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
| 747 | 720 |
| 748 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; | 721 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; |
| 749 | 722 |
| 750 LiteralScope literal(this); | 723 LiteralScope literal(this); |
| 751 if (seen_period) { | 724 if (seen_period) { |
| 752 // we have already seen a decimal point of the float | 725 // we have already seen a decimal point of the float |
| 753 AddLiteralChar('.'); | 726 AddLiteralChar('.'); |
| 754 ScanDecimalDigits(); // we know we have at least one digit | 727 ScanDecimalDigits(); // we know we have at least one digit |
| 755 | 728 |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 820 // if the value is 0). | 793 // if the value is 0). |
| 821 if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) | 794 if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) |
| 822 return Token::ILLEGAL; | 795 return Token::ILLEGAL; |
| 823 | 796 |
| 824 literal.Complete(); | 797 literal.Complete(); |
| 825 | 798 |
| 826 return Token::NUMBER; | 799 return Token::NUMBER; |
| 827 } | 800 } |
| 828 | 801 |
| 829 | 802 |
| 830 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { | 803 uc32 Scanner::ScanIdentifierUnicodeEscape() { |
| 831 Advance(); | 804 Advance(); |
| 832 if (c0_ != 'u') return -1; | 805 if (c0_ != 'u') return -1; |
| 833 Advance(); | 806 Advance(); |
| 834 uc32 result = ScanHexNumber(4); | 807 uc32 result = ScanHexNumber(4); |
| 835 if (result < 0) PushBack('u'); | 808 if (result < 0) PushBack('u'); |
| 836 return result; | 809 return result; |
| 837 } | 810 } |
| 838 | 811 |
| 839 | 812 |
| 840 // ---------------------------------------------------------------------------- | 813 // ---------------------------------------------------------------------------- |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 937 (keyword_length <= 9 || input[9] == keyword[9])) { \ | 910 (keyword_length <= 9 || input[9] == keyword[9])) { \ |
| 938 return token; \ | 911 return token; \ |
| 939 } \ | 912 } \ |
| 940 } | 913 } |
| 941 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) | 914 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) |
| 942 } | 915 } |
| 943 return Token::IDENTIFIER; | 916 return Token::IDENTIFIER; |
| 944 } | 917 } |
| 945 | 918 |
| 946 | 919 |
| 947 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { | 920 Token::Value Scanner::ScanIdentifierOrKeyword() { |
| 948 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); | 921 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); |
| 949 LiteralScope literal(this); | 922 LiteralScope literal(this); |
| 950 // Scan identifier start character. | 923 // Scan identifier start character. |
| 951 if (c0_ == '\\') { | 924 if (c0_ == '\\') { |
| 952 uc32 c = ScanIdentifierUnicodeEscape(); | 925 uc32 c = ScanIdentifierUnicodeEscape(); |
| 953 // Only allow legal identifier start characters. | 926 // Only allow legal identifier start characters. |
| 954 if (c < 0 || | 927 if (c < 0 || |
| 955 c == '\\' || // No recursive escapes. | 928 c == '\\' || // No recursive escapes. |
| 956 !unicode_cache_->IsIdentifierStart(c)) { | 929 !unicode_cache_->IsIdentifierStart(c)) { |
| 957 return Token::ILLEGAL; | 930 return Token::ILLEGAL; |
| (...skipping 24 matching lines...) Expand all Loading... |
| 982 Vector<const char> chars = next_.literal_chars->ascii_literal(); | 955 Vector<const char> chars = next_.literal_chars->ascii_literal(); |
| 983 return KeywordOrIdentifierToken(chars.start(), | 956 return KeywordOrIdentifierToken(chars.start(), |
| 984 chars.length(), | 957 chars.length(), |
| 985 harmony_scoping_); | 958 harmony_scoping_); |
| 986 } | 959 } |
| 987 | 960 |
| 988 return Token::IDENTIFIER; | 961 return Token::IDENTIFIER; |
| 989 } | 962 } |
| 990 | 963 |
| 991 | 964 |
| 992 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { | 965 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { |
| 993 // Scan the rest of the identifier characters. | 966 // Scan the rest of the identifier characters. |
| 994 while (unicode_cache_->IsIdentifierPart(c0_)) { | 967 while (unicode_cache_->IsIdentifierPart(c0_)) { |
| 995 if (c0_ == '\\') { | 968 if (c0_ == '\\') { |
| 996 uc32 c = ScanIdentifierUnicodeEscape(); | 969 uc32 c = ScanIdentifierUnicodeEscape(); |
| 997 // Only allow legal identifier part characters. | 970 // Only allow legal identifier part characters. |
| 998 if (c < 0 || | 971 if (c < 0 || |
| 999 c == '\\' || | 972 c == '\\' || |
| 1000 !unicode_cache_->IsIdentifierPart(c)) { | 973 !unicode_cache_->IsIdentifierPart(c)) { |
| 1001 return Token::ILLEGAL; | 974 return Token::ILLEGAL; |
| 1002 } | 975 } |
| 1003 AddLiteralChar(c); | 976 AddLiteralChar(c); |
| 1004 } else { | 977 } else { |
| 1005 AddLiteralChar(c0_); | 978 AddLiteralChar(c0_); |
| 1006 Advance(); | 979 Advance(); |
| 1007 } | 980 } |
| 1008 } | 981 } |
| 1009 literal->Complete(); | 982 literal->Complete(); |
| 1010 | 983 |
| 1011 return Token::IDENTIFIER; | 984 return Token::IDENTIFIER; |
| 1012 } | 985 } |
| 1013 | 986 |
| 1014 | 987 |
| 1015 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { | 988 bool Scanner::ScanRegExpPattern(bool seen_equal) { |
| 1016 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 989 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
| 1017 bool in_character_class = false; | 990 bool in_character_class = false; |
| 1018 | 991 |
| 1019 // Previous token is either '/' or '/=', in the second case, the | 992 // Previous token is either '/' or '/=', in the second case, the |
| 1020 // pattern starts at =. | 993 // pattern starts at =. |
| 1021 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 994 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
| 1022 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 995 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
| 1023 | 996 |
| 1024 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 997 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| 1025 // the scanner should pass uninterpreted bodies to the RegExp | 998 // the scanner should pass uninterpreted bodies to the RegExp |
| (...skipping 26 matching lines...) Expand all Loading... |
| 1052 } | 1025 } |
| 1053 } | 1026 } |
| 1054 Advance(); // consume '/' | 1027 Advance(); // consume '/' |
| 1055 | 1028 |
| 1056 literal.Complete(); | 1029 literal.Complete(); |
| 1057 | 1030 |
| 1058 return true; | 1031 return true; |
| 1059 } | 1032 } |
| 1060 | 1033 |
| 1061 | 1034 |
| 1062 bool JavaScriptScanner::ScanLiteralUnicodeEscape() { | 1035 bool Scanner::ScanLiteralUnicodeEscape() { |
| 1063 ASSERT(c0_ == '\\'); | 1036 ASSERT(c0_ == '\\'); |
| 1064 uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0}; | 1037 uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0}; |
| 1065 Advance(); | 1038 Advance(); |
| 1066 int i = 1; | 1039 int i = 1; |
| 1067 if (c0_ == 'u') { | 1040 if (c0_ == 'u') { |
| 1068 i++; | 1041 i++; |
| 1069 while (i < 6) { | 1042 while (i < 6) { |
| 1070 Advance(); | 1043 Advance(); |
| 1071 if (!IsHexDigit(c0_)) break; | 1044 if (!IsHexDigit(c0_)) break; |
| 1072 chars_read[i] = c0_; | 1045 chars_read[i] = c0_; |
| 1073 i++; | 1046 i++; |
| 1074 } | 1047 } |
| 1075 } | 1048 } |
| 1076 if (i < 6) { | 1049 if (i < 6) { |
| 1077 // Incomplete escape. Undo all advances and return false. | 1050 // Incomplete escape. Undo all advances and return false. |
| 1078 while (i > 0) { | 1051 while (i > 0) { |
| 1079 i--; | 1052 i--; |
| 1080 PushBack(chars_read[i]); | 1053 PushBack(chars_read[i]); |
| 1081 } | 1054 } |
| 1082 return false; | 1055 return false; |
| 1083 } | 1056 } |
| 1084 // Complete escape. Add all chars to current literal buffer. | 1057 // Complete escape. Add all chars to current literal buffer. |
| 1085 for (int i = 0; i < 6; i++) { | 1058 for (int i = 0; i < 6; i++) { |
| 1086 AddLiteralChar(chars_read[i]); | 1059 AddLiteralChar(chars_read[i]); |
| 1087 } | 1060 } |
| 1088 return true; | 1061 return true; |
| 1089 } | 1062 } |
| 1090 | 1063 |
| 1091 | 1064 |
| 1092 bool JavaScriptScanner::ScanRegExpFlags() { | 1065 bool Scanner::ScanRegExpFlags() { |
| 1093 // Scan regular expression flags. | 1066 // Scan regular expression flags. |
| 1094 LiteralScope literal(this); | 1067 LiteralScope literal(this); |
| 1095 while (unicode_cache_->IsIdentifierPart(c0_)) { | 1068 while (unicode_cache_->IsIdentifierPart(c0_)) { |
| 1096 if (c0_ != '\\') { | 1069 if (c0_ != '\\') { |
| 1097 AddLiteralCharAdvance(); | 1070 AddLiteralCharAdvance(); |
| 1098 } else { | 1071 } else { |
| 1099 if (!ScanLiteralUnicodeEscape()) { | 1072 if (!ScanLiteralUnicodeEscape()) { |
| 1100 break; | 1073 break; |
| 1101 } | 1074 } |
| 1102 } | 1075 } |
| 1103 } | 1076 } |
| 1104 literal.Complete(); | 1077 literal.Complete(); |
| 1105 | 1078 |
| 1106 next_.location.end_pos = source_pos() - 1; | 1079 next_.location.end_pos = source_pos() - 1; |
| 1107 return true; | 1080 return true; |
| 1108 } | 1081 } |
| 1109 | 1082 |
| 1110 } } // namespace v8::internal | 1083 } } // namespace v8::internal |
| OLD | NEW |