| OLD | NEW |
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 12 matching lines...) Expand all Loading... |
| 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 #include "v8.h" | 28 #include "v8.h" |
| 29 | 29 |
| 30 #include "ast.h" | 30 #include "ast.h" |
| 31 #include "handles.h" | 31 #include "handles.h" |
| 32 #include "scanner.h" | 32 #include "scanner.h" |
| 33 #include "unicode-inl.h" |
| 33 | 34 |
| 34 namespace v8 { | 35 namespace v8 { |
| 35 namespace internal { | 36 namespace internal { |
| 36 | 37 |
| 37 // ---------------------------------------------------------------------------- | 38 // ---------------------------------------------------------------------------- |
| 38 // Character predicates | |
| 39 | |
| 40 | |
| 41 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart; | |
| 42 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart; | |
| 43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; | |
| 44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; | |
| 45 | |
| 46 | |
| 47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; | |
| 48 | |
| 49 | |
| 50 // ---------------------------------------------------------------------------- | |
| 51 // UTF8Buffer | 39 // UTF8Buffer |
| 52 | 40 |
| 53 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { } | 41 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity), recording_(false) { } |
| 54 | 42 |
| 55 | 43 |
| 56 UTF8Buffer::~UTF8Buffer() {} | 44 UTF8Buffer::~UTF8Buffer() {} |
| 57 | 45 |
| 58 | 46 |
| 59 void UTF8Buffer::AddCharSlow(uc32 c) { | 47 void UTF8Buffer::AddCharSlow(uc32 c) { |
| 60 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); | 48 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); |
| 61 int length = unibrow::Utf8::Length(c); | 49 int length = unibrow::Utf8::Length(c); |
| 62 Vector<char> block = buffer_.AddBlock(length, '\0'); | 50 Vector<char> block = buffer_.AddBlock(length, '\0'); |
| 63 #ifdef DEBUG | 51 #ifdef DEBUG |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 128 } | 116 } |
| 129 | 117 |
| 130 | 118 |
| 131 void CharacterStreamUTF16Buffer::SeekForward(int pos) { | 119 void CharacterStreamUTF16Buffer::SeekForward(int pos) { |
| 132 pos_ = pos; | 120 pos_ = pos; |
| 133 ASSERT(pushback_buffer()->is_empty()); | 121 ASSERT(pushback_buffer()->is_empty()); |
| 134 stream_->Seek(pos); | 122 stream_->Seek(pos); |
| 135 } | 123 } |
| 136 | 124 |
| 137 | 125 |
| 138 // ExternalStringUTF16Buffer | |
| 139 template <typename StringType, typename CharType> | |
| 140 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer() | |
| 141 : raw_data_(NULL) { } | |
| 142 | |
| 143 | |
| 144 template <typename StringType, typename CharType> | |
| 145 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize( | |
| 146 Handle<StringType> data, | |
| 147 int start_position, | |
| 148 int end_position) { | |
| 149 ASSERT(!data.is_null()); | |
| 150 raw_data_ = data->resource()->data(); | |
| 151 | |
| 152 ASSERT(end_position <= data->length()); | |
| 153 if (start_position > 0) { | |
| 154 SeekForward(start_position); | |
| 155 } | |
| 156 end_ = | |
| 157 end_position != Scanner::kNoEndPosition ? end_position : data->length(); | |
| 158 } | |
| 159 | |
| 160 | |
| 161 template <typename StringType, typename CharType> | |
| 162 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() { | |
| 163 if (pos_ < end_) { | |
| 164 return raw_data_[pos_++]; | |
| 165 } else { | |
| 166 // note: currently the following increment is necessary to avoid a | |
| 167 // test-parser problem! | |
| 168 pos_++; | |
| 169 return static_cast<uc32>(-1); | |
| 170 } | |
| 171 } | |
| 172 | |
| 173 | |
| 174 template <typename StringType, typename CharType> | |
| 175 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) { | |
| 176 pos_--; | |
| 177 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); | |
| 178 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); | |
| 179 } | |
| 180 | |
| 181 | |
| 182 template <typename StringType, typename CharType> | |
| 183 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) { | |
| 184 pos_ = pos; | |
| 185 } | |
| 186 | |
| 187 // ---------------------------------------------------------------------------- | 126 // ---------------------------------------------------------------------------- |
| 188 // Scanner::LiteralScope | 127 // Scanner::LiteralScope |
| 189 | 128 |
| 190 Scanner::LiteralScope::LiteralScope(Scanner* self) | 129 Scanner::LiteralScope::LiteralScope(Scanner* self) |
| 191 : scanner_(self), complete_(false) { | 130 : scanner_(self), complete_(false) { |
| 192 self->StartLiteral(); | 131 self->StartLiteral(); |
| 193 } | 132 } |
| 194 | 133 |
| 195 | 134 |
| 196 Scanner::LiteralScope::~LiteralScope() { | 135 Scanner::LiteralScope::~LiteralScope() { |
| (...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 302 } | 241 } |
| 303 return current_.token; | 242 return current_.token; |
| 304 } | 243 } |
| 305 | 244 |
| 306 | 245 |
| 307 void Scanner::StartLiteral() { | 246 void Scanner::StartLiteral() { |
| 308 literal_buffer_.StartLiteral(); | 247 literal_buffer_.StartLiteral(); |
| 309 } | 248 } |
| 310 | 249 |
| 311 | 250 |
| 312 void Scanner::AddChar(uc32 c) { | 251 void Scanner::AddLiteralChar(uc32 c) { |
| 313 literal_buffer_.AddChar(c); | 252 literal_buffer_.AddChar(c); |
| 314 } | 253 } |
| 315 | 254 |
| 316 | 255 |
| 317 void Scanner::TerminateLiteral() { | 256 void Scanner::TerminateLiteral() { |
| 318 next_.literal_chars = literal_buffer_.EndLiteral(); | 257 next_.literal_chars = literal_buffer_.EndLiteral(); |
| 319 } | 258 } |
| 320 | 259 |
| 321 | 260 |
| 322 void Scanner::DropLiteral() { | 261 void Scanner::DropLiteral() { |
| 323 literal_buffer_.DropLiteral(); | 262 literal_buffer_.DropLiteral(); |
| 324 } | 263 } |
| 325 | 264 |
| 326 | 265 |
| 327 void Scanner::AddCharAdvance() { | 266 void Scanner::AddLiteralCharAdvance() { |
| 328 AddChar(c0_); | 267 AddLiteralChar(c0_); |
| 329 Advance(); | 268 Advance(); |
| 330 } | 269 } |
| 331 | 270 |
| 332 | 271 |
| 333 static inline bool IsByteOrderMark(uc32 c) { | 272 static inline bool IsByteOrderMark(uc32 c) { |
| 334 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 273 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
| 335 // Unicode character; this implies that in a Unicode context the | 274 // Unicode character; this implies that in a Unicode context the |
| 336 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 275 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
| 337 // character expressed in little-endian byte order (since it could | 276 // character expressed in little-endian byte order (since it could |
| 338 // not be a U+FFFE character expressed in big-endian byte | 277 // not be a U+FFFE character expressed in big-endian byte |
| (...skipping 12 matching lines...) Expand all Loading... |
| 351 return source_pos() != start_position; | 290 return source_pos() != start_position; |
| 352 } | 291 } |
| 353 | 292 |
| 354 | 293 |
| 355 bool Scanner::SkipJavaScriptWhiteSpace() { | 294 bool Scanner::SkipJavaScriptWhiteSpace() { |
| 356 int start_position = source_pos(); | 295 int start_position = source_pos(); |
| 357 | 296 |
| 358 while (true) { | 297 while (true) { |
| 359 // We treat byte-order marks (BOMs) as whitespace for better | 298 // We treat byte-order marks (BOMs) as whitespace for better |
| 360 // compatibility with Spidermonkey and other JavaScript engines. | 299 // compatibility with Spidermonkey and other JavaScript engines. |
| 361 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | 300 while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { |
| 362 // IsWhiteSpace() includes line terminators! | 301 // IsWhiteSpace() includes line terminators! |
| 363 if (kIsLineTerminator.get(c0_)) { | 302 if (ScannerConstants::kIsLineTerminator.get(c0_)) { |
| 364 // Ignore line terminators, but remember them. This is necessary | 303 // Ignore line terminators, but remember them. This is necessary |
| 365 // for automatic semicolon insertion. | 304 // for automatic semicolon insertion. |
| 366 has_line_terminator_before_next_ = true; | 305 has_line_terminator_before_next_ = true; |
| 367 } | 306 } |
| 368 Advance(); | 307 Advance(); |
| 369 } | 308 } |
| 370 | 309 |
| 371 // If there is an HTML comment end '-->' at the beginning of a | 310 // If there is an HTML comment end '-->' at the beginning of a |
| 372 // line (with only whitespace in front of it), we treat the rest | 311 // line (with only whitespace in front of it), we treat the rest |
| 373 // of the line as a comment. This is in line with the way | 312 // of the line as a comment. This is in line with the way |
| (...skipping 19 matching lines...) Expand all Loading... |
| 393 | 332 |
| 394 | 333 |
| 395 Token::Value Scanner::SkipSingleLineComment() { | 334 Token::Value Scanner::SkipSingleLineComment() { |
| 396 Advance(); | 335 Advance(); |
| 397 | 336 |
| 398 // The line terminator at the end of the line is not considered | 337 // The line terminator at the end of the line is not considered |
| 399 // to be part of the single-line comment; it is recognized | 338 // to be part of the single-line comment; it is recognized |
| 400 // separately by the lexical grammar and becomes part of the | 339 // separately by the lexical grammar and becomes part of the |
| 401 // stream of input elements for the syntactic grammar (see | 340 // stream of input elements for the syntactic grammar (see |
| 402 // ECMA-262, section 7.4, page 12). | 341 // ECMA-262, section 7.4, page 12). |
| 403 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 342 while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) { |
| 404 Advance(); | 343 Advance(); |
| 405 } | 344 } |
| 406 | 345 |
| 407 return Token::WHITESPACE; | 346 return Token::WHITESPACE; |
| 408 } | 347 } |
| 409 | 348 |
| 410 | 349 |
| 411 Token::Value Scanner::SkipMultiLineComment() { | 350 Token::Value Scanner::SkipMultiLineComment() { |
| 412 ASSERT(c0_ == '*'); | 351 ASSERT(c0_ == '*'); |
| 413 Advance(); | 352 Advance(); |
| (...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 530 | 469 |
| 531 | 470 |
| 532 Token::Value Scanner::ScanJsonString() { | 471 Token::Value Scanner::ScanJsonString() { |
| 533 ASSERT_EQ('"', c0_); | 472 ASSERT_EQ('"', c0_); |
| 534 Advance(); | 473 Advance(); |
| 535 LiteralScope literal(this); | 474 LiteralScope literal(this); |
| 536 while (c0_ != '"' && c0_ > 0) { | 475 while (c0_ != '"' && c0_ > 0) { |
| 537 // Reject unescaped control characters (0x01-0x1f); NUL and end of input (<0) end the loop and are rejected below. | 476 // Reject unescaped control characters (0x01-0x1f); NUL and end of input (<0) end the loop and are rejected below. |
| 538 if (c0_ < 0x20) return Token::ILLEGAL; | 477 if (c0_ < 0x20) return Token::ILLEGAL; |
| 539 if (c0_ != '\\') { | 478 if (c0_ != '\\') { |
| 540 AddCharAdvance(); | 479 AddLiteralCharAdvance(); |
| 541 } else { | 480 } else { |
| 542 Advance(); | 481 Advance(); |
| 543 switch (c0_) { | 482 switch (c0_) { |
| 544 case '"': | 483 case '"': |
| 545 case '\\': | 484 case '\\': |
| 546 case '/': | 485 case '/': |
| 547 AddChar(c0_); | 486 AddLiteralChar(c0_); |
| 548 break; | 487 break; |
| 549 case 'b': | 488 case 'b': |
| 550 AddChar('\x08'); | 489 AddLiteralChar('\x08'); |
| 551 break; | 490 break; |
| 552 case 'f': | 491 case 'f': |
| 553 AddChar('\x0c'); | 492 AddLiteralChar('\x0c'); |
| 554 break; | 493 break; |
| 555 case 'n': | 494 case 'n': |
| 556 AddChar('\x0a'); | 495 AddLiteralChar('\x0a'); |
| 557 break; | 496 break; |
| 558 case 'r': | 497 case 'r': |
| 559 AddChar('\x0d'); | 498 AddLiteralChar('\x0d'); |
| 560 break; | 499 break; |
| 561 case 't': | 500 case 't': |
| 562 AddChar('\x09'); | 501 AddLiteralChar('\x09'); |
| 563 break; | 502 break; |
| 564 case 'u': { | 503 case 'u': { |
| 565 uc32 value = 0; | 504 uc32 value = 0; |
| 566 for (int i = 0; i < 4; i++) { | 505 for (int i = 0; i < 4; i++) { |
| 567 Advance(); | 506 Advance(); |
| 568 int digit = HexValue(c0_); | 507 int digit = HexValue(c0_); |
| 569 if (digit < 0) { | 508 if (digit < 0) { |
| 570 return Token::ILLEGAL; | 509 return Token::ILLEGAL; |
| 571 } | 510 } |
| 572 value = value * 16 + digit; | 511 value = value * 16 + digit; |
| 573 } | 512 } |
| 574 AddChar(value); | 513 AddLiteralChar(value); |
| 575 break; | 514 break; |
| 576 } | 515 } |
| 577 default: | 516 default: |
| 578 return Token::ILLEGAL; | 517 return Token::ILLEGAL; |
| 579 } | 518 } |
| 580 Advance(); | 519 Advance(); |
| 581 } | 520 } |
| 582 } | 521 } |
| 583 if (c0_ != '"') { | 522 if (c0_ != '"') { |
| 584 return Token::ILLEGAL; | 523 return Token::ILLEGAL; |
| 585 } | 524 } |
| 586 literal.Complete(); | 525 literal.Complete(); |
| 587 Advance(); | 526 Advance(); |
| 588 return Token::STRING; | 527 return Token::STRING; |
| 589 } | 528 } |
| 590 | 529 |
| 591 | 530 |
| 592 Token::Value Scanner::ScanJsonNumber() { | 531 Token::Value Scanner::ScanJsonNumber() { |
| 593 LiteralScope literal(this); | 532 LiteralScope literal(this); |
| 594 if (c0_ == '-') AddCharAdvance(); | 533 if (c0_ == '-') AddLiteralCharAdvance(); |
| 595 if (c0_ == '0') { | 534 if (c0_ == '0') { |
| 596 AddCharAdvance(); | 535 AddLiteralCharAdvance(); |
| 597 // Prefix zero is only allowed if it's the only digit before | 536 // Prefix zero is only allowed if it's the only digit before |
| 598 // a decimal point or exponent. | 537 // a decimal point or exponent. |
| 599 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; | 538 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; |
| 600 } else { | 539 } else { |
| 601 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; | 540 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; |
| 602 do { | 541 do { |
| 603 AddCharAdvance(); | 542 AddLiteralCharAdvance(); |
| 604 } while (c0_ >= '0' && c0_ <= '9'); | 543 } while (c0_ >= '0' && c0_ <= '9'); |
| 605 } | 544 } |
| 606 if (c0_ == '.') { | 545 if (c0_ == '.') { |
| 607 AddCharAdvance(); | 546 AddLiteralCharAdvance(); |
| 608 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; | 547 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; |
| 609 do { | 548 do { |
| 610 AddCharAdvance(); | 549 AddLiteralCharAdvance(); |
| 611 } while (c0_ >= '0' && c0_ <= '9'); | 550 } while (c0_ >= '0' && c0_ <= '9'); |
| 612 } | 551 } |
| 613 if (AsciiAlphaToLower(c0_) == 'e') { | 552 if (AsciiAlphaToLower(c0_) == 'e') { |
| 614 AddCharAdvance(); | 553 AddLiteralCharAdvance(); |
| 615 if (c0_ == '-' || c0_ == '+') AddCharAdvance(); | 554 if (c0_ == '-' || c0_ == '+') AddLiteralCharAdvance(); |
| 616 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; | 555 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; |
| 617 do { | 556 do { |
| 618 AddCharAdvance(); | 557 AddLiteralCharAdvance(); |
| 619 } while (c0_ >= '0' && c0_ <= '9'); | 558 } while (c0_ >= '0' && c0_ <= '9'); |
| 620 } | 559 } |
| 621 literal.Complete(); | 560 literal.Complete(); |
| 622 return Token::NUMBER; | 561 return Token::NUMBER; |
| 623 } | 562 } |
| 624 | 563 |
| 625 | 564 |
| 626 Token::Value Scanner::ScanJsonIdentifier(const char* text, | 565 Token::Value Scanner::ScanJsonIdentifier(const char* text, |
| 627 Token::Value token) { | 566 Token::Value token) { |
| 628 LiteralScope literal(this); | 567 LiteralScope literal(this); |
| 629 while (*text != '\0') { | 568 while (*text != '\0') { |
| 630 if (c0_ != *text) return Token::ILLEGAL; | 569 if (c0_ != *text) return Token::ILLEGAL; |
| 631 Advance(); | 570 Advance(); |
| 632 text++; | 571 text++; |
| 633 } | 572 } |
| 634 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; | 573 if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; |
| 635 literal.Complete(); | 574 literal.Complete(); |
| 636 return token; | 575 return token; |
| 637 } | 576 } |
| 638 | 577 |
| 639 | 578 |
| 640 void Scanner::ScanJavaScript() { | 579 void Scanner::ScanJavaScript() { |
| 641 next_.literal_chars = Vector<const char>(); | 580 next_.literal_chars = Vector<const char>(); |
| 642 Token::Value token; | 581 Token::Value token; |
| 643 do { | 582 do { |
| 644 // Remember the position of the next token | 583 // Remember the position of the next token |
| (...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 847 | 786 |
| 848 case '?': | 787 case '?': |
| 849 token = Select(Token::CONDITIONAL); | 788 token = Select(Token::CONDITIONAL); |
| 850 break; | 789 break; |
| 851 | 790 |
| 852 case '~': | 791 case '~': |
| 853 token = Select(Token::BIT_NOT); | 792 token = Select(Token::BIT_NOT); |
| 854 break; | 793 break; |
| 855 | 794 |
| 856 default: | 795 default: |
| 857 if (kIsIdentifierStart.get(c0_)) { | 796 if (ScannerConstants::kIsIdentifierStart.get(c0_)) { |
| 858 token = ScanIdentifier(); | 797 token = ScanIdentifier(); |
| 859 } else if (IsDecimalDigit(c0_)) { | 798 } else if (IsDecimalDigit(c0_)) { |
| 860 token = ScanNumber(false); | 799 token = ScanNumber(false); |
| 861 } else if (SkipWhiteSpace()) { | 800 } else if (SkipWhiteSpace()) { |
| 862 token = Token::WHITESPACE; | 801 token = Token::WHITESPACE; |
| 863 } else if (c0_ < 0) { | 802 } else if (c0_ < 0) { |
| 864 token = Token::EOS; | 803 token = Token::EOS; |
| 865 } else { | 804 } else { |
| 866 token = Select(Token::ILLEGAL); | 805 token = Select(Token::ILLEGAL); |
| 867 } | 806 } |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 930 } | 869 } |
| 931 return x; | 870 return x; |
| 932 } | 871 } |
| 933 | 872 |
| 934 | 873 |
| 935 void Scanner::ScanEscape() { | 874 void Scanner::ScanEscape() { |
| 936 uc32 c = c0_; | 875 uc32 c = c0_; |
| 937 Advance(); | 876 Advance(); |
| 938 | 877 |
| 939 // Skip escaped newlines. | 878 // Skip escaped newlines. |
| 940 if (kIsLineTerminator.get(c)) { | 879 if (ScannerConstants::kIsLineTerminator.get(c)) { |
| 941 // Allow CR+LF newlines in multiline string literals. | 880 // Allow CR+LF newlines in multiline string literals. |
| 942 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 881 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
| 943 // Allow LF+CR newlines in multiline string literals. | 882 // Allow LF+CR newlines in multiline string literals. |
| 944 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 883 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); |
| 945 return; | 884 return; |
| 946 } | 885 } |
| 947 | 886 |
| 948 switch (c) { | 887 switch (c) { |
| 949 case '\'': // fall through | 888 case '\'': // fall through |
| 950 case '"' : // fall through | 889 case '"' : // fall through |
| (...skipping 12 matching lines...) Expand all Loading... |
| 963 case '3' : // fall through | 902 case '3' : // fall through |
| 964 case '4' : // fall through | 903 case '4' : // fall through |
| 965 case '5' : // fall through | 904 case '5' : // fall through |
| 966 case '6' : // fall through | 905 case '6' : // fall through |
| 967 case '7' : c = ScanOctalEscape(c, 2); break; | 906 case '7' : c = ScanOctalEscape(c, 2); break; |
| 968 } | 907 } |
| 969 | 908 |
| 970 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these | 909 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these |
| 971 // should be illegal, but they are commonly handled | 910 // should be illegal, but they are commonly handled |
| 972 // as non-escaped characters by JS VMs. | 911 // as non-escaped characters by JS VMs. |
| 973 AddChar(c); | 912 AddLiteralChar(c); |
| 974 } | 913 } |
| 975 | 914 |
| 976 | 915 |
| 977 Token::Value Scanner::ScanString() { | 916 Token::Value Scanner::ScanString() { |
| 978 uc32 quote = c0_; | 917 uc32 quote = c0_; |
| 979 Advance(); // consume quote | 918 Advance(); // consume quote |
| 980 | 919 |
| 981 LiteralScope literal(this); | 920 LiteralScope literal(this); |
| 982 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 921 while (c0_ != quote && c0_ >= 0 |
| 922 && !ScannerConstants::kIsLineTerminator.get(c0_)) { |
| 983 uc32 c = c0_; | 923 uc32 c = c0_; |
| 984 Advance(); | 924 Advance(); |
| 985 if (c == '\\') { | 925 if (c == '\\') { |
| 986 if (c0_ < 0) return Token::ILLEGAL; | 926 if (c0_ < 0) return Token::ILLEGAL; |
| 987 ScanEscape(); | 927 ScanEscape(); |
| 988 } else { | 928 } else { |
| 989 AddChar(c); | 929 AddLiteralChar(c); |
| 990 } | 930 } |
| 991 } | 931 } |
| 992 if (c0_ != quote) return Token::ILLEGAL; | 932 if (c0_ != quote) return Token::ILLEGAL; |
| 993 literal.Complete(); | 933 literal.Complete(); |
| 994 | 934 |
| 995 Advance(); // consume quote | 935 Advance(); // consume quote |
| 996 return Token::STRING; | 936 return Token::STRING; |
| 997 } | 937 } |
| 998 | 938 |
| 999 | 939 |
| (...skipping 10 matching lines...) Expand all Loading... |
| 1010 return then; | 950 return then; |
| 1011 } else { | 951 } else { |
| 1012 return else_; | 952 return else_; |
| 1013 } | 953 } |
| 1014 } | 954 } |
| 1015 | 955 |
| 1016 | 956 |
| 1017 // Scans a possibly empty run of decimal digits, adding each one to the current literal. | 957 // Scans a possibly empty run of decimal digits, adding each one to the current literal. |
| 1018 void Scanner::ScanDecimalDigits() { | 958 void Scanner::ScanDecimalDigits() { |
| 1019 while (IsDecimalDigit(c0_)) | 959 while (IsDecimalDigit(c0_)) |
| 1020 AddCharAdvance(); | 960 AddLiteralCharAdvance(); |
| 1021 } | 961 } |
| 1022 | 962 |
| 1023 | 963 |
| 1024 Token::Value Scanner::ScanNumber(bool seen_period) { | 964 Token::Value Scanner::ScanNumber(bool seen_period) { |
| 1025 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 965 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
| 1026 | 966 |
| 1027 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; | 967 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; |
| 1028 | 968 |
| 1029 LiteralScope literal(this); | 969 LiteralScope literal(this); |
| 1030 if (seen_period) { | 970 if (seen_period) { |
| 1031 // we have already seen a decimal point of the float | 971 // we have already seen a decimal point of the float |
| 1032 AddChar('.'); | 972 AddLiteralChar('.'); |
| 1033 ScanDecimalDigits(); // we know we have at least one digit | 973 ScanDecimalDigits(); // we know we have at least one digit |
| 1034 | 974 |
| 1035 } else { | 975 } else { |
| 1036 // if the first character is '0' we must check for octals and hex | 976 // if the first character is '0' we must check for octals and hex |
| 1037 if (c0_ == '0') { | 977 if (c0_ == '0') { |
| 1038 AddCharAdvance(); | 978 AddLiteralCharAdvance(); |
| 1039 | 979 |
| 1040 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number | 980 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number |
| 1041 if (c0_ == 'x' || c0_ == 'X') { | 981 if (c0_ == 'x' || c0_ == 'X') { |
| 1042 // hex number | 982 // hex number |
| 1043 kind = HEX; | 983 kind = HEX; |
| 1044 AddCharAdvance(); | 984 AddLiteralCharAdvance(); |
| 1045 if (!IsHexDigit(c0_)) { | 985 if (!IsHexDigit(c0_)) { |
| 1046 // we must have at least one hex digit after 'x'/'X' | 986 // we must have at least one hex digit after 'x'/'X' |
| 1047 return Token::ILLEGAL; | 987 return Token::ILLEGAL; |
| 1048 } | 988 } |
| 1049 while (IsHexDigit(c0_)) { | 989 while (IsHexDigit(c0_)) { |
| 1050 AddCharAdvance(); | 990 AddLiteralCharAdvance(); |
| 1051 } | 991 } |
| 1052 } else if ('0' <= c0_ && c0_ <= '7') { | 992 } else if ('0' <= c0_ && c0_ <= '7') { |
| 1053 // (possible) octal number | 993 // (possible) octal number |
| 1054 kind = OCTAL; | 994 kind = OCTAL; |
| 1055 while (true) { | 995 while (true) { |
| 1056 if (c0_ == '8' || c0_ == '9') { | 996 if (c0_ == '8' || c0_ == '9') { |
| 1057 kind = DECIMAL; | 997 kind = DECIMAL; |
| 1058 break; | 998 break; |
| 1059 } | 999 } |
| 1060 if (c0_ < '0' || '7' < c0_) break; | 1000 if (c0_ < '0' || '7' < c0_) break; |
| 1061 AddCharAdvance(); | 1001 AddLiteralCharAdvance(); |
| 1062 } | 1002 } |
| 1063 } | 1003 } |
| 1064 } | 1004 } |
| 1065 | 1005 |
| 1066 // Parse decimal digits and allow trailing fractional part. | 1006 // Parse decimal digits and allow trailing fractional part. |
| 1067 if (kind == DECIMAL) { | 1007 if (kind == DECIMAL) { |
| 1068 ScanDecimalDigits(); // optional | 1008 ScanDecimalDigits(); // optional |
| 1069 if (c0_ == '.') { | 1009 if (c0_ == '.') { |
| 1070 AddCharAdvance(); | 1010 AddLiteralCharAdvance(); |
| 1071 ScanDecimalDigits(); // optional | 1011 ScanDecimalDigits(); // optional |
| 1072 } | 1012 } |
| 1073 } | 1013 } |
| 1074 } | 1014 } |
| 1075 | 1015 |
| 1076 // scan exponent, if any | 1016 // scan exponent, if any |
| 1077 if (c0_ == 'e' || c0_ == 'E') { | 1017 if (c0_ == 'e' || c0_ == 'E') { |
| 1078 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number | 1018 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number |
| 1079 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed | 1019 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed |
| 1080 // scan exponent | 1020 // scan exponent |
| 1081 AddCharAdvance(); | 1021 AddLiteralCharAdvance(); |
| 1082 if (c0_ == '+' || c0_ == '-') | 1022 if (c0_ == '+' || c0_ == '-') |
| 1083 AddCharAdvance(); | 1023 AddLiteralCharAdvance(); |
| 1084 if (!IsDecimalDigit(c0_)) { | 1024 if (!IsDecimalDigit(c0_)) { |
| 1085 // we must have at least one decimal digit after 'e'/'E' | 1025 // we must have at least one decimal digit after 'e'/'E' |
| 1086 return Token::ILLEGAL; | 1026 return Token::ILLEGAL; |
| 1087 } | 1027 } |
| 1088 ScanDecimalDigits(); | 1028 ScanDecimalDigits(); |
| 1089 } | 1029 } |
| 1090 | 1030 |
| 1091 // The source character immediately following a numeric literal must | 1031 // The source character immediately following a numeric literal must |
| 1092 // not be an identifier start or a decimal digit; see ECMA-262 | 1032 // not be an identifier start or a decimal digit; see ECMA-262 |
| 1093 // section 7.8.3, page 17 (note that we read only one decimal digit | 1033 // section 7.8.3, page 17 (note that we read only one decimal digit |
| 1094 // if the value is 0). | 1034 // if the value is 0). |
| 1095 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) | 1035 if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_)) |
| 1096 return Token::ILLEGAL; | 1036 return Token::ILLEGAL; |
| 1097 | 1037 |
| 1098 literal.Complete(); | 1038 literal.Complete(); |
| 1099 | 1039 |
| 1100 return Token::NUMBER; | 1040 return Token::NUMBER; |
| 1101 } | 1041 } |
| 1102 | 1042 |
| 1103 | 1043 |
| 1104 uc32 Scanner::ScanIdentifierUnicodeEscape() { | 1044 uc32 Scanner::ScanIdentifierUnicodeEscape() { |
| 1105 Advance(); | 1045 Advance(); |
| 1106 if (c0_ != 'u') return unibrow::Utf8::kBadChar; | 1046 if (c0_ != 'u') return unibrow::Utf8::kBadChar; |
| 1107 Advance(); | 1047 Advance(); |
| 1108 uc32 c = ScanHexEscape('u', 4); | 1048 uc32 c = ScanHexEscape('u', 4); |
| 1109 // We do not allow a unicode escape sequence to start another | 1049 // We do not allow a unicode escape sequence to start another |
| 1110 // unicode escape sequence. | 1050 // unicode escape sequence. |
| 1111 if (c == '\\') return unibrow::Utf8::kBadChar; | 1051 if (c == '\\') return unibrow::Utf8::kBadChar; |
| 1112 return c; | 1052 return c; |
| 1113 } | 1053 } |
| 1114 | 1054 |
| 1115 | 1055 |
| 1116 Token::Value Scanner::ScanIdentifier() { | 1056 Token::Value Scanner::ScanIdentifier() { |
| 1117 ASSERT(kIsIdentifierStart.get(c0_)); | 1057 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); |
| 1118 | 1058 |
| 1119 LiteralScope literal(this); | 1059 LiteralScope literal(this); |
| 1120 KeywordMatcher keyword_match; | 1060 KeywordMatcher keyword_match; |
| 1121 | 1061 |
| 1122 // Scan identifier start character. | 1062 // Scan identifier start character. |
| 1123 if (c0_ == '\\') { | 1063 if (c0_ == '\\') { |
| 1124 uc32 c = ScanIdentifierUnicodeEscape(); | 1064 uc32 c = ScanIdentifierUnicodeEscape(); |
| 1125 // Only allow legal identifier start characters. | 1065 // Only allow legal identifier start characters. |
| 1126 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; | 1066 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL; |
| 1127 AddChar(c); | 1067 AddLiteralChar(c); |
| 1128 keyword_match.Fail(); | 1068 keyword_match.Fail(); |
| 1129 } else { | 1069 } else { |
| 1130 AddChar(c0_); | 1070 AddLiteralChar(c0_); |
| 1131 keyword_match.AddChar(c0_); | 1071 keyword_match.AddChar(c0_); |
| 1132 Advance(); | 1072 Advance(); |
| 1133 } | 1073 } |
| 1134 | 1074 |
| 1135 // Scan the rest of the identifier characters. | 1075 // Scan the rest of the identifier characters. |
| 1136 while (kIsIdentifierPart.get(c0_)) { | 1076 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { |
| 1137 if (c0_ == '\\') { | 1077 if (c0_ == '\\') { |
| 1138 uc32 c = ScanIdentifierUnicodeEscape(); | 1078 uc32 c = ScanIdentifierUnicodeEscape(); |
| 1139 // Only allow legal identifier part characters. | 1079 // Only allow legal identifier part characters. |
| 1140 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; | 1080 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL; |
| 1141 AddChar(c); | 1081 AddLiteralChar(c); |
| 1142 keyword_match.Fail(); | 1082 keyword_match.Fail(); |
| 1143 } else { | 1083 } else { |
| 1144 AddChar(c0_); | 1084 AddLiteralChar(c0_); |
| 1145 keyword_match.AddChar(c0_); | 1085 keyword_match.AddChar(c0_); |
| 1146 Advance(); | 1086 Advance(); |
| 1147 } | 1087 } |
| 1148 } | 1088 } |
| 1149 literal.Complete(); | 1089 literal.Complete(); |
| 1150 | 1090 |
| 1151 return keyword_match.token(); | 1091 return keyword_match.token(); |
| 1152 } | 1092 } |
| 1153 | 1093 |
| 1154 | 1094 |
| 1155 | 1095 |
| 1156 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { | |
| 1157 // Checks whether the buffer contains an identifier (no escape). | |
| 1158 if (!buffer->has_more()) return false; | |
| 1159 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; | |
| 1160 while (buffer->has_more()) { | |
| 1161 if (!kIsIdentifierPart.get(buffer->GetNext())) return false; | |
| 1162 } | |
| 1163 return true; | |
| 1164 } | |
| 1165 | |
| 1166 | |
| 1167 bool Scanner::ScanRegExpPattern(bool seen_equal) { | 1096 bool Scanner::ScanRegExpPattern(bool seen_equal) { |
| 1168 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 1097 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
| 1169 bool in_character_class = false; | 1098 bool in_character_class = false; |
| 1170 | 1099 |
| 1171 // Previous token is either '/' or '/=', in the second case, the | 1100 // Previous token is either '/' or '/=', in the second case, the |
| 1172 // pattern starts at =. | 1101 // pattern starts at =. |
| 1173 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 1102 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
| 1174 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 1103 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
| 1175 | 1104 |
| 1176 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 1105 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| 1177 // the scanner should pass uninterpreted bodies to the RegExp | 1106 // the scanner should pass uninterpreted bodies to the RegExp |
| 1178 // constructor. | 1107 // constructor. |
| 1179 LiteralScope literal(this); | 1108 LiteralScope literal(this); |
| 1180 if (seen_equal) | 1109 if (seen_equal) |
| 1181 AddChar('='); | 1110 AddLiteralChar('='); |
| 1182 | 1111 |
| 1183 while (c0_ != '/' || in_character_class) { | 1112 while (c0_ != '/' || in_character_class) { |
| 1184 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; | 1113 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
| 1185 if (c0_ == '\\') { // escaped character | 1114 if (c0_ == '\\') { // escaped character |
| 1186 AddCharAdvance(); | 1115 AddLiteralCharAdvance(); |
| 1187 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; | 1116 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
| 1188 AddCharAdvance(); | 1117 AddLiteralCharAdvance(); |
| 1189 } else { // unescaped character | 1118 } else { // unescaped character |
| 1190 if (c0_ == '[') in_character_class = true; | 1119 if (c0_ == '[') in_character_class = true; |
| 1191 if (c0_ == ']') in_character_class = false; | 1120 if (c0_ == ']') in_character_class = false; |
| 1192 AddCharAdvance(); | 1121 AddLiteralCharAdvance(); |
| 1193 } | 1122 } |
| 1194 } | 1123 } |
| 1195 Advance(); // consume '/' | 1124 Advance(); // consume '/' |
| 1196 | 1125 |
| 1197 literal.Complete(); | 1126 literal.Complete(); |
| 1198 | 1127 |
| 1199 return true; | 1128 return true; |
| 1200 } | 1129 } |
| 1201 | 1130 |
| 1202 bool Scanner::ScanRegExpFlags() { | 1131 bool Scanner::ScanRegExpFlags() { |
| 1203 // Scan regular expression flags. | 1132 // Scan regular expression flags. |
| 1204 LiteralScope literal(this); | 1133 LiteralScope literal(this); |
| 1205 while (kIsIdentifierPart.get(c0_)) { | 1134 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { |
| 1206 if (c0_ == '\\') { | 1135 if (c0_ == '\\') { |
| 1207 uc32 c = ScanIdentifierUnicodeEscape(); | 1136 uc32 c = ScanIdentifierUnicodeEscape(); |
| 1208 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { | 1137 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { |
| 1209 // We allow any escaped character, unlike the restriction on | 1138 // We allow any escaped character, unlike the restriction on |
| 1210 // IdentifierPart when it is used to build an IdentifierName. | 1139 // IdentifierPart when it is used to build an IdentifierName. |
| 1211 AddChar(c); | 1140 AddLiteralChar(c); |
| 1212 continue; | 1141 continue; |
| 1213 } | 1142 } |
| 1214 } | 1143 } |
| 1215 AddCharAdvance(); | 1144 AddLiteralCharAdvance(); |
| 1216 } | 1145 } |
| 1217 literal.Complete(); | 1146 literal.Complete(); |
| 1218 | 1147 |
| 1219 next_.location.end_pos = source_pos() - 1; | 1148 next_.location.end_pos = source_pos() - 1; |
| 1220 return true; | 1149 return true; |
| 1221 } | 1150 } |
| 1222 | 1151 |
| 1223 } } // namespace v8::internal | 1152 } } // namespace v8::internal |
| OLD | NEW |