OLD | NEW |
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 12 matching lines...) Expand all Loading... |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | 27 |
28 #include "v8.h" | 28 #include "v8.h" |
29 | 29 |
30 #include "ast.h" | 30 #include "ast.h" |
31 #include "handles.h" | 31 #include "handles.h" |
32 #include "scanner.h" | 32 #include "scanner.h" |
| 33 #include "unicode-inl.h" |
33 | 34 |
34 namespace v8 { | 35 namespace v8 { |
35 namespace internal { | 36 namespace internal { |
36 | 37 |
37 // ---------------------------------------------------------------------------- | 38 // ---------------------------------------------------------------------------- |
38 // Character predicates | |
39 | |
40 | |
41 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart; | |
42 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart; | |
43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; | |
44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; | |
45 | |
46 | |
47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; | |
48 | |
49 | |
50 // ---------------------------------------------------------------------------- | |
51 // UTF8Buffer | 39 // UTF8Buffer |
52 | 40 |
53 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { } | 41 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity), recording_(false) { } |
54 | 42 |
55 | 43 |
56 UTF8Buffer::~UTF8Buffer() {} | 44 UTF8Buffer::~UTF8Buffer() {} |
57 | 45 |
58 | 46 |
59 void UTF8Buffer::AddCharSlow(uc32 c) { | 47 void UTF8Buffer::AddCharSlow(uc32 c) { |
60 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); | 48 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); |
61 int length = unibrow::Utf8::Length(c); | 49 int length = unibrow::Utf8::Length(c); |
62 Vector<char> block = buffer_.AddBlock(length, '\0'); | 50 Vector<char> block = buffer_.AddBlock(length, '\0'); |
63 #ifdef DEBUG | 51 #ifdef DEBUG |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
128 } | 116 } |
129 | 117 |
130 | 118 |
131 void CharacterStreamUTF16Buffer::SeekForward(int pos) { | 119 void CharacterStreamUTF16Buffer::SeekForward(int pos) { |
132 pos_ = pos; | 120 pos_ = pos; |
133 ASSERT(pushback_buffer()->is_empty()); | 121 ASSERT(pushback_buffer()->is_empty()); |
134 stream_->Seek(pos); | 122 stream_->Seek(pos); |
135 } | 123 } |
136 | 124 |
137 | 125 |
138 // ExternalStringUTF16Buffer | |
139 template <typename StringType, typename CharType> | |
140 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer() | |
141 : raw_data_(NULL) { } | |
142 | |
143 | |
144 template <typename StringType, typename CharType> | |
145 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize( | |
146 Handle<StringType> data, | |
147 int start_position, | |
148 int end_position) { | |
149 ASSERT(!data.is_null()); | |
150 raw_data_ = data->resource()->data(); | |
151 | |
152 ASSERT(end_position <= data->length()); | |
153 if (start_position > 0) { | |
154 SeekForward(start_position); | |
155 } | |
156 end_ = | |
157 end_position != Scanner::kNoEndPosition ? end_position : data->length(); | |
158 } | |
159 | |
160 | |
161 template <typename StringType, typename CharType> | |
162 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() { | |
163 if (pos_ < end_) { | |
164 return raw_data_[pos_++]; | |
165 } else { | |
166 // note: currently the following increment is necessary to avoid a | |
167 // test-parser problem! | |
168 pos_++; | |
169 return static_cast<uc32>(-1); | |
170 } | |
171 } | |
172 | |
173 | |
174 template <typename StringType, typename CharType> | |
175 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) { | |
176 pos_--; | |
177 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); | |
178 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); | |
179 } | |
180 | |
181 | |
182 template <typename StringType, typename CharType> | |
183 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) { | |
184 pos_ = pos; | |
185 } | |
186 | |
187 // ---------------------------------------------------------------------------- | 126 // ---------------------------------------------------------------------------- |
188 // Scanner::LiteralScope | 127 // Scanner::LiteralScope |
189 | 128 |
190 Scanner::LiteralScope::LiteralScope(Scanner* self) | 129 Scanner::LiteralScope::LiteralScope(Scanner* self) |
191 : scanner_(self), complete_(false) { | 130 : scanner_(self), complete_(false) { |
192 self->StartLiteral(); | 131 self->StartLiteral(); |
193 } | 132 } |
194 | 133 |
195 | 134 |
196 Scanner::LiteralScope::~LiteralScope() { | 135 Scanner::LiteralScope::~LiteralScope() { |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
302 } | 241 } |
303 return current_.token; | 242 return current_.token; |
304 } | 243 } |
305 | 244 |
306 | 245 |
307 void Scanner::StartLiteral() { | 246 void Scanner::StartLiteral() { |
308 literal_buffer_.StartLiteral(); | 247 literal_buffer_.StartLiteral(); |
309 } | 248 } |
310 | 249 |
311 | 250 |
312 void Scanner::AddChar(uc32 c) { | 251 void Scanner::AddLiteralChar(uc32 c) { |
313 literal_buffer_.AddChar(c); | 252 literal_buffer_.AddChar(c); |
314 } | 253 } |
315 | 254 |
316 | 255 |
317 void Scanner::TerminateLiteral() { | 256 void Scanner::TerminateLiteral() { |
318 next_.literal_chars = literal_buffer_.EndLiteral(); | 257 next_.literal_chars = literal_buffer_.EndLiteral(); |
319 } | 258 } |
320 | 259 |
321 | 260 |
322 void Scanner::DropLiteral() { | 261 void Scanner::DropLiteral() { |
323 literal_buffer_.DropLiteral(); | 262 literal_buffer_.DropLiteral(); |
324 } | 263 } |
325 | 264 |
326 | 265 |
327 void Scanner::AddCharAdvance() { | 266 void Scanner::AddLiteralCharAdvance() { |
328 AddChar(c0_); | 267 AddLiteralChar(c0_); |
329 Advance(); | 268 Advance(); |
330 } | 269 } |
331 | 270 |
332 | 271 |
333 static inline bool IsByteOrderMark(uc32 c) { | 272 static inline bool IsByteOrderMark(uc32 c) { |
334 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 273 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
335 // Unicode character; this implies that in a Unicode context the | 274 // Unicode character; this implies that in a Unicode context the |
336 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 275 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
337 // character expressed in little-endian byte order (since it could | 276 // character expressed in little-endian byte order (since it could |
338 // not be a U+FFFE character expressed in big-endian byte | 277 // not be a U+FFFE character expressed in big-endian byte |
(...skipping 12 matching lines...) Expand all Loading... |
351 return source_pos() != start_position; | 290 return source_pos() != start_position; |
352 } | 291 } |
353 | 292 |
354 | 293 |
355 bool Scanner::SkipJavaScriptWhiteSpace() { | 294 bool Scanner::SkipJavaScriptWhiteSpace() { |
356 int start_position = source_pos(); | 295 int start_position = source_pos(); |
357 | 296 |
358 while (true) { | 297 while (true) { |
359 // We treat byte-order marks (BOMs) as whitespace for better | 298 // We treat byte-order marks (BOMs) as whitespace for better |
360 // compatibility with Spidermonkey and other JavaScript engines. | 299 // compatibility with Spidermonkey and other JavaScript engines. |
361 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | 300 while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { |
362 // IsWhiteSpace() includes line terminators! | 301 // IsWhiteSpace() includes line terminators! |
363 if (kIsLineTerminator.get(c0_)) { | 302 if (ScannerConstants::kIsLineTerminator.get(c0_)) { |
364 // Ignore line terminators, but remember them. This is necessary | 303 // Ignore line terminators, but remember them. This is necessary |
365 // for automatic semicolon insertion. | 304 // for automatic semicolon insertion. |
366 has_line_terminator_before_next_ = true; | 305 has_line_terminator_before_next_ = true; |
367 } | 306 } |
368 Advance(); | 307 Advance(); |
369 } | 308 } |
370 | 309 |
371 // If there is an HTML comment end '-->' at the beginning of a | 310 // If there is an HTML comment end '-->' at the beginning of a |
372 // line (with only whitespace in front of it), we treat the rest | 311 // line (with only whitespace in front of it), we treat the rest |
373 // of the line as a comment. This is in line with the way | 312 // of the line as a comment. This is in line with the way |
(...skipping 19 matching lines...) Expand all Loading... |
393 | 332 |
394 | 333 |
395 Token::Value Scanner::SkipSingleLineComment() { | 334 Token::Value Scanner::SkipSingleLineComment() { |
396 Advance(); | 335 Advance(); |
397 | 336 |
398 // The line terminator at the end of the line is not considered | 337 // The line terminator at the end of the line is not considered |
399 // to be part of the single-line comment; it is recognized | 338 // to be part of the single-line comment; it is recognized |
400 // separately by the lexical grammar and becomes part of the | 339 // separately by the lexical grammar and becomes part of the |
401 // stream of input elements for the syntactic grammar (see | 340 // stream of input elements for the syntactic grammar (see |
402 // ECMA-262, section 7.4, page 12). | 341 // ECMA-262, section 7.4, page 12). |
403 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 342 while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) { |
404 Advance(); | 343 Advance(); |
405 } | 344 } |
406 | 345 |
407 return Token::WHITESPACE; | 346 return Token::WHITESPACE; |
408 } | 347 } |
409 | 348 |
410 | 349 |
411 Token::Value Scanner::SkipMultiLineComment() { | 350 Token::Value Scanner::SkipMultiLineComment() { |
412 ASSERT(c0_ == '*'); | 351 ASSERT(c0_ == '*'); |
413 Advance(); | 352 Advance(); |
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
530 | 469 |
531 | 470 |
532 Token::Value Scanner::ScanJsonString() { | 471 Token::Value Scanner::ScanJsonString() { |
533 ASSERT_EQ('"', c0_); | 472 ASSERT_EQ('"', c0_); |
534 Advance(); | 473 Advance(); |
535 LiteralScope literal(this); | 474 LiteralScope literal(this); |
536 while (c0_ != '"' && c0_ > 0) { | 475 while (c0_ != '"' && c0_ > 0) { |
537 // Reject control characters (0x01-0x1f); NUL and end of input (<0) end the loop and are rejected below. | 476 // Reject control characters (0x01-0x1f); NUL and end of input (<0) end the loop and are rejected below. |
538 if (c0_ < 0x20) return Token::ILLEGAL; | 477 if (c0_ < 0x20) return Token::ILLEGAL; |
539 if (c0_ != '\\') { | 478 if (c0_ != '\\') { |
540 AddCharAdvance(); | 479 AddLiteralCharAdvance(); |
541 } else { | 480 } else { |
542 Advance(); | 481 Advance(); |
543 switch (c0_) { | 482 switch (c0_) { |
544 case '"': | 483 case '"': |
545 case '\\': | 484 case '\\': |
546 case '/': | 485 case '/': |
547 AddChar(c0_); | 486 AddLiteralChar(c0_); |
548 break; | 487 break; |
549 case 'b': | 488 case 'b': |
550 AddChar('\x08'); | 489 AddLiteralChar('\x08'); |
551 break; | 490 break; |
552 case 'f': | 491 case 'f': |
553 AddChar('\x0c'); | 492 AddLiteralChar('\x0c'); |
554 break; | 493 break; |
555 case 'n': | 494 case 'n': |
556 AddChar('\x0a'); | 495 AddLiteralChar('\x0a'); |
557 break; | 496 break; |
558 case 'r': | 497 case 'r': |
559 AddChar('\x0d'); | 498 AddLiteralChar('\x0d'); |
560 break; | 499 break; |
561 case 't': | 500 case 't': |
562 AddChar('\x09'); | 501 AddLiteralChar('\x09'); |
563 break; | 502 break; |
564 case 'u': { | 503 case 'u': { |
565 uc32 value = 0; | 504 uc32 value = 0; |
566 for (int i = 0; i < 4; i++) { | 505 for (int i = 0; i < 4; i++) { |
567 Advance(); | 506 Advance(); |
568 int digit = HexValue(c0_); | 507 int digit = HexValue(c0_); |
569 if (digit < 0) { | 508 if (digit < 0) { |
570 return Token::ILLEGAL; | 509 return Token::ILLEGAL; |
571 } | 510 } |
572 value = value * 16 + digit; | 511 value = value * 16 + digit; |
573 } | 512 } |
574 AddChar(value); | 513 AddLiteralChar(value); |
575 break; | 514 break; |
576 } | 515 } |
577 default: | 516 default: |
578 return Token::ILLEGAL; | 517 return Token::ILLEGAL; |
579 } | 518 } |
580 Advance(); | 519 Advance(); |
581 } | 520 } |
582 } | 521 } |
583 if (c0_ != '"') { | 522 if (c0_ != '"') { |
584 return Token::ILLEGAL; | 523 return Token::ILLEGAL; |
585 } | 524 } |
586 literal.Complete(); | 525 literal.Complete(); |
587 Advance(); | 526 Advance(); |
588 return Token::STRING; | 527 return Token::STRING; |
589 } | 528 } |
590 | 529 |
591 | 530 |
592 Token::Value Scanner::ScanJsonNumber() { | 531 Token::Value Scanner::ScanJsonNumber() { |
593 LiteralScope literal(this); | 532 LiteralScope literal(this); |
594 if (c0_ == '-') AddCharAdvance(); | 533 if (c0_ == '-') AddLiteralCharAdvance(); |
595 if (c0_ == '0') { | 534 if (c0_ == '0') { |
596 AddCharAdvance(); | 535 AddLiteralCharAdvance(); |
597 // Prefix zero is only allowed if it's the only digit before | 536 // Prefix zero is only allowed if it's the only digit before |
598 // a decimal point or exponent. | 537 // a decimal point or exponent. |
599 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; | 538 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; |
600 } else { | 539 } else { |
601 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; | 540 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; |
602 do { | 541 do { |
603 AddCharAdvance(); | 542 AddLiteralCharAdvance(); |
604 } while (c0_ >= '0' && c0_ <= '9'); | 543 } while (c0_ >= '0' && c0_ <= '9'); |
605 } | 544 } |
606 if (c0_ == '.') { | 545 if (c0_ == '.') { |
607 AddCharAdvance(); | 546 AddLiteralCharAdvance(); |
608 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; | 547 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; |
609 do { | 548 do { |
610 AddCharAdvance(); | 549 AddLiteralCharAdvance(); |
611 } while (c0_ >= '0' && c0_ <= '9'); | 550 } while (c0_ >= '0' && c0_ <= '9'); |
612 } | 551 } |
613 if (AsciiAlphaToLower(c0_) == 'e') { | 552 if (AsciiAlphaToLower(c0_) == 'e') { |
614 AddCharAdvance(); | 553 AddLiteralCharAdvance(); |
615 if (c0_ == '-' || c0_ == '+') AddCharAdvance(); | 554 if (c0_ == '-' || c0_ == '+') AddLiteralCharAdvance(); |
616 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; | 555 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; |
617 do { | 556 do { |
618 AddCharAdvance(); | 557 AddLiteralCharAdvance(); |
619 } while (c0_ >= '0' && c0_ <= '9'); | 558 } while (c0_ >= '0' && c0_ <= '9'); |
620 } | 559 } |
621 literal.Complete(); | 560 literal.Complete(); |
622 return Token::NUMBER; | 561 return Token::NUMBER; |
623 } | 562 } |
624 | 563 |
625 | 564 |
626 Token::Value Scanner::ScanJsonIdentifier(const char* text, | 565 Token::Value Scanner::ScanJsonIdentifier(const char* text, |
627 Token::Value token) { | 566 Token::Value token) { |
628 LiteralScope literal(this); | 567 LiteralScope literal(this); |
629 while (*text != '\0') { | 568 while (*text != '\0') { |
630 if (c0_ != *text) return Token::ILLEGAL; | 569 if (c0_ != *text) return Token::ILLEGAL; |
631 Advance(); | 570 Advance(); |
632 text++; | 571 text++; |
633 } | 572 } |
634 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; | 573 if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; |
635 literal.Complete(); | 574 literal.Complete(); |
636 return token; | 575 return token; |
637 } | 576 } |
638 | 577 |
639 | 578 |
640 void Scanner::ScanJavaScript() { | 579 void Scanner::ScanJavaScript() { |
641 next_.literal_chars = Vector<const char>(); | 580 next_.literal_chars = Vector<const char>(); |
642 Token::Value token; | 581 Token::Value token; |
643 do { | 582 do { |
644 // Remember the position of the next token | 583 // Remember the position of the next token |
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
847 | 786 |
848 case '?': | 787 case '?': |
849 token = Select(Token::CONDITIONAL); | 788 token = Select(Token::CONDITIONAL); |
850 break; | 789 break; |
851 | 790 |
852 case '~': | 791 case '~': |
853 token = Select(Token::BIT_NOT); | 792 token = Select(Token::BIT_NOT); |
854 break; | 793 break; |
855 | 794 |
856 default: | 795 default: |
857 if (kIsIdentifierStart.get(c0_)) { | 796 if (ScannerConstants::kIsIdentifierStart.get(c0_)) { |
858 token = ScanIdentifier(); | 797 token = ScanIdentifier(); |
859 } else if (IsDecimalDigit(c0_)) { | 798 } else if (IsDecimalDigit(c0_)) { |
860 token = ScanNumber(false); | 799 token = ScanNumber(false); |
861 } else if (SkipWhiteSpace()) { | 800 } else if (SkipWhiteSpace()) { |
862 token = Token::WHITESPACE; | 801 token = Token::WHITESPACE; |
863 } else if (c0_ < 0) { | 802 } else if (c0_ < 0) { |
864 token = Token::EOS; | 803 token = Token::EOS; |
865 } else { | 804 } else { |
866 token = Select(Token::ILLEGAL); | 805 token = Select(Token::ILLEGAL); |
867 } | 806 } |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
930 } | 869 } |
931 return x; | 870 return x; |
932 } | 871 } |
933 | 872 |
934 | 873 |
935 void Scanner::ScanEscape() { | 874 void Scanner::ScanEscape() { |
936 uc32 c = c0_; | 875 uc32 c = c0_; |
937 Advance(); | 876 Advance(); |
938 | 877 |
939 // Skip escaped newlines. | 878 // Skip escaped newlines. |
940 if (kIsLineTerminator.get(c)) { | 879 if (ScannerConstants::kIsLineTerminator.get(c)) { |
941 // Allow CR+LF newlines in multiline string literals. | 880 // Allow CR+LF newlines in multiline string literals. |
942 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 881 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
943 // Allow LF+CR newlines in multiline string literals. | 882 // Allow LF+CR newlines in multiline string literals. |
944 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 883 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); |
945 return; | 884 return; |
946 } | 885 } |
947 | 886 |
948 switch (c) { | 887 switch (c) { |
949 case '\'': // fall through | 888 case '\'': // fall through |
950 case '"' : // fall through | 889 case '"' : // fall through |
(...skipping 12 matching lines...) Expand all Loading... |
963 case '3' : // fall through | 902 case '3' : // fall through |
964 case '4' : // fall through | 903 case '4' : // fall through |
965 case '5' : // fall through | 904 case '5' : // fall through |
966 case '6' : // fall through | 905 case '6' : // fall through |
967 case '7' : c = ScanOctalEscape(c, 2); break; | 906 case '7' : c = ScanOctalEscape(c, 2); break; |
968 } | 907 } |
969 | 908 |
970 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these | 909 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these |
971 // should be illegal, but they are commonly handled | 910 // should be illegal, but they are commonly handled |
972 // as non-escaped characters by JS VMs. | 911 // as non-escaped characters by JS VMs. |
973 AddChar(c); | 912 AddLiteralChar(c); |
974 } | 913 } |
975 | 914 |
976 | 915 |
977 Token::Value Scanner::ScanString() { | 916 Token::Value Scanner::ScanString() { |
978 uc32 quote = c0_; | 917 uc32 quote = c0_; |
979 Advance(); // consume quote | 918 Advance(); // consume quote |
980 | 919 |
981 LiteralScope literal(this); | 920 LiteralScope literal(this); |
982 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 921 while (c0_ != quote && c0_ >= 0 |
| 922 && !ScannerConstants::kIsLineTerminator.get(c0_)) { |
983 uc32 c = c0_; | 923 uc32 c = c0_; |
984 Advance(); | 924 Advance(); |
985 if (c == '\\') { | 925 if (c == '\\') { |
986 if (c0_ < 0) return Token::ILLEGAL; | 926 if (c0_ < 0) return Token::ILLEGAL; |
987 ScanEscape(); | 927 ScanEscape(); |
988 } else { | 928 } else { |
989 AddChar(c); | 929 AddLiteralChar(c); |
990 } | 930 } |
991 } | 931 } |
992 if (c0_ != quote) return Token::ILLEGAL; | 932 if (c0_ != quote) return Token::ILLEGAL; |
993 literal.Complete(); | 933 literal.Complete(); |
994 | 934 |
995 Advance(); // consume quote | 935 Advance(); // consume quote |
996 return Token::STRING; | 936 return Token::STRING; |
997 } | 937 } |
998 | 938 |
999 | 939 |
(...skipping 10 matching lines...) Expand all Loading... |
1010 return then; | 950 return then; |
1011 } else { | 951 } else { |
1012 return else_; | 952 return else_; |
1013 } | 953 } |
1014 } | 954 } |
1015 | 955 |
1016 | 956 |
1017 // Returns true if any decimal digits were scanned, returns false otherwise. | 957 // Returns true if any decimal digits were scanned, returns false otherwise. |
1018 void Scanner::ScanDecimalDigits() { | 958 void Scanner::ScanDecimalDigits() { |
1019 while (IsDecimalDigit(c0_)) | 959 while (IsDecimalDigit(c0_)) |
1020 AddCharAdvance(); | 960 AddLiteralCharAdvance(); |
1021 } | 961 } |
1022 | 962 |
1023 | 963 |
1024 Token::Value Scanner::ScanNumber(bool seen_period) { | 964 Token::Value Scanner::ScanNumber(bool seen_period) { |
1025 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 965 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
1026 | 966 |
1027 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; | 967 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; |
1028 | 968 |
1029 LiteralScope literal(this); | 969 LiteralScope literal(this); |
1030 if (seen_period) { | 970 if (seen_period) { |
1031 // we have already seen a decimal point of the float | 971 // we have already seen a decimal point of the float |
1032 AddChar('.'); | 972 AddLiteralChar('.'); |
1033 ScanDecimalDigits(); // we know we have at least one digit | 973 ScanDecimalDigits(); // we know we have at least one digit |
1034 | 974 |
1035 } else { | 975 } else { |
1036 // if the first character is '0' we must check for octals and hex | 976 // if the first character is '0' we must check for octals and hex |
1037 if (c0_ == '0') { | 977 if (c0_ == '0') { |
1038 AddCharAdvance(); | 978 AddLiteralCharAdvance(); |
1039 | 979 |
1040 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number | 980 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number |
1041 if (c0_ == 'x' || c0_ == 'X') { | 981 if (c0_ == 'x' || c0_ == 'X') { |
1042 // hex number | 982 // hex number |
1043 kind = HEX; | 983 kind = HEX; |
1044 AddCharAdvance(); | 984 AddLiteralCharAdvance(); |
1045 if (!IsHexDigit(c0_)) { | 985 if (!IsHexDigit(c0_)) { |
1046 // we must have at least one hex digit after 'x'/'X' | 986 // we must have at least one hex digit after 'x'/'X' |
1047 return Token::ILLEGAL; | 987 return Token::ILLEGAL; |
1048 } | 988 } |
1049 while (IsHexDigit(c0_)) { | 989 while (IsHexDigit(c0_)) { |
1050 AddCharAdvance(); | 990 AddLiteralCharAdvance(); |
1051 } | 991 } |
1052 } else if ('0' <= c0_ && c0_ <= '7') { | 992 } else if ('0' <= c0_ && c0_ <= '7') { |
1053 // (possible) octal number | 993 // (possible) octal number |
1054 kind = OCTAL; | 994 kind = OCTAL; |
1055 while (true) { | 995 while (true) { |
1056 if (c0_ == '8' || c0_ == '9') { | 996 if (c0_ == '8' || c0_ == '9') { |
1057 kind = DECIMAL; | 997 kind = DECIMAL; |
1058 break; | 998 break; |
1059 } | 999 } |
1060 if (c0_ < '0' || '7' < c0_) break; | 1000 if (c0_ < '0' || '7' < c0_) break; |
1061 AddCharAdvance(); | 1001 AddLiteralCharAdvance(); |
1062 } | 1002 } |
1063 } | 1003 } |
1064 } | 1004 } |
1065 | 1005 |
1066 // Parse decimal digits and allow trailing fractional part. | 1006 // Parse decimal digits and allow trailing fractional part. |
1067 if (kind == DECIMAL) { | 1007 if (kind == DECIMAL) { |
1068 ScanDecimalDigits(); // optional | 1008 ScanDecimalDigits(); // optional |
1069 if (c0_ == '.') { | 1009 if (c0_ == '.') { |
1070 AddCharAdvance(); | 1010 AddLiteralCharAdvance(); |
1071 ScanDecimalDigits(); // optional | 1011 ScanDecimalDigits(); // optional |
1072 } | 1012 } |
1073 } | 1013 } |
1074 } | 1014 } |
1075 | 1015 |
1076 // scan exponent, if any | 1016 // scan exponent, if any |
1077 if (c0_ == 'e' || c0_ == 'E') { | 1017 if (c0_ == 'e' || c0_ == 'E') { |
1078 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number | 1018 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number |
1079 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed | 1019 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed |
1080 // scan exponent | 1020 // scan exponent |
1081 AddCharAdvance(); | 1021 AddLiteralCharAdvance(); |
1082 if (c0_ == '+' || c0_ == '-') | 1022 if (c0_ == '+' || c0_ == '-') |
1083 AddCharAdvance(); | 1023 AddLiteralCharAdvance(); |
1084 if (!IsDecimalDigit(c0_)) { | 1024 if (!IsDecimalDigit(c0_)) { |
1085 // we must have at least one decimal digit after 'e'/'E' | 1025 // we must have at least one decimal digit after 'e'/'E' |
1086 return Token::ILLEGAL; | 1026 return Token::ILLEGAL; |
1087 } | 1027 } |
1088 ScanDecimalDigits(); | 1028 ScanDecimalDigits(); |
1089 } | 1029 } |
1090 | 1030 |
1091 // The source character immediately following a numeric literal must | 1031 // The source character immediately following a numeric literal must |
1092 // not be an identifier start or a decimal digit; see ECMA-262 | 1032 // not be an identifier start or a decimal digit; see ECMA-262 |
1093 // section 7.8.3, page 17 (note that we read only one decimal digit | 1033 // section 7.8.3, page 17 (note that we read only one decimal digit |
1094 // if the value is 0). | 1034 // if the value is 0). |
1095 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) | 1035 if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_)) |
1096 return Token::ILLEGAL; | 1036 return Token::ILLEGAL; |
1097 | 1037 |
1098 literal.Complete(); | 1038 literal.Complete(); |
1099 | 1039 |
1100 return Token::NUMBER; | 1040 return Token::NUMBER; |
1101 } | 1041 } |
1102 | 1042 |
1103 | 1043 |
1104 uc32 Scanner::ScanIdentifierUnicodeEscape() { | 1044 uc32 Scanner::ScanIdentifierUnicodeEscape() { |
1105 Advance(); | 1045 Advance(); |
1106 if (c0_ != 'u') return unibrow::Utf8::kBadChar; | 1046 if (c0_ != 'u') return unibrow::Utf8::kBadChar; |
1107 Advance(); | 1047 Advance(); |
1108 uc32 c = ScanHexEscape('u', 4); | 1048 uc32 c = ScanHexEscape('u', 4); |
1109 // We do not allow a unicode escape sequence to start another | 1049 // We do not allow a unicode escape sequence to start another |
1110 // unicode escape sequence. | 1050 // unicode escape sequence. |
1111 if (c == '\\') return unibrow::Utf8::kBadChar; | 1051 if (c == '\\') return unibrow::Utf8::kBadChar; |
1112 return c; | 1052 return c; |
1113 } | 1053 } |
1114 | 1054 |
1115 | 1055 |
1116 Token::Value Scanner::ScanIdentifier() { | 1056 Token::Value Scanner::ScanIdentifier() { |
1117 ASSERT(kIsIdentifierStart.get(c0_)); | 1057 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); |
1118 | 1058 |
1119 LiteralScope literal(this); | 1059 LiteralScope literal(this); |
1120 KeywordMatcher keyword_match; | 1060 KeywordMatcher keyword_match; |
1121 | 1061 |
1122 // Scan identifier start character. | 1062 // Scan identifier start character. |
1123 if (c0_ == '\\') { | 1063 if (c0_ == '\\') { |
1124 uc32 c = ScanIdentifierUnicodeEscape(); | 1064 uc32 c = ScanIdentifierUnicodeEscape(); |
1125 // Only allow legal identifier start characters. | 1065 // Only allow legal identifier start characters. |
1126 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; | 1066 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL; |
1127 AddChar(c); | 1067 AddLiteralChar(c); |
1128 keyword_match.Fail(); | 1068 keyword_match.Fail(); |
1129 } else { | 1069 } else { |
1130 AddChar(c0_); | 1070 AddLiteralChar(c0_); |
1131 keyword_match.AddChar(c0_); | 1071 keyword_match.AddChar(c0_); |
1132 Advance(); | 1072 Advance(); |
1133 } | 1073 } |
1134 | 1074 |
1135 // Scan the rest of the identifier characters. | 1075 // Scan the rest of the identifier characters. |
1136 while (kIsIdentifierPart.get(c0_)) { | 1076 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { |
1137 if (c0_ == '\\') { | 1077 if (c0_ == '\\') { |
1138 uc32 c = ScanIdentifierUnicodeEscape(); | 1078 uc32 c = ScanIdentifierUnicodeEscape(); |
1139 // Only allow legal identifier part characters. | 1079 // Only allow legal identifier part characters. |
1140 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; | 1080 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL; |
1141 AddChar(c); | 1081 AddLiteralChar(c); |
1142 keyword_match.Fail(); | 1082 keyword_match.Fail(); |
1143 } else { | 1083 } else { |
1144 AddChar(c0_); | 1084 AddLiteralChar(c0_); |
1145 keyword_match.AddChar(c0_); | 1085 keyword_match.AddChar(c0_); |
1146 Advance(); | 1086 Advance(); |
1147 } | 1087 } |
1148 } | 1088 } |
1149 literal.Complete(); | 1089 literal.Complete(); |
1150 | 1090 |
1151 return keyword_match.token(); | 1091 return keyword_match.token(); |
1152 } | 1092 } |
1153 | 1093 |
1154 | 1094 |
1155 | 1095 |
1156 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { | |
1157 // Checks whether the buffer contains an identifier (no escape). | |
1158 if (!buffer->has_more()) return false; | |
1159 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; | |
1160 while (buffer->has_more()) { | |
1161 if (!kIsIdentifierPart.get(buffer->GetNext())) return false; | |
1162 } | |
1163 return true; | |
1164 } | |
1165 | |
1166 | |
1167 bool Scanner::ScanRegExpPattern(bool seen_equal) { | 1096 bool Scanner::ScanRegExpPattern(bool seen_equal) { |
1168 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 1097 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
1169 bool in_character_class = false; | 1098 bool in_character_class = false; |
1170 | 1099 |
1171 // Previous token is either '/' or '/=', in the second case, the | 1100 // Previous token is either '/' or '/=', in the second case, the |
1172 // pattern starts at =. | 1101 // pattern starts at =. |
1173 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 1102 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
1174 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 1103 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
1175 | 1104 |
1176 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 1105 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
1177 // the scanner should pass uninterpreted bodies to the RegExp | 1106 // the scanner should pass uninterpreted bodies to the RegExp |
1178 // constructor. | 1107 // constructor. |
1179 LiteralScope literal(this); | 1108 LiteralScope literal(this); |
1180 if (seen_equal) | 1109 if (seen_equal) |
1181 AddChar('='); | 1110 AddLiteralChar('='); |
1182 | 1111 |
1183 while (c0_ != '/' || in_character_class) { | 1112 while (c0_ != '/' || in_character_class) { |
1184 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; | 1113 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
1185 if (c0_ == '\\') { // escaped character | 1114 if (c0_ == '\\') { // escaped character |
1186 AddCharAdvance(); | 1115 AddLiteralCharAdvance(); |
1187 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; | 1116 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
1188 AddCharAdvance(); | 1117 AddLiteralCharAdvance(); |
1189 } else { // unescaped character | 1118 } else { // unescaped character |
1190 if (c0_ == '[') in_character_class = true; | 1119 if (c0_ == '[') in_character_class = true; |
1191 if (c0_ == ']') in_character_class = false; | 1120 if (c0_ == ']') in_character_class = false; |
1192 AddCharAdvance(); | 1121 AddLiteralCharAdvance(); |
1193 } | 1122 } |
1194 } | 1123 } |
1195 Advance(); // consume '/' | 1124 Advance(); // consume '/' |
1196 | 1125 |
1197 literal.Complete(); | 1126 literal.Complete(); |
1198 | 1127 |
1199 return true; | 1128 return true; |
1200 } | 1129 } |
1201 | 1130 |
1202 bool Scanner::ScanRegExpFlags() { | 1131 bool Scanner::ScanRegExpFlags() { |
1203 // Scan regular expression flags. | 1132 // Scan regular expression flags. |
1204 LiteralScope literal(this); | 1133 LiteralScope literal(this); |
1205 while (kIsIdentifierPart.get(c0_)) { | 1134 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { |
1206 if (c0_ == '\\') { | 1135 if (c0_ == '\\') { |
1207 uc32 c = ScanIdentifierUnicodeEscape(); | 1136 uc32 c = ScanIdentifierUnicodeEscape(); |
1208 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { | 1137 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { |
1209 // We allow any escaped character, unlike the restriction on | 1138 // We allow any escaped character, unlike the restriction on |
1210 // IdentifierPart when it is used to build an IdentifierName. | 1139 // IdentifierPart when it is used to build an IdentifierName. |
1211 AddChar(c); | 1140 AddLiteralChar(c); |
1212 continue; | 1141 continue; |
1213 } | 1142 } |
1214 } | 1143 } |
1215 AddCharAdvance(); | 1144 AddLiteralCharAdvance(); |
1216 } | 1145 } |
1217 literal.Complete(); | 1146 literal.Complete(); |
1218 | 1147 |
1219 next_.location.end_pos = source_pos() - 1; | 1148 next_.location.end_pos = source_pos() - 1; |
1220 return true; | 1149 return true; |
1221 } | 1150 } |
1222 | 1151 |
1223 } } // namespace v8::internal | 1152 } } // namespace v8::internal |
OLD | NEW |