Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(203)

Side by Side Diff: src/scanner.cc

Issue 5188006: Push version 2.5.7 to trunk.... (Closed) Base URL: http://v8.googlecode.com/svn/trunk/
Patch Set: Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/scanner.h ('k') | src/scanner-base.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 12 matching lines...) Expand all
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 #include "v8.h" 28 #include "v8.h"
29 29
30 #include "ast.h" 30 #include "ast.h"
31 #include "handles.h" 31 #include "handles.h"
32 #include "scanner.h" 32 #include "scanner.h"
33 #include "unicode-inl.h"
33 34
34 namespace v8 { 35 namespace v8 {
35 namespace internal { 36 namespace internal {
36 37
37 // ---------------------------------------------------------------------------- 38 // ----------------------------------------------------------------------------
38 // Character predicates
39
40
41 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;
42 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;
43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;
44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;
45
46
47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
48
49
50 // ----------------------------------------------------------------------------
51 // UTF8Buffer 39 // UTF8Buffer
52 40
53 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { } 41 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity), recording_(false) { }
54 42
55 43
56 UTF8Buffer::~UTF8Buffer() {} 44 UTF8Buffer::~UTF8Buffer() {}
57 45
58 46
59 void UTF8Buffer::AddCharSlow(uc32 c) { 47 void UTF8Buffer::AddCharSlow(uc32 c) {
60 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); 48 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);
61 int length = unibrow::Utf8::Length(c); 49 int length = unibrow::Utf8::Length(c);
62 Vector<char> block = buffer_.AddBlock(length, '\0'); 50 Vector<char> block = buffer_.AddBlock(length, '\0');
63 #ifdef DEBUG 51 #ifdef DEBUG
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
128 } 116 }
129 117
130 118
131 void CharacterStreamUTF16Buffer::SeekForward(int pos) { 119 void CharacterStreamUTF16Buffer::SeekForward(int pos) {
132 pos_ = pos; 120 pos_ = pos;
133 ASSERT(pushback_buffer()->is_empty()); 121 ASSERT(pushback_buffer()->is_empty());
134 stream_->Seek(pos); 122 stream_->Seek(pos);
135 } 123 }
136 124
137 125
138 // ExternalStringUTF16Buffer
139 template <typename StringType, typename CharType>
140 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
141 : raw_data_(NULL) { }
142
143
144 template <typename StringType, typename CharType>
145 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
146 Handle<StringType> data,
147 int start_position,
148 int end_position) {
149 ASSERT(!data.is_null());
150 raw_data_ = data->resource()->data();
151
152 ASSERT(end_position <= data->length());
153 if (start_position > 0) {
154 SeekForward(start_position);
155 }
156 end_ =
157 end_position != Scanner::kNoEndPosition ? end_position : data->length();
158 }
159
160
161 template <typename StringType, typename CharType>
162 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
163 if (pos_ < end_) {
164 return raw_data_[pos_++];
165 } else {
166 // note: currently the following increment is necessary to avoid a
167 // test-parser problem!
168 pos_++;
169 return static_cast<uc32>(-1);
170 }
171 }
172
173
174 template <typename StringType, typename CharType>
175 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
176 pos_--;
177 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
178 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
179 }
180
181
182 template <typename StringType, typename CharType>
183 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
184 pos_ = pos;
185 }
186
187 // ---------------------------------------------------------------------------- 126 // ----------------------------------------------------------------------------
188 // Scanner::LiteralScope 127 // Scanner::LiteralScope
189 128
190 Scanner::LiteralScope::LiteralScope(Scanner* self) 129 Scanner::LiteralScope::LiteralScope(Scanner* self)
191 : scanner_(self), complete_(false) { 130 : scanner_(self), complete_(false) {
192 self->StartLiteral(); 131 self->StartLiteral();
193 } 132 }
194 133
195 134
196 Scanner::LiteralScope::~LiteralScope() { 135 Scanner::LiteralScope::~LiteralScope() {
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after
302 } 241 }
303 return current_.token; 242 return current_.token;
304 } 243 }
305 244
306 245
307 void Scanner::StartLiteral() { 246 void Scanner::StartLiteral() {
308 literal_buffer_.StartLiteral(); 247 literal_buffer_.StartLiteral();
309 } 248 }
310 249
311 250
312 void Scanner::AddChar(uc32 c) { 251 void Scanner::AddLiteralChar(uc32 c) {
313 literal_buffer_.AddChar(c); 252 literal_buffer_.AddChar(c);
314 } 253 }
315 254
316 255
317 void Scanner::TerminateLiteral() { 256 void Scanner::TerminateLiteral() {
318 next_.literal_chars = literal_buffer_.EndLiteral(); 257 next_.literal_chars = literal_buffer_.EndLiteral();
319 } 258 }
320 259
321 260
322 void Scanner::DropLiteral() { 261 void Scanner::DropLiteral() {
323 literal_buffer_.DropLiteral(); 262 literal_buffer_.DropLiteral();
324 } 263 }
325 264
326 265
327 void Scanner::AddCharAdvance() { 266 void Scanner::AddLiteralCharAdvance() {
328 AddChar(c0_); 267 AddLiteralChar(c0_);
329 Advance(); 268 Advance();
330 } 269 }
331 270
332 271
333 static inline bool IsByteOrderMark(uc32 c) { 272 static inline bool IsByteOrderMark(uc32 c) {
334 // The Unicode value U+FFFE is guaranteed never to be assigned as a 273 // The Unicode value U+FFFE is guaranteed never to be assigned as a
335 // Unicode character; this implies that in a Unicode context the 274 // Unicode character; this implies that in a Unicode context the
336 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF 275 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
337 // character expressed in little-endian byte order (since it could 276 // character expressed in little-endian byte order (since it could
338 // not be a U+FFFE character expressed in big-endian byte 277 // not be a U+FFFE character expressed in big-endian byte
(...skipping 12 matching lines...) Expand all
351 return source_pos() != start_position; 290 return source_pos() != start_position;
352 } 291 }
353 292
354 293
355 bool Scanner::SkipJavaScriptWhiteSpace() { 294 bool Scanner::SkipJavaScriptWhiteSpace() {
356 int start_position = source_pos(); 295 int start_position = source_pos();
357 296
358 while (true) { 297 while (true) {
359 // We treat byte-order marks (BOMs) as whitespace for better 298 // We treat byte-order marks (BOMs) as whitespace for better
360 // compatibility with Spidermonkey and other JavaScript engines. 299 // compatibility with Spidermonkey and other JavaScript engines.
361 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { 300 while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
362 // IsWhiteSpace() includes line terminators! 301 // IsWhiteSpace() includes line terminators!
363 if (kIsLineTerminator.get(c0_)) { 302 if (ScannerConstants::kIsLineTerminator.get(c0_)) {
364 // Ignore line terminators, but remember them. This is necessary 303 // Ignore line terminators, but remember them. This is necessary
365 // for automatic semicolon insertion. 304 // for automatic semicolon insertion.
366 has_line_terminator_before_next_ = true; 305 has_line_terminator_before_next_ = true;
367 } 306 }
368 Advance(); 307 Advance();
369 } 308 }
370 309
371 // If there is an HTML comment end '-->' at the beginning of a 310 // If there is an HTML comment end '-->' at the beginning of a
372 // line (with only whitespace in front of it), we treat the rest 311 // line (with only whitespace in front of it), we treat the rest
373 // of the line as a comment. This is in line with the way 312 // of the line as a comment. This is in line with the way
(...skipping 19 matching lines...) Expand all
393 332
394 333
395 Token::Value Scanner::SkipSingleLineComment() { 334 Token::Value Scanner::SkipSingleLineComment() {
396 Advance(); 335 Advance();
397 336
398 // The line terminator at the end of the line is not considered 337 // The line terminator at the end of the line is not considered
399 // to be part of the single-line comment; it is recognized 338 // to be part of the single-line comment; it is recognized
400 // separately by the lexical grammar and becomes part of the 339 // separately by the lexical grammar and becomes part of the
401 // stream of input elements for the syntactic grammar (see 340 // stream of input elements for the syntactic grammar (see
402 // ECMA-262, section 7.4, page 12). 341 // ECMA-262, section 7.4, page 12).
403 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { 342 while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) {
404 Advance(); 343 Advance();
405 } 344 }
406 345
407 return Token::WHITESPACE; 346 return Token::WHITESPACE;
408 } 347 }
409 348
410 349
411 Token::Value Scanner::SkipMultiLineComment() { 350 Token::Value Scanner::SkipMultiLineComment() {
412 ASSERT(c0_ == '*'); 351 ASSERT(c0_ == '*');
413 Advance(); 352 Advance();
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
530 469
531 470
532 Token::Value Scanner::ScanJsonString() { 471 Token::Value Scanner::ScanJsonString() {
533 ASSERT_EQ('"', c0_); 472 ASSERT_EQ('"', c0_);
534 Advance(); 473 Advance();
535 LiteralScope literal(this); 474 LiteralScope literal(this);
536 while (c0_ != '"' && c0_ > 0) { 475 while (c0_ != '"' && c0_ > 0) {
537 // Reject control characters (0x01-0x1f). NUL and end of input (<= 0) terminate the loop and are rejected after it. 476 // Reject control characters (0x01-0x1f). NUL and end of input (<= 0) terminate the loop and are rejected after it.
538 if (c0_ < 0x20) return Token::ILLEGAL; 477 if (c0_ < 0x20) return Token::ILLEGAL;
539 if (c0_ != '\\') { 478 if (c0_ != '\\') {
540 AddCharAdvance(); 479 AddLiteralCharAdvance();
541 } else { 480 } else {
542 Advance(); 481 Advance();
543 switch (c0_) { 482 switch (c0_) {
544 case '"': 483 case '"':
545 case '\\': 484 case '\\':
546 case '/': 485 case '/':
547 AddChar(c0_); 486 AddLiteralChar(c0_);
548 break; 487 break;
549 case 'b': 488 case 'b':
550 AddChar('\x08'); 489 AddLiteralChar('\x08');
551 break; 490 break;
552 case 'f': 491 case 'f':
553 AddChar('\x0c'); 492 AddLiteralChar('\x0c');
554 break; 493 break;
555 case 'n': 494 case 'n':
556 AddChar('\x0a'); 495 AddLiteralChar('\x0a');
557 break; 496 break;
558 case 'r': 497 case 'r':
559 AddChar('\x0d'); 498 AddLiteralChar('\x0d');
560 break; 499 break;
561 case 't': 500 case 't':
562 AddChar('\x09'); 501 AddLiteralChar('\x09');
563 break; 502 break;
564 case 'u': { 503 case 'u': {
565 uc32 value = 0; 504 uc32 value = 0;
566 for (int i = 0; i < 4; i++) { 505 for (int i = 0; i < 4; i++) {
567 Advance(); 506 Advance();
568 int digit = HexValue(c0_); 507 int digit = HexValue(c0_);
569 if (digit < 0) { 508 if (digit < 0) {
570 return Token::ILLEGAL; 509 return Token::ILLEGAL;
571 } 510 }
572 value = value * 16 + digit; 511 value = value * 16 + digit;
573 } 512 }
574 AddChar(value); 513 AddLiteralChar(value);
575 break; 514 break;
576 } 515 }
577 default: 516 default:
578 return Token::ILLEGAL; 517 return Token::ILLEGAL;
579 } 518 }
580 Advance(); 519 Advance();
581 } 520 }
582 } 521 }
583 if (c0_ != '"') { 522 if (c0_ != '"') {
584 return Token::ILLEGAL; 523 return Token::ILLEGAL;
585 } 524 }
586 literal.Complete(); 525 literal.Complete();
587 Advance(); 526 Advance();
588 return Token::STRING; 527 return Token::STRING;
589 } 528 }
590 529
591 530
592 Token::Value Scanner::ScanJsonNumber() { 531 Token::Value Scanner::ScanJsonNumber() {
593 LiteralScope literal(this); 532 LiteralScope literal(this);
594 if (c0_ == '-') AddCharAdvance(); 533 if (c0_ == '-') AddLiteralCharAdvance();
595 if (c0_ == '0') { 534 if (c0_ == '0') {
596 AddCharAdvance(); 535 AddLiteralCharAdvance();
597 // Prefix zero is only allowed if it's the only digit before 536 // Prefix zero is only allowed if it's the only digit before
598 // a decimal point or exponent. 537 // a decimal point or exponent.
599 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; 538 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
600 } else { 539 } else {
601 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; 540 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;
602 do { 541 do {
603 AddCharAdvance(); 542 AddLiteralCharAdvance();
604 } while (c0_ >= '0' && c0_ <= '9'); 543 } while (c0_ >= '0' && c0_ <= '9');
605 } 544 }
606 if (c0_ == '.') { 545 if (c0_ == '.') {
607 AddCharAdvance(); 546 AddLiteralCharAdvance();
608 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; 547 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
609 do { 548 do {
610 AddCharAdvance(); 549 AddLiteralCharAdvance();
611 } while (c0_ >= '0' && c0_ <= '9'); 550 } while (c0_ >= '0' && c0_ <= '9');
612 } 551 }
613 if (AsciiAlphaToLower(c0_) == 'e') { 552 if (AsciiAlphaToLower(c0_) == 'e') {
614 AddCharAdvance(); 553 AddLiteralCharAdvance();
615 if (c0_ == '-' || c0_ == '+') AddCharAdvance(); 554 if (c0_ == '-' || c0_ == '+') AddLiteralCharAdvance();
616 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; 555 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
617 do { 556 do {
618 AddCharAdvance(); 557 AddLiteralCharAdvance();
619 } while (c0_ >= '0' && c0_ <= '9'); 558 } while (c0_ >= '0' && c0_ <= '9');
620 } 559 }
621 literal.Complete(); 560 literal.Complete();
622 return Token::NUMBER; 561 return Token::NUMBER;
623 } 562 }
624 563
625 564
626 Token::Value Scanner::ScanJsonIdentifier(const char* text, 565 Token::Value Scanner::ScanJsonIdentifier(const char* text,
627 Token::Value token) { 566 Token::Value token) {
628 LiteralScope literal(this); 567 LiteralScope literal(this);
629 while (*text != '\0') { 568 while (*text != '\0') {
630 if (c0_ != *text) return Token::ILLEGAL; 569 if (c0_ != *text) return Token::ILLEGAL;
631 Advance(); 570 Advance();
632 text++; 571 text++;
633 } 572 }
634 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; 573 if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
635 literal.Complete(); 574 literal.Complete();
636 return token; 575 return token;
637 } 576 }
638 577
639 578
640 void Scanner::ScanJavaScript() { 579 void Scanner::ScanJavaScript() {
641 next_.literal_chars = Vector<const char>(); 580 next_.literal_chars = Vector<const char>();
642 Token::Value token; 581 Token::Value token;
643 do { 582 do {
644 // Remember the position of the next token 583 // Remember the position of the next token
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after
847 786
848 case '?': 787 case '?':
849 token = Select(Token::CONDITIONAL); 788 token = Select(Token::CONDITIONAL);
850 break; 789 break;
851 790
852 case '~': 791 case '~':
853 token = Select(Token::BIT_NOT); 792 token = Select(Token::BIT_NOT);
854 break; 793 break;
855 794
856 default: 795 default:
857 if (kIsIdentifierStart.get(c0_)) { 796 if (ScannerConstants::kIsIdentifierStart.get(c0_)) {
858 token = ScanIdentifier(); 797 token = ScanIdentifier();
859 } else if (IsDecimalDigit(c0_)) { 798 } else if (IsDecimalDigit(c0_)) {
860 token = ScanNumber(false); 799 token = ScanNumber(false);
861 } else if (SkipWhiteSpace()) { 800 } else if (SkipWhiteSpace()) {
862 token = Token::WHITESPACE; 801 token = Token::WHITESPACE;
863 } else if (c0_ < 0) { 802 } else if (c0_ < 0) {
864 token = Token::EOS; 803 token = Token::EOS;
865 } else { 804 } else {
866 token = Select(Token::ILLEGAL); 805 token = Select(Token::ILLEGAL);
867 } 806 }
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
930 } 869 }
931 return x; 870 return x;
932 } 871 }
933 872
934 873
935 void Scanner::ScanEscape() { 874 void Scanner::ScanEscape() {
936 uc32 c = c0_; 875 uc32 c = c0_;
937 Advance(); 876 Advance();
938 877
939 // Skip escaped newlines. 878 // Skip escaped newlines.
940 if (kIsLineTerminator.get(c)) { 879 if (ScannerConstants::kIsLineTerminator.get(c)) {
941 // Allow CR+LF newlines in multiline string literals. 880 // Allow CR+LF newlines in multiline string literals.
942 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); 881 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
943 // Allow LF+CR newlines in multiline string literals. 882 // Allow LF+CR newlines in multiline string literals.
944 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); 883 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
945 return; 884 return;
946 } 885 }
947 886
948 switch (c) { 887 switch (c) {
949 case '\'': // fall through 888 case '\'': // fall through
950 case '"' : // fall through 889 case '"' : // fall through
(...skipping 12 matching lines...) Expand all
963 case '3' : // fall through 902 case '3' : // fall through
964 case '4' : // fall through 903 case '4' : // fall through
965 case '5' : // fall through 904 case '5' : // fall through
966 case '6' : // fall through 905 case '6' : // fall through
967 case '7' : c = ScanOctalEscape(c, 2); break; 906 case '7' : c = ScanOctalEscape(c, 2); break;
968 } 907 }
969 908
970 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these 909 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
971 // should be illegal, but they are commonly handled 910 // should be illegal, but they are commonly handled
972 // as non-escaped characters by JS VMs. 911 // as non-escaped characters by JS VMs.
973 AddChar(c); 912 AddLiteralChar(c);
974 } 913 }
975 914
976 915
977 Token::Value Scanner::ScanString() { 916 Token::Value Scanner::ScanString() {
978 uc32 quote = c0_; 917 uc32 quote = c0_;
979 Advance(); // consume quote 918 Advance(); // consume quote
980 919
981 LiteralScope literal(this); 920 LiteralScope literal(this);
982 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { 921 while (c0_ != quote && c0_ >= 0
922 && !ScannerConstants::kIsLineTerminator.get(c0_)) {
983 uc32 c = c0_; 923 uc32 c = c0_;
984 Advance(); 924 Advance();
985 if (c == '\\') { 925 if (c == '\\') {
986 if (c0_ < 0) return Token::ILLEGAL; 926 if (c0_ < 0) return Token::ILLEGAL;
987 ScanEscape(); 927 ScanEscape();
988 } else { 928 } else {
989 AddChar(c); 929 AddLiteralChar(c);
990 } 930 }
991 } 931 }
992 if (c0_ != quote) return Token::ILLEGAL; 932 if (c0_ != quote) return Token::ILLEGAL;
993 literal.Complete(); 933 literal.Complete();
994 934
995 Advance(); // consume quote 935 Advance(); // consume quote
996 return Token::STRING; 936 return Token::STRING;
997 } 937 }
998 938
999 939
(...skipping 10 matching lines...) Expand all
1010 return then; 950 return then;
1011 } else { 951 } else {
1012 return else_; 952 return else_;
1013 } 953 }
1014 } 954 }
1015 955
1016 956
1017 // Scans a (possibly empty) run of decimal digits, adding each one to the current literal. 957 // Scans a (possibly empty) run of decimal digits, adding each one to the current literal.
1018 void Scanner::ScanDecimalDigits() { 958 void Scanner::ScanDecimalDigits() {
1019 while (IsDecimalDigit(c0_)) 959 while (IsDecimalDigit(c0_))
1020 AddCharAdvance(); 960 AddLiteralCharAdvance();
1021 } 961 }
1022 962
1023 963
1024 Token::Value Scanner::ScanNumber(bool seen_period) { 964 Token::Value Scanner::ScanNumber(bool seen_period) {
1025 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction 965 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
1026 966
1027 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; 967 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
1028 968
1029 LiteralScope literal(this); 969 LiteralScope literal(this);
1030 if (seen_period) { 970 if (seen_period) {
1031 // we have already seen a decimal point of the float 971 // we have already seen a decimal point of the float
1032 AddChar('.'); 972 AddLiteralChar('.');
1033 ScanDecimalDigits(); // we know we have at least one digit 973 ScanDecimalDigits(); // we know we have at least one digit
1034 974
1035 } else { 975 } else {
1036 // if the first character is '0' we must check for octals and hex 976 // if the first character is '0' we must check for octals and hex
1037 if (c0_ == '0') { 977 if (c0_ == '0') {
1038 AddCharAdvance(); 978 AddLiteralCharAdvance();
1039 979
1040 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number 980 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number
1041 if (c0_ == 'x' || c0_ == 'X') { 981 if (c0_ == 'x' || c0_ == 'X') {
1042 // hex number 982 // hex number
1043 kind = HEX; 983 kind = HEX;
1044 AddCharAdvance(); 984 AddLiteralCharAdvance();
1045 if (!IsHexDigit(c0_)) { 985 if (!IsHexDigit(c0_)) {
1046 // we must have at least one hex digit after 'x'/'X' 986 // we must have at least one hex digit after 'x'/'X'
1047 return Token::ILLEGAL; 987 return Token::ILLEGAL;
1048 } 988 }
1049 while (IsHexDigit(c0_)) { 989 while (IsHexDigit(c0_)) {
1050 AddCharAdvance(); 990 AddLiteralCharAdvance();
1051 } 991 }
1052 } else if ('0' <= c0_ && c0_ <= '7') { 992 } else if ('0' <= c0_ && c0_ <= '7') {
1053 // (possible) octal number 993 // (possible) octal number
1054 kind = OCTAL; 994 kind = OCTAL;
1055 while (true) { 995 while (true) {
1056 if (c0_ == '8' || c0_ == '9') { 996 if (c0_ == '8' || c0_ == '9') {
1057 kind = DECIMAL; 997 kind = DECIMAL;
1058 break; 998 break;
1059 } 999 }
1060 if (c0_ < '0' || '7' < c0_) break; 1000 if (c0_ < '0' || '7' < c0_) break;
1061 AddCharAdvance(); 1001 AddLiteralCharAdvance();
1062 } 1002 }
1063 } 1003 }
1064 } 1004 }
1065 1005
1066 // Parse decimal digits and allow trailing fractional part. 1006 // Parse decimal digits and allow trailing fractional part.
1067 if (kind == DECIMAL) { 1007 if (kind == DECIMAL) {
1068 ScanDecimalDigits(); // optional 1008 ScanDecimalDigits(); // optional
1069 if (c0_ == '.') { 1009 if (c0_ == '.') {
1070 AddCharAdvance(); 1010 AddLiteralCharAdvance();
1071 ScanDecimalDigits(); // optional 1011 ScanDecimalDigits(); // optional
1072 } 1012 }
1073 } 1013 }
1074 } 1014 }
1075 1015
1076 // scan exponent, if any 1016 // scan exponent, if any
1077 if (c0_ == 'e' || c0_ == 'E') { 1017 if (c0_ == 'e' || c0_ == 'E') {
1078 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number 1018 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
1079 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed 1019 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed
1080 // scan exponent 1020 // scan exponent
1081 AddCharAdvance(); 1021 AddLiteralCharAdvance();
1082 if (c0_ == '+' || c0_ == '-') 1022 if (c0_ == '+' || c0_ == '-')
1083 AddCharAdvance(); 1023 AddLiteralCharAdvance();
1084 if (!IsDecimalDigit(c0_)) { 1024 if (!IsDecimalDigit(c0_)) {
1085 // we must have at least one decimal digit after 'e'/'E' 1025 // we must have at least one decimal digit after 'e'/'E'
1086 return Token::ILLEGAL; 1026 return Token::ILLEGAL;
1087 } 1027 }
1088 ScanDecimalDigits(); 1028 ScanDecimalDigits();
1089 } 1029 }
1090 1030
1091 // The source character immediately following a numeric literal must 1031 // The source character immediately following a numeric literal must
1092 // not be an identifier start or a decimal digit; see ECMA-262 1032 // not be an identifier start or a decimal digit; see ECMA-262
1093 // section 7.8.3, page 17 (note that we read only one decimal digit 1033 // section 7.8.3, page 17 (note that we read only one decimal digit
1094 // if the value is 0). 1034 // if the value is 0).
1095 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) 1035 if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_))
1096 return Token::ILLEGAL; 1036 return Token::ILLEGAL;
1097 1037
1098 literal.Complete(); 1038 literal.Complete();
1099 1039
1100 return Token::NUMBER; 1040 return Token::NUMBER;
1101 } 1041 }
1102 1042
1103 1043
1104 uc32 Scanner::ScanIdentifierUnicodeEscape() { 1044 uc32 Scanner::ScanIdentifierUnicodeEscape() {
1105 Advance(); 1045 Advance();
1106 if (c0_ != 'u') return unibrow::Utf8::kBadChar; 1046 if (c0_ != 'u') return unibrow::Utf8::kBadChar;
1107 Advance(); 1047 Advance();
1108 uc32 c = ScanHexEscape('u', 4); 1048 uc32 c = ScanHexEscape('u', 4);
1109 // We do not allow a unicode escape sequence to start another 1049 // We do not allow a unicode escape sequence to start another
1110 // unicode escape sequence. 1050 // unicode escape sequence.
1111 if (c == '\\') return unibrow::Utf8::kBadChar; 1051 if (c == '\\') return unibrow::Utf8::kBadChar;
1112 return c; 1052 return c;
1113 } 1053 }
1114 1054
1115 1055
1116 Token::Value Scanner::ScanIdentifier() { 1056 Token::Value Scanner::ScanIdentifier() {
1117 ASSERT(kIsIdentifierStart.get(c0_)); 1057 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));
1118 1058
1119 LiteralScope literal(this); 1059 LiteralScope literal(this);
1120 KeywordMatcher keyword_match; 1060 KeywordMatcher keyword_match;
1121 1061
1122 // Scan identifier start character. 1062 // Scan identifier start character.
1123 if (c0_ == '\\') { 1063 if (c0_ == '\\') {
1124 uc32 c = ScanIdentifierUnicodeEscape(); 1064 uc32 c = ScanIdentifierUnicodeEscape();
1125 // Only allow legal identifier start characters. 1065 // Only allow legal identifier start characters.
1126 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; 1066 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;
1127 AddChar(c); 1067 AddLiteralChar(c);
1128 keyword_match.Fail(); 1068 keyword_match.Fail();
1129 } else { 1069 } else {
1130 AddChar(c0_); 1070 AddLiteralChar(c0_);
1131 keyword_match.AddChar(c0_); 1071 keyword_match.AddChar(c0_);
1132 Advance(); 1072 Advance();
1133 } 1073 }
1134 1074
1135 // Scan the rest of the identifier characters. 1075 // Scan the rest of the identifier characters.
1136 while (kIsIdentifierPart.get(c0_)) { 1076 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
1137 if (c0_ == '\\') { 1077 if (c0_ == '\\') {
1138 uc32 c = ScanIdentifierUnicodeEscape(); 1078 uc32 c = ScanIdentifierUnicodeEscape();
1139 // Only allow legal identifier part characters. 1079 // Only allow legal identifier part characters.
1140 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; 1080 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;
1141 AddChar(c); 1081 AddLiteralChar(c);
1142 keyword_match.Fail(); 1082 keyword_match.Fail();
1143 } else { 1083 } else {
1144 AddChar(c0_); 1084 AddLiteralChar(c0_);
1145 keyword_match.AddChar(c0_); 1085 keyword_match.AddChar(c0_);
1146 Advance(); 1086 Advance();
1147 } 1087 }
1148 } 1088 }
1149 literal.Complete(); 1089 literal.Complete();
1150 1090
1151 return keyword_match.token(); 1091 return keyword_match.token();
1152 } 1092 }
1153 1093
1154 1094
1155 1095
1156 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {
1157 // Checks whether the buffer contains an identifier (no escape).
1158 if (!buffer->has_more()) return false;
1159 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;
1160 while (buffer->has_more()) {
1161 if (!kIsIdentifierPart.get(buffer->GetNext())) return false;
1162 }
1163 return true;
1164 }
1165
1166
1167 bool Scanner::ScanRegExpPattern(bool seen_equal) { 1096 bool Scanner::ScanRegExpPattern(bool seen_equal) {
1168 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags 1097 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
1169 bool in_character_class = false; 1098 bool in_character_class = false;
1170 1099
1171 // Previous token is either '/' or '/=', in the second case, the 1100 // Previous token is either '/' or '/=', in the second case, the
1172 // pattern starts at =. 1101 // pattern starts at =.
1173 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); 1102 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
1174 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); 1103 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
1175 1104
1176 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 1105 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1177 // the scanner should pass uninterpreted bodies to the RegExp 1106 // the scanner should pass uninterpreted bodies to the RegExp
1178 // constructor. 1107 // constructor.
1179 LiteralScope literal(this); 1108 LiteralScope literal(this);
1180 if (seen_equal) 1109 if (seen_equal)
1181 AddChar('='); 1110 AddLiteralChar('=');
1182 1111
1183 while (c0_ != '/' || in_character_class) { 1112 while (c0_ != '/' || in_character_class) {
1184 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; 1113 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
1185 if (c0_ == '\\') { // escaped character 1114 if (c0_ == '\\') { // escaped character
1186 AddCharAdvance(); 1115 AddLiteralCharAdvance();
1187 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; 1116 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
1188 AddCharAdvance(); 1117 AddLiteralCharAdvance();
1189 } else { // unescaped character 1118 } else { // unescaped character
1190 if (c0_ == '[') in_character_class = true; 1119 if (c0_ == '[') in_character_class = true;
1191 if (c0_ == ']') in_character_class = false; 1120 if (c0_ == ']') in_character_class = false;
1192 AddCharAdvance(); 1121 AddLiteralCharAdvance();
1193 } 1122 }
1194 } 1123 }
1195 Advance(); // consume '/' 1124 Advance(); // consume '/'
1196 1125
1197 literal.Complete(); 1126 literal.Complete();
1198 1127
1199 return true; 1128 return true;
1200 } 1129 }
1201 1130
1202 bool Scanner::ScanRegExpFlags() { 1131 bool Scanner::ScanRegExpFlags() {
1203 // Scan regular expression flags. 1132 // Scan regular expression flags.
1204 LiteralScope literal(this); 1133 LiteralScope literal(this);
1205 while (kIsIdentifierPart.get(c0_)) { 1134 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
1206 if (c0_ == '\\') { 1135 if (c0_ == '\\') {
1207 uc32 c = ScanIdentifierUnicodeEscape(); 1136 uc32 c = ScanIdentifierUnicodeEscape();
1208 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { 1137 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
1209 // We allow any escaped character, unlike the restriction on 1138 // We allow any escaped character, unlike the restriction on
1210 // IdentifierPart when it is used to build an IdentifierName. 1139 // IdentifierPart when it is used to build an IdentifierName.
1211 AddChar(c); 1140 AddLiteralChar(c);
1212 continue; 1141 continue;
1213 } 1142 }
1214 } 1143 }
1215 AddCharAdvance(); 1144 AddLiteralCharAdvance();
1216 } 1145 }
1217 literal.Complete(); 1146 literal.Complete();
1218 1147
1219 next_.location.end_pos = source_pos() - 1; 1148 next_.location.end_pos = source_pos() - 1;
1220 return true; 1149 return true;
1221 } 1150 }
1222 1151
1223 } } // namespace v8::internal 1152 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/scanner.h ('k') | src/scanner-base.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698