| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
| 8 | 8 |
| 9 #include <stdint.h> | 9 #include <stdint.h> |
| 10 | 10 |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 49 } | 49 } |
| 50 | 50 |
| 51 | 51 |
| 52 void Scanner::Initialize(Utf16CharacterStream* source) { | 52 void Scanner::Initialize(Utf16CharacterStream* source) { |
| 53 source_ = source; | 53 source_ = source; |
| 54 // Need to capture identifiers in order to recognize "get" and "set" | 54 // Need to capture identifiers in order to recognize "get" and "set" |
| 55 // in object literals. | 55 // in object literals. |
| 56 Init(); | 56 Init(); |
| 57 // Skip initial whitespace allowing HTML comment ends just like | 57 // Skip initial whitespace allowing HTML comment ends just like |
| 58 // after a newline and scan first token. | 58 // after a newline and scan first token. |
| 59 has_line_terminator_before_next_ = true; | 59 has_preceding_line_terminator_ = true; |
| 60 did_see_multiline_comment_ = false; |
| 60 SkipWhiteSpace(); | 61 SkipWhiteSpace(); |
| 61 Scan(); | 62 Scan(); |
| 62 } | 63 } |
| 63 | 64 |
| 64 template <bool capture_raw, bool unicode> | 65 template <bool capture_raw, bool unicode> |
| 65 uc32 Scanner::ScanHexNumber(int expected_length) { | 66 uc32 Scanner::ScanHexNumber(int expected_length) { |
| 66 DCHECK(expected_length <= 4); // prevent overflow | 67 DCHECK(expected_length <= 4); // prevent overflow |
| 67 | 68 |
| 68 int begin = source_pos() - 2; | 69 int begin = source_pos() - 2; |
| 69 uc32 x = 0; | 70 uc32 x = 0; |
| (...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 242 | 243 |
| 243 Token::Value Scanner::Next() { | 244 Token::Value Scanner::Next() { |
| 244 if (next_.token == Token::EOS) { | 245 if (next_.token == Token::EOS) { |
| 245 next_.location.beg_pos = current_.location.beg_pos; | 246 next_.location.beg_pos = current_.location.beg_pos; |
| 246 next_.location.end_pos = current_.location.end_pos; | 247 next_.location.end_pos = current_.location.end_pos; |
| 247 } | 248 } |
| 248 current_ = next_; | 249 current_ = next_; |
| 249 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) { | 250 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) { |
| 250 next_ = next_next_; | 251 next_ = next_next_; |
| 251 next_next_.token = Token::UNINITIALIZED; | 252 next_next_.token = Token::UNINITIALIZED; |
| 253 has_preceding_line_terminator_ = next_has_preceding_line_terminator_; |
| 252 return current_.token; | 254 return current_.token; |
| 253 } | 255 } |
| 254 has_line_terminator_before_next_ = false; | 256 has_preceding_line_terminator_ = false; |
| 255 has_multiline_comment_before_next_ = false; | |
| 256 if (static_cast<unsigned>(c0_) <= 0x7f) { | 257 if (static_cast<unsigned>(c0_) <= 0x7f) { |
| 257 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); | 258 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); |
| 258 if (token != Token::ILLEGAL) { | 259 if (token != Token::ILLEGAL) { |
| 259 int pos = source_pos(); | 260 int pos = source_pos(); |
| 260 next_.token = token; | 261 next_.token = token; |
| 261 next_.location.beg_pos = pos; | 262 next_.location.beg_pos = pos; |
| 262 next_.location.end_pos = pos + 1; | 263 next_.location.end_pos = pos + 1; |
| 263 Advance(); | 264 Advance(); |
| 264 return current_.token; | 265 return current_.token; |
| 265 } | 266 } |
| 266 } | 267 } |
| 267 Scan(); | 268 Scan(); |
| 268 return current_.token; | 269 return current_.token; |
| 269 } | 270 } |
| 270 | 271 |
| 271 | 272 |
| 272 Token::Value Scanner::PeekAhead() { | 273 Token::Value Scanner::PeekAhead() { |
| 273 if (next_next_.token != Token::UNINITIALIZED) { | 274 if (next_next_.token != Token::UNINITIALIZED) { |
| 274 return next_next_.token; | 275 return next_next_.token; |
| 275 } | 276 } |
| 276 TokenDesc prev = current_; | 277 TokenDesc prev = current_; |
| 278 bool has_preceding_line_terminator = has_preceding_line_terminator_; |
| 277 Next(); | 279 Next(); |
| 280 next_has_preceding_line_terminator_ = has_preceding_line_terminator_; |
| 281 has_preceding_line_terminator_ = has_preceding_line_terminator; |
| 278 Token::Value ret = next_.token; | 282 Token::Value ret = next_.token; |
| 279 next_next_ = next_; | 283 next_next_ = next_; |
| 280 next_ = current_; | 284 next_ = current_; |
| 281 current_ = prev; | 285 current_ = prev; |
| 282 return ret; | 286 return ret; |
| 283 } | 287 } |
| 284 | 288 |
| 285 | 289 |
| 286 // TODO(yangguo): check whether this is actually necessary. | 290 // TODO(yangguo): check whether this is actually necessary. |
| 287 static inline bool IsLittleEndianByteOrderMark(uc32 c) { | 291 static inline bool IsLittleEndianByteOrderMark(uc32 c) { |
| 288 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 292 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
| 289 // Unicode character; this implies that in a Unicode context the | 293 // Unicode character; this implies that in a Unicode context the |
| 290 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 294 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
| 291 // character expressed in little-endian byte order (since it could | 295 // character expressed in little-endian byte order (since it could |
| 292 // not be a U+FFFE character expressed in big-endian byte | 296 // not be a U+FFFE character expressed in big-endian byte |
| 293 // order). Nevertheless, we check for it to be compatible with | 297 // order). Nevertheless, we check for it to be compatible with |
| 294 // Spidermonkey. | 298 // Spidermonkey. |
| 295 return c == 0xFFFE; | 299 return c == 0xFFFE; |
| 296 } | 300 } |
| 297 | 301 |
| 298 | 302 |
| 299 bool Scanner::SkipWhiteSpace() { | 303 bool Scanner::SkipWhiteSpace() { |
| 300 int start_position = source_pos(); | 304 int start_position = source_pos(); |
| 301 | |
| 302 while (true) { | 305 while (true) { |
| 303 while (true) { | 306 while (true) { |
| 304 // The unicode cache accepts unsigned inputs. | 307 // The unicode cache accepts unsigned inputs. |
| 305 if (c0_ < 0) break; | 308 if (c0_ < 0) break; |
| 306 // Advance as long as character is a WhiteSpace or LineTerminator. | 309 // Advance as long as character is a WhiteSpace or LineTerminator. |
| 307 // Remember if the latter is the case. | 310 // Remember if the latter is the case. |
| 308 if (unicode_cache_->IsLineTerminator(c0_)) { | 311 if (unicode_cache_->IsLineTerminator(c0_)) { |
| 309 has_line_terminator_before_next_ = true; | 312 has_preceding_line_terminator_ = true; |
| 313 did_see_multiline_comment_ = false; |
| 310 } else if (!unicode_cache_->IsWhiteSpace(c0_) && | 314 } else if (!unicode_cache_->IsWhiteSpace(c0_) && |
| 311 !IsLittleEndianByteOrderMark(c0_)) { | 315 !IsLittleEndianByteOrderMark(c0_)) { |
| 312 break; | 316 break; |
| 313 } | 317 } |
| 314 Advance(); | 318 Advance(); |
| 315 } | 319 } |
| 316 | 320 |
| 317 // If there is an HTML comment end '-->' at the beginning of a | 321 // If there is an HTML comment end '-->' at the beginning of a |
| 318 // line (with only whitespace in front of it), we treat the rest | 322 // line (with only whitespace in front of it), we treat the rest |
| 319 // of the line as a comment. This is in line with the way | 323 // of the line as a comment. This is in line with the way |
| 320 // SpiderMonkey handles it. | 324 // SpiderMonkey handles it. |
| 321 if (c0_ == '-' && has_line_terminator_before_next_) { | 325 if (c0_ == '-' && has_preceding_line_terminator_ && |
| 326 !did_see_multiline_comment_) { |
| 322 Advance(); | 327 Advance(); |
| 323 if (c0_ == '-') { | 328 if (c0_ == '-') { |
| 324 Advance(); | 329 Advance(); |
| 325 if (c0_ == '>') { | 330 if (c0_ == '>') { |
| 326 // Treat the rest of the line as a comment. | 331 // Treat the rest of the line as a comment. |
| 327 SkipSingleLineComment(); | 332 SkipSingleLineComment(); |
| 328 // Continue skipping white space after the comment. | 333 // Continue skipping white space after the comment. |
| 329 continue; | 334 continue; |
| 330 } | 335 } |
| 331 PushBack('-'); // undo Advance() | 336 PushBack('-'); // undo Advance() |
| (...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 418 Token::Value Scanner::SkipMultiLineComment() { | 423 Token::Value Scanner::SkipMultiLineComment() { |
| 419 DCHECK(c0_ == '*'); | 424 DCHECK(c0_ == '*'); |
| 420 Advance(); | 425 Advance(); |
| 421 | 426 |
| 422 while (c0_ >= 0) { | 427 while (c0_ >= 0) { |
| 423 uc32 ch = c0_; | 428 uc32 ch = c0_; |
| 424 Advance(); | 429 Advance(); |
| 425 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { | 430 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { |
| 426 // Following ECMA-262, section 7.4, a comment containing | 431 // Following ECMA-262, section 7.4, a comment containing |
| 427 // a newline will make the comment count as a line-terminator. | 432 // a newline will make the comment count as a line-terminator. |
| 428 has_multiline_comment_before_next_ = true; | 433 has_preceding_line_terminator_ = true; |
| 434 did_see_multiline_comment_ = true; |
| 429 } | 435 } |
| 430 // If we have reached the end of the multi-line comment, we | 436 // If we have reached the end of the multi-line comment, we |
| 431 // consume the '/' and insert a whitespace. This way all | 437 // consume the '/' and insert a whitespace. This way all |
| 432 // multi-line comments are treated as whitespace. | 438 // multi-line comments are treated as whitespace. |
| 433 if (ch == '*' && c0_ == '/') { | 439 if (ch == '*' && c0_ == '/') { |
| 434 c0_ = ' '; | 440 c0_ = ' '; |
| 435 return Token::WHITESPACE; | 441 return Token::WHITESPACE; |
| 436 } | 442 } |
| 437 } | 443 } |
| 438 | 444 |
| (...skipping 17 matching lines...) Expand all Loading... |
| 456 PushBack('!'); // undo Advance() | 462 PushBack('!'); // undo Advance() |
| 457 DCHECK(c0_ == '!'); | 463 DCHECK(c0_ == '!'); |
| 458 return Token::LT; | 464 return Token::LT; |
| 459 } | 465 } |
| 460 | 466 |
| 461 | 467 |
| 462 void Scanner::Scan() { | 468 void Scanner::Scan() { |
| 463 next_.literal_chars = NULL; | 469 next_.literal_chars = NULL; |
| 464 next_.raw_literal_chars = NULL; | 470 next_.raw_literal_chars = NULL; |
| 465 Token::Value token; | 471 Token::Value token; |
| 472 |
| 466 do { | 473 do { |
| 467 // Remember the position of the next token | 474 // Remember the position of the next token |
| 468 next_.location.beg_pos = source_pos(); | 475 next_.location.beg_pos = source_pos(); |
| 469 | 476 |
| 470 switch (c0_) { | 477 switch (c0_) { |
| 471 case ' ': | 478 case ' ': |
| 472 case '\t': | 479 case '\t': |
| 473 Advance(); | 480 Advance(); |
| 474 token = Token::WHITESPACE; | 481 token = Token::WHITESPACE; |
| 475 break; | 482 break; |
| 476 | 483 |
| 477 case '\n': | 484 case '\n': |
| 478 Advance(); | 485 Advance(); |
| 479 has_line_terminator_before_next_ = true; | 486 has_preceding_line_terminator_ = true; |
| 487 did_see_multiline_comment_ = false; |
| 480 token = Token::WHITESPACE; | 488 token = Token::WHITESPACE; |
| 481 break; | 489 break; |
| 482 | 490 |
| 483 case '"': case '\'': | 491 case '"': case '\'': |
| 484 token = ScanString(); | 492 token = ScanString(); |
| 485 break; | 493 break; |
| 486 | 494 |
| 487 case '<': | 495 case '<': |
| 488 // < <= << <<= <!-- | 496 // < <= << <<= <!-- |
| 489 Advance(); | 497 Advance(); |
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 550 } else { | 558 } else { |
| 551 token = Token::ADD; | 559 token = Token::ADD; |
| 552 } | 560 } |
| 553 break; | 561 break; |
| 554 | 562 |
| 555 case '-': | 563 case '-': |
| 556 // - -- --> -= | 564 // - -- --> -= |
| 557 Advance(); | 565 Advance(); |
| 558 if (c0_ == '-') { | 566 if (c0_ == '-') { |
| 559 Advance(); | 567 Advance(); |
| 560 if (c0_ == '>' && has_line_terminator_before_next_) { | 568 if (c0_ == '>' && has_preceding_line_terminator_ && |
| 569 !did_see_multiline_comment_) { |
| 561 // For compatibility with SpiderMonkey, we skip lines that | 570 // For compatibility with SpiderMonkey, we skip lines that |
| 562 // start with an HTML comment end '-->'. | 571 // start with an HTML comment end '-->'. |
| 563 token = SkipSingleLineComment(); | 572 token = SkipSingleLineComment(); |
| 564 } else { | 573 } else { |
| 565 token = Token::DEC; | 574 token = Token::DEC; |
| 566 } | 575 } |
| 567 } else if (c0_ == '=') { | 576 } else if (c0_ == '=') { |
| 568 token = Select(Token::ASSIGN_SUB); | 577 token = Select(Token::ASSIGN_SUB); |
| 569 } else { | 578 } else { |
| 570 token = Token::SUB; | 579 token = Token::SUB; |
| (...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 736 int current_pos = source_pos(); | 745 int current_pos = source_pos(); |
| 737 DCHECK_EQ(next_.location.end_pos, current_pos); | 746 DCHECK_EQ(next_.location.end_pos, current_pos); |
| 738 // Positions inside the lookahead token aren't supported. | 747 // Positions inside the lookahead token aren't supported. |
| 739 DCHECK(pos >= current_pos); | 748 DCHECK(pos >= current_pos); |
| 740 if (pos != current_pos) { | 749 if (pos != current_pos) { |
| 741 source_->SeekForward(pos - source_->pos()); | 750 source_->SeekForward(pos - source_->pos()); |
| 742 Advance(); | 751 Advance(); |
| 743 // This function is only called to seek to the location | 752 // This function is only called to seek to the location |
| 744 // of the end of a function (at the "}" token). It doesn't matter | 753 // of the end of a function (at the "}" token). It doesn't matter |
| 745 // whether there was a line terminator in the part we skip. | 754 // whether there was a line terminator in the part we skip. |
| 746 has_line_terminator_before_next_ = false; | 755 has_preceding_line_terminator_ = false; |
| 747 has_multiline_comment_before_next_ = false; | |
| 748 } | 756 } |
| 749 Scan(); | 757 Scan(); |
| 750 } | 758 } |
| 751 | 759 |
| 752 | 760 |
| 753 template <bool capture_raw, bool in_template_literal> | 761 template <bool capture_raw, bool in_template_literal> |
| 754 bool Scanner::ScanEscape() { | 762 bool Scanner::ScanEscape() { |
| 755 uc32 c = c0_; | 763 uc32 c = c0_; |
| 756 Advance<capture_raw>(); | 764 Advance<capture_raw>(); |
| 757 | 765 |
| (...skipping 704 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1462 } | 1470 } |
| 1463 | 1471 |
| 1464 | 1472 |
| 1465 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { | 1473 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { |
| 1466 if (is_next_literal_one_byte()) { | 1474 if (is_next_literal_one_byte()) { |
| 1467 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); | 1475 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); |
| 1468 } | 1476 } |
| 1469 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); | 1477 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); |
| 1470 } | 1478 } |
| 1471 | 1479 |
| 1480 const AstRawString* Scanner::NextNextSymbol( |
| 1481 AstValueFactory* ast_value_factory) { |
| 1482 DCHECK(next_next_.token != Token::UNINITIALIZED); |
| 1483 LiteralBuffer* literal = next_next_.literal_chars; |
| 1484 if (literal->is_one_byte()) { |
| 1485 return ast_value_factory->GetOneByteString(literal->one_byte_literal()); |
| 1486 } |
| 1487 return ast_value_factory->GetTwoByteString(literal->two_byte_literal()); |
| 1488 } |
| 1472 | 1489 |
| 1473 const AstRawString* Scanner::CurrentRawSymbol( | 1490 const AstRawString* Scanner::CurrentRawSymbol( |
| 1474 AstValueFactory* ast_value_factory) { | 1491 AstValueFactory* ast_value_factory) { |
| 1475 if (is_raw_literal_one_byte()) { | 1492 if (is_raw_literal_one_byte()) { |
| 1476 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string()); | 1493 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string()); |
| 1477 } | 1494 } |
| 1478 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string()); | 1495 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string()); |
| 1479 } | 1496 } |
| 1480 | 1497 |
| 1481 | 1498 |
| (...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1687 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1704 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
| 1688 } | 1705 } |
| 1689 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1706 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
| 1690 | 1707 |
| 1691 backing_store_.AddBlock(bytes); | 1708 backing_store_.AddBlock(bytes); |
| 1692 return backing_store_.EndSequence().start(); | 1709 return backing_store_.EndSequence().start(); |
| 1693 } | 1710 } |
| 1694 | 1711 |
| 1695 } // namespace internal | 1712 } // namespace internal |
| 1696 } // namespace v8 | 1713 } // namespace v8 |
| OLD | NEW |