OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
8 | 8 |
9 #include <stdint.h> | 9 #include <stdint.h> |
10 | 10 |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
49 } | 49 } |
50 | 50 |
51 | 51 |
52 void Scanner::Initialize(Utf16CharacterStream* source) { | 52 void Scanner::Initialize(Utf16CharacterStream* source) { |
53 source_ = source; | 53 source_ = source; |
54 // Need to capture identifiers in order to recognize "get" and "set" | 54 // Need to capture identifiers in order to recognize "get" and "set" |
55 // in object literals. | 55 // in object literals. |
56 Init(); | 56 Init(); |
57 // Skip initial whitespace allowing HTML comment ends just like | 57 // Skip initial whitespace allowing HTML comment ends just like |
58 // after a newline and scan first token. | 58 // after a newline and scan first token. |
59 has_line_terminator_before_next_ = true; | 59 has_preceding_line_terminator_ = true; |
| 60 did_see_multiline_comment_ = false; |
60 SkipWhiteSpace(); | 61 SkipWhiteSpace(); |
61 Scan(); | 62 Scan(); |
62 } | 63 } |
63 | 64 |
64 template <bool capture_raw, bool unicode> | 65 template <bool capture_raw, bool unicode> |
65 uc32 Scanner::ScanHexNumber(int expected_length) { | 66 uc32 Scanner::ScanHexNumber(int expected_length) { |
66 DCHECK(expected_length <= 4); // prevent overflow | 67 DCHECK(expected_length <= 4); // prevent overflow |
67 | 68 |
68 int begin = source_pos() - 2; | 69 int begin = source_pos() - 2; |
69 uc32 x = 0; | 70 uc32 x = 0; |
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
242 | 243 |
243 Token::Value Scanner::Next() { | 244 Token::Value Scanner::Next() { |
244 if (next_.token == Token::EOS) { | 245 if (next_.token == Token::EOS) { |
245 next_.location.beg_pos = current_.location.beg_pos; | 246 next_.location.beg_pos = current_.location.beg_pos; |
246 next_.location.end_pos = current_.location.end_pos; | 247 next_.location.end_pos = current_.location.end_pos; |
247 } | 248 } |
248 current_ = next_; | 249 current_ = next_; |
249 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) { | 250 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) { |
250 next_ = next_next_; | 251 next_ = next_next_; |
251 next_next_.token = Token::UNINITIALIZED; | 252 next_next_.token = Token::UNINITIALIZED; |
| 253 has_preceding_line_terminator_ = next_has_preceding_line_terminator_; |
252 return current_.token; | 254 return current_.token; |
253 } | 255 } |
254 has_line_terminator_before_next_ = false; | 256 has_preceding_line_terminator_ = false; |
255 has_multiline_comment_before_next_ = false; | |
256 if (static_cast<unsigned>(c0_) <= 0x7f) { | 257 if (static_cast<unsigned>(c0_) <= 0x7f) { |
257 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); | 258 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); |
258 if (token != Token::ILLEGAL) { | 259 if (token != Token::ILLEGAL) { |
259 int pos = source_pos(); | 260 int pos = source_pos(); |
260 next_.token = token; | 261 next_.token = token; |
261 next_.location.beg_pos = pos; | 262 next_.location.beg_pos = pos; |
262 next_.location.end_pos = pos + 1; | 263 next_.location.end_pos = pos + 1; |
263 Advance(); | 264 Advance(); |
264 return current_.token; | 265 return current_.token; |
265 } | 266 } |
266 } | 267 } |
267 Scan(); | 268 Scan(); |
268 return current_.token; | 269 return current_.token; |
269 } | 270 } |
270 | 271 |
271 | 272 |
272 Token::Value Scanner::PeekAhead() { | 273 Token::Value Scanner::PeekAhead() { |
273 if (next_next_.token != Token::UNINITIALIZED) { | 274 if (next_next_.token != Token::UNINITIALIZED) { |
274 return next_next_.token; | 275 return next_next_.token; |
275 } | 276 } |
276 TokenDesc prev = current_; | 277 TokenDesc prev = current_; |
| 278 bool has_preceding_line_terminator = has_preceding_line_terminator_; |
277 Next(); | 279 Next(); |
| 280 next_has_preceding_line_terminator_ = has_preceding_line_terminator_; |
| 281 has_preceding_line_terminator_ = has_preceding_line_terminator; |
278 Token::Value ret = next_.token; | 282 Token::Value ret = next_.token; |
279 next_next_ = next_; | 283 next_next_ = next_; |
280 next_ = current_; | 284 next_ = current_; |
281 current_ = prev; | 285 current_ = prev; |
282 return ret; | 286 return ret; |
283 } | 287 } |
284 | 288 |
285 | 289 |
286 // TODO(yangguo): check whether this is actually necessary. | 290 // TODO(yangguo): check whether this is actually necessary. |
287 static inline bool IsLittleEndianByteOrderMark(uc32 c) { | 291 static inline bool IsLittleEndianByteOrderMark(uc32 c) { |
288 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 292 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
289 // Unicode character; this implies that in a Unicode context the | 293 // Unicode character; this implies that in a Unicode context the |
290 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 294 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
291 // character expressed in little-endian byte order (since it could | 295 // character expressed in little-endian byte order (since it could |
292 // not be a U+FFFE character expressed in big-endian byte | 296 // not be a U+FFFE character expressed in big-endian byte |
293 // order). Nevertheless, we check for it to be compatible with | 297 // order). Nevertheless, we check for it to be compatible with |
294 // Spidermonkey. | 298 // Spidermonkey. |
295 return c == 0xFFFE; | 299 return c == 0xFFFE; |
296 } | 300 } |
297 | 301 |
298 | 302 |
299 bool Scanner::SkipWhiteSpace() { | 303 bool Scanner::SkipWhiteSpace() { |
300 int start_position = source_pos(); | 304 int start_position = source_pos(); |
301 | |
302 while (true) { | 305 while (true) { |
303 while (true) { | 306 while (true) { |
304 // The unicode cache accepts unsigned inputs. | 307 // The unicode cache accepts unsigned inputs. |
305 if (c0_ < 0) break; | 308 if (c0_ < 0) break; |
306 // Advance as long as character is a WhiteSpace or LineTerminator. | 309 // Advance as long as character is a WhiteSpace or LineTerminator. |
307 // Remember if the latter is the case. | 310 // Remember if the latter is the case. |
308 if (unicode_cache_->IsLineTerminator(c0_)) { | 311 if (unicode_cache_->IsLineTerminator(c0_)) { |
309 has_line_terminator_before_next_ = true; | 312 has_preceding_line_terminator_ = true; |
| 313 did_see_multiline_comment_ = false; |
310 } else if (!unicode_cache_->IsWhiteSpace(c0_) && | 314 } else if (!unicode_cache_->IsWhiteSpace(c0_) && |
311 !IsLittleEndianByteOrderMark(c0_)) { | 315 !IsLittleEndianByteOrderMark(c0_)) { |
312 break; | 316 break; |
313 } | 317 } |
314 Advance(); | 318 Advance(); |
315 } | 319 } |
316 | 320 |
317 // If there is an HTML comment end '-->' at the beginning of a | 321 // If there is an HTML comment end '-->' at the beginning of a |
318 // line (with only whitespace in front of it), we treat the rest | 322 // line (with only whitespace in front of it), we treat the rest |
319 // of the line as a comment. This is in line with the way | 323 // of the line as a comment. This is in line with the way |
320 // SpiderMonkey handles it. | 324 // SpiderMonkey handles it. |
321 if (c0_ == '-' && has_line_terminator_before_next_) { | 325 if (c0_ == '-' && has_preceding_line_terminator_ && |
| 326 !did_see_multiline_comment_) { |
322 Advance(); | 327 Advance(); |
323 if (c0_ == '-') { | 328 if (c0_ == '-') { |
324 Advance(); | 329 Advance(); |
325 if (c0_ == '>') { | 330 if (c0_ == '>') { |
326 // Treat the rest of the line as a comment. | 331 // Treat the rest of the line as a comment. |
327 SkipSingleLineComment(); | 332 SkipSingleLineComment(); |
328 // Continue skipping white space after the comment. | 333 // Continue skipping white space after the comment. |
329 continue; | 334 continue; |
330 } | 335 } |
331 PushBack('-'); // undo Advance() | 336 PushBack('-'); // undo Advance() |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
418 Token::Value Scanner::SkipMultiLineComment() { | 423 Token::Value Scanner::SkipMultiLineComment() { |
419 DCHECK(c0_ == '*'); | 424 DCHECK(c0_ == '*'); |
420 Advance(); | 425 Advance(); |
421 | 426 |
422 while (c0_ >= 0) { | 427 while (c0_ >= 0) { |
423 uc32 ch = c0_; | 428 uc32 ch = c0_; |
424 Advance(); | 429 Advance(); |
425 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { | 430 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { |
426 // Following ECMA-262, section 7.4, a comment containing | 431 // Following ECMA-262, section 7.4, a comment containing |
427 // a newline will make the comment count as a line-terminator. | 432 // a newline will make the comment count as a line-terminator. |
428 has_multiline_comment_before_next_ = true; | 433 has_preceding_line_terminator_ = true; |
| 434 did_see_multiline_comment_ = true; |
429 } | 435 } |
430 // If we have reached the end of the multi-line comment, we | 436 // If we have reached the end of the multi-line comment, we |
431 // consume the '/' and insert a whitespace. This way all | 437 // consume the '/' and insert a whitespace. This way all |
432 // multi-line comments are treated as whitespace. | 438 // multi-line comments are treated as whitespace. |
433 if (ch == '*' && c0_ == '/') { | 439 if (ch == '*' && c0_ == '/') { |
434 c0_ = ' '; | 440 c0_ = ' '; |
435 return Token::WHITESPACE; | 441 return Token::WHITESPACE; |
436 } | 442 } |
437 } | 443 } |
438 | 444 |
(...skipping 17 matching lines...) Expand all Loading... |
456 PushBack('!'); // undo Advance() | 462 PushBack('!'); // undo Advance() |
457 DCHECK(c0_ == '!'); | 463 DCHECK(c0_ == '!'); |
458 return Token::LT; | 464 return Token::LT; |
459 } | 465 } |
460 | 466 |
461 | 467 |
462 void Scanner::Scan() { | 468 void Scanner::Scan() { |
463 next_.literal_chars = NULL; | 469 next_.literal_chars = NULL; |
464 next_.raw_literal_chars = NULL; | 470 next_.raw_literal_chars = NULL; |
465 Token::Value token; | 471 Token::Value token; |
| 472 |
466 do { | 473 do { |
467 // Remember the position of the next token | 474 // Remember the position of the next token |
468 next_.location.beg_pos = source_pos(); | 475 next_.location.beg_pos = source_pos(); |
469 | 476 |
470 switch (c0_) { | 477 switch (c0_) { |
471 case ' ': | 478 case ' ': |
472 case '\t': | 479 case '\t': |
473 Advance(); | 480 Advance(); |
474 token = Token::WHITESPACE; | 481 token = Token::WHITESPACE; |
475 break; | 482 break; |
476 | 483 |
477 case '\n': | 484 case '\n': |
478 Advance(); | 485 Advance(); |
479 has_line_terminator_before_next_ = true; | 486 has_preceding_line_terminator_ = true; |
| 487 did_see_multiline_comment_ = false; |
480 token = Token::WHITESPACE; | 488 token = Token::WHITESPACE; |
481 break; | 489 break; |
482 | 490 |
483 case '"': case '\'': | 491 case '"': case '\'': |
484 token = ScanString(); | 492 token = ScanString(); |
485 break; | 493 break; |
486 | 494 |
487 case '<': | 495 case '<': |
488 // < <= << <<= <!-- | 496 // < <= << <<= <!-- |
489 Advance(); | 497 Advance(); |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
550 } else { | 558 } else { |
551 token = Token::ADD; | 559 token = Token::ADD; |
552 } | 560 } |
553 break; | 561 break; |
554 | 562 |
555 case '-': | 563 case '-': |
556 // - -- --> -= | 564 // - -- --> -= |
557 Advance(); | 565 Advance(); |
558 if (c0_ == '-') { | 566 if (c0_ == '-') { |
559 Advance(); | 567 Advance(); |
560 if (c0_ == '>' && has_line_terminator_before_next_) { | 568 if (c0_ == '>' && has_preceding_line_terminator_ && |
| 569 !did_see_multiline_comment_) { |
561 // For compatibility with SpiderMonkey, we skip lines that | 570 // For compatibility with SpiderMonkey, we skip lines that |
562 // start with an HTML comment end '-->'. | 571 // start with an HTML comment end '-->'. |
563 token = SkipSingleLineComment(); | 572 token = SkipSingleLineComment(); |
564 } else { | 573 } else { |
565 token = Token::DEC; | 574 token = Token::DEC; |
566 } | 575 } |
567 } else if (c0_ == '=') { | 576 } else if (c0_ == '=') { |
568 token = Select(Token::ASSIGN_SUB); | 577 token = Select(Token::ASSIGN_SUB); |
569 } else { | 578 } else { |
570 token = Token::SUB; | 579 token = Token::SUB; |
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
736 int current_pos = source_pos(); | 745 int current_pos = source_pos(); |
737 DCHECK_EQ(next_.location.end_pos, current_pos); | 746 DCHECK_EQ(next_.location.end_pos, current_pos); |
738 // Positions inside the lookahead token aren't supported. | 747 // Positions inside the lookahead token aren't supported. |
739 DCHECK(pos >= current_pos); | 748 DCHECK(pos >= current_pos); |
740 if (pos != current_pos) { | 749 if (pos != current_pos) { |
741 source_->SeekForward(pos - source_->pos()); | 750 source_->SeekForward(pos - source_->pos()); |
742 Advance(); | 751 Advance(); |
743 // This function is only called to seek to the location | 752 // This function is only called to seek to the location |
744 // of the end of a function (at the "}" token). It doesn't matter | 753 // of the end of a function (at the "}" token). It doesn't matter |
745 // whether there was a line terminator in the part we skip. | 754 // whether there was a line terminator in the part we skip. |
746 has_line_terminator_before_next_ = false; | 755 has_preceding_line_terminator_ = false; |
747 has_multiline_comment_before_next_ = false; | |
748 } | 756 } |
749 Scan(); | 757 Scan(); |
750 } | 758 } |
751 | 759 |
752 | 760 |
753 template <bool capture_raw, bool in_template_literal> | 761 template <bool capture_raw, bool in_template_literal> |
754 bool Scanner::ScanEscape() { | 762 bool Scanner::ScanEscape() { |
755 uc32 c = c0_; | 763 uc32 c = c0_; |
756 Advance<capture_raw>(); | 764 Advance<capture_raw>(); |
757 | 765 |
(...skipping 704 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1462 } | 1470 } |
1463 | 1471 |
1464 | 1472 |
1465 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { | 1473 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { |
1466 if (is_next_literal_one_byte()) { | 1474 if (is_next_literal_one_byte()) { |
1467 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); | 1475 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); |
1468 } | 1476 } |
1469 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); | 1477 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); |
1470 } | 1478 } |
1471 | 1479 |
| 1480 const AstRawString* Scanner::NextNextSymbol( |
| 1481 AstValueFactory* ast_value_factory) { |
| 1482 DCHECK(next_next_.token != Token::UNINITIALIZED); |
| 1483 LiteralBuffer* literal = next_next_.literal_chars; |
| 1484 if (literal->is_one_byte()) { |
| 1485 return ast_value_factory->GetOneByteString(literal->one_byte_literal()); |
| 1486 } |
| 1487 return ast_value_factory->GetTwoByteString(literal->two_byte_literal()); |
| 1488 } |
1472 | 1489 |
1473 const AstRawString* Scanner::CurrentRawSymbol( | 1490 const AstRawString* Scanner::CurrentRawSymbol( |
1474 AstValueFactory* ast_value_factory) { | 1491 AstValueFactory* ast_value_factory) { |
1475 if (is_raw_literal_one_byte()) { | 1492 if (is_raw_literal_one_byte()) { |
1476 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string()); | 1493 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string()); |
1477 } | 1494 } |
1478 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string()); | 1495 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string()); |
1479 } | 1496 } |
1480 | 1497 |
1481 | 1498 |
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1687 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1704 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
1688 } | 1705 } |
1689 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1706 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
1690 | 1707 |
1691 backing_store_.AddBlock(bytes); | 1708 backing_store_.AddBlock(bytes); |
1692 return backing_store_.EndSequence().start(); | 1709 return backing_store_.EndSequence().start(); |
1693 } | 1710 } |
1694 | 1711 |
1695 } // namespace internal | 1712 } // namespace internal |
1696 } // namespace v8 | 1713 } // namespace v8 |
OLD | NEW |