OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
8 | 8 |
9 #include <stdint.h> | 9 #include <stdint.h> |
10 | 10 |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
49 } | 49 } |
50 | 50 |
51 | 51 |
52 void Scanner::Initialize(Utf16CharacterStream* source) { | 52 void Scanner::Initialize(Utf16CharacterStream* source) { |
53 source_ = source; | 53 source_ = source; |
54 // Need to capture identifiers in order to recognize "get" and "set" | 54 // Need to capture identifiers in order to recognize "get" and "set" |
55 // in object literals. | 55 // in object literals. |
56 Init(); | 56 Init(); |
57 // Skip initial whitespace allowing HTML comment ends just like | 57 // Skip initial whitespace allowing HTML comment ends just like |
58 // after a newline and scan first token. | 58 // after a newline and scan first token. |
59 has_line_terminator_before_next_ = true; | 59 has_preceding_line_terminator_ = true; |
| 60 has_preceding_multiline_comment_ = false; |
60 SkipWhiteSpace(); | 61 SkipWhiteSpace(); |
61 Scan(); | 62 Scan(); |
62 } | 63 } |
63 | 64 |
64 template <bool capture_raw, bool unicode> | 65 template <bool capture_raw, bool unicode> |
65 uc32 Scanner::ScanHexNumber(int expected_length) { | 66 uc32 Scanner::ScanHexNumber(int expected_length) { |
66 DCHECK(expected_length <= 4); // prevent overflow | 67 DCHECK(expected_length <= 4); // prevent overflow |
67 | 68 |
68 int begin = source_pos() - 2; | 69 int begin = source_pos() - 2; |
69 uc32 x = 0; | 70 uc32 x = 0; |
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
242 | 243 |
243 Token::Value Scanner::Next() { | 244 Token::Value Scanner::Next() { |
244 if (next_.token == Token::EOS) { | 245 if (next_.token == Token::EOS) { |
245 next_.location.beg_pos = current_.location.beg_pos; | 246 next_.location.beg_pos = current_.location.beg_pos; |
246 next_.location.end_pos = current_.location.end_pos; | 247 next_.location.end_pos = current_.location.end_pos; |
247 } | 248 } |
248 current_ = next_; | 249 current_ = next_; |
249 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) { | 250 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) { |
250 next_ = next_next_; | 251 next_ = next_next_; |
251 next_next_.token = Token::UNINITIALIZED; | 252 next_next_.token = Token::UNINITIALIZED; |
| 253 has_preceding_line_terminator_ = next_has_preceding_line_terminator_; |
252 return current_.token; | 254 return current_.token; |
253 } | 255 } |
254 has_line_terminator_before_next_ = false; | 256 has_preceding_line_terminator_ = false; |
255 has_multiline_comment_before_next_ = false; | 257 has_preceding_multiline_comment_ = false; |
256 if (static_cast<unsigned>(c0_) <= 0x7f) { | 258 if (static_cast<unsigned>(c0_) <= 0x7f) { |
257 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); | 259 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); |
258 if (token != Token::ILLEGAL) { | 260 if (token != Token::ILLEGAL) { |
259 int pos = source_pos(); | 261 int pos = source_pos(); |
260 next_.token = token; | 262 next_.token = token; |
261 next_.location.beg_pos = pos; | 263 next_.location.beg_pos = pos; |
262 next_.location.end_pos = pos + 1; | 264 next_.location.end_pos = pos + 1; |
263 Advance(); | 265 Advance(); |
264 return current_.token; | 266 return current_.token; |
265 } | 267 } |
266 } | 268 } |
267 Scan(); | 269 Scan(); |
268 return current_.token; | 270 return current_.token; |
269 } | 271 } |
270 | 272 |
271 | 273 |
272 Token::Value Scanner::PeekAhead() { | 274 Token::Value Scanner::PeekAhead() { |
273 if (next_next_.token != Token::UNINITIALIZED) { | 275 if (next_next_.token != Token::UNINITIALIZED) { |
274 return next_next_.token; | 276 return next_next_.token; |
275 } | 277 } |
276 TokenDesc prev = current_; | 278 TokenDesc prev = current_; |
| 279 bool has_preceding_line_terminator = |
| 280 has_preceding_line_terminator_ || has_preceding_multiline_comment_; |
277 Next(); | 281 Next(); |
| 282 next_has_preceding_line_terminator_ = |
| 283 has_preceding_line_terminator_ || has_preceding_multiline_comment_; |
| 284 has_preceding_line_terminator_ = has_preceding_line_terminator; |
278 Token::Value ret = next_.token; | 285 Token::Value ret = next_.token; |
279 next_next_ = next_; | 286 next_next_ = next_; |
280 next_ = current_; | 287 next_ = current_; |
281 current_ = prev; | 288 current_ = prev; |
282 return ret; | 289 return ret; |
283 } | 290 } |
284 | 291 |
285 | 292 |
286 // TODO(yangguo): check whether this is actually necessary. | 293 // TODO(yangguo): check whether this is actually necessary. |
287 static inline bool IsLittleEndianByteOrderMark(uc32 c) { | 294 static inline bool IsLittleEndianByteOrderMark(uc32 c) { |
288 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 295 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
289 // Unicode character; this implies that in a Unicode context the | 296 // Unicode character; this implies that in a Unicode context the |
290 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 297 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
291 // character expressed in little-endian byte order (since it could | 298 // character expressed in little-endian byte order (since it could |
292 // not be a U+FFFE character expressed in big-endian byte | 299 // not be a U+FFFE character expressed in big-endian byte |
293 // order). Nevertheless, we check for it to be compatible with | 300 // order). Nevertheless, we check for it to be compatible with |
294 // Spidermonkey. | 301 // Spidermonkey. |
295 return c == 0xFFFE; | 302 return c == 0xFFFE; |
296 } | 303 } |
297 | 304 |
298 | 305 |
299 bool Scanner::SkipWhiteSpace() { | 306 bool Scanner::SkipWhiteSpace() { |
300 int start_position = source_pos(); | 307 int start_position = source_pos(); |
301 | |
302 while (true) { | 308 while (true) { |
303 while (true) { | 309 while (true) { |
304 // The unicode cache accepts unsigned inputs. | 310 // The unicode cache accepts unsigned inputs. |
305 if (c0_ < 0) break; | 311 if (c0_ < 0) break; |
306 // Advance as long as character is a WhiteSpace or LineTerminator. | 312 // Advance as long as character is a WhiteSpace or LineTerminator. |
307 // Remember if the latter is the case. | 313 // Remember if the latter is the case. |
308 if (unicode_cache_->IsLineTerminator(c0_)) { | 314 if (unicode_cache_->IsLineTerminator(c0_)) { |
309 has_line_terminator_before_next_ = true; | 315 has_preceding_line_terminator_ = true; |
310 } else if (!unicode_cache_->IsWhiteSpace(c0_) && | 316 } else if (!unicode_cache_->IsWhiteSpace(c0_) && |
311 !IsLittleEndianByteOrderMark(c0_)) { | 317 !IsLittleEndianByteOrderMark(c0_)) { |
312 break; | 318 break; |
313 } | 319 } |
314 Advance(); | 320 Advance(); |
315 } | 321 } |
316 | 322 |
317 // If there is an HTML comment end '-->' at the beginning of a | 323 // If there is an HTML comment end '-->' at the beginning of a |
318 // line (with only whitespace in front of it), we treat the rest | 324 // line (with only whitespace in front of it), we treat the rest |
319 // of the line as a comment. This is in line with the way | 325 // of the line as a comment. This is in line with the way |
320 // SpiderMonkey handles it. | 326 // SpiderMonkey handles it. |
321 if (c0_ == '-' && has_line_terminator_before_next_) { | 327 if (c0_ == '-' && has_preceding_line_terminator_) { |
322 Advance(); | 328 Advance(); |
323 if (c0_ == '-') { | 329 if (c0_ == '-') { |
324 Advance(); | 330 Advance(); |
325 if (c0_ == '>') { | 331 if (c0_ == '>') { |
326 // Treat the rest of the line as a comment. | 332 // Treat the rest of the line as a comment. |
327 SkipSingleLineComment(); | 333 SkipSingleLineComment(); |
328 // Continue skipping white space after the comment. | 334 // Continue skipping white space after the comment. |
329 continue; | 335 continue; |
330 } | 336 } |
331 PushBack('-'); // undo Advance() | 337 PushBack('-'); // undo Advance() |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
418 Token::Value Scanner::SkipMultiLineComment() { | 424 Token::Value Scanner::SkipMultiLineComment() { |
419 DCHECK(c0_ == '*'); | 425 DCHECK(c0_ == '*'); |
420 Advance(); | 426 Advance(); |
421 | 427 |
422 while (c0_ >= 0) { | 428 while (c0_ >= 0) { |
423 uc32 ch = c0_; | 429 uc32 ch = c0_; |
424 Advance(); | 430 Advance(); |
425 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { | 431 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { |
426 // Following ECMA-262, section 7.4, a comment containing | 432 // Following ECMA-262, section 7.4, a comment containing |
427 // a newline will make the comment count as a line-terminator. | 433 // a newline will make the comment count as a line-terminator. |
428 has_multiline_comment_before_next_ = true; | 434 has_preceding_multiline_comment_ = true; |
429 } | 435 } |
430 // If we have reached the end of the multi-line comment, we | 436 // If we have reached the end of the multi-line comment, we |
431 // consume the '/' and insert a whitespace. This way all | 437 // consume the '/' and insert a whitespace. This way all |
432 // multi-line comments are treated as whitespace. | 438 // multi-line comments are treated as whitespace. |
433 if (ch == '*' && c0_ == '/') { | 439 if (ch == '*' && c0_ == '/') { |
434 c0_ = ' '; | 440 c0_ = ' '; |
435 return Token::WHITESPACE; | 441 return Token::WHITESPACE; |
436 } | 442 } |
437 } | 443 } |
438 | 444 |
(...skipping 17 matching lines...) Expand all Loading... |
456 PushBack('!'); // undo Advance() | 462 PushBack('!'); // undo Advance() |
457 DCHECK(c0_ == '!'); | 463 DCHECK(c0_ == '!'); |
458 return Token::LT; | 464 return Token::LT; |
459 } | 465 } |
460 | 466 |
461 | 467 |
462 void Scanner::Scan() { | 468 void Scanner::Scan() { |
463 next_.literal_chars = NULL; | 469 next_.literal_chars = NULL; |
464 next_.raw_literal_chars = NULL; | 470 next_.raw_literal_chars = NULL; |
465 Token::Value token; | 471 Token::Value token; |
| 472 |
466 do { | 473 do { |
467 // Remember the position of the next token | 474 // Remember the position of the next token |
468 next_.location.beg_pos = source_pos(); | 475 next_.location.beg_pos = source_pos(); |
469 | 476 |
470 switch (c0_) { | 477 switch (c0_) { |
471 case ' ': | 478 case ' ': |
472 case '\t': | 479 case '\t': |
473 Advance(); | 480 Advance(); |
474 token = Token::WHITESPACE; | 481 token = Token::WHITESPACE; |
475 break; | 482 break; |
476 | 483 |
477 case '\n': | 484 case '\n': |
478 Advance(); | 485 Advance(); |
479 has_line_terminator_before_next_ = true; | 486 has_preceding_line_terminator_ = true; |
480 token = Token::WHITESPACE; | 487 token = Token::WHITESPACE; |
481 break; | 488 break; |
482 | 489 |
483 case '"': case '\'': | 490 case '"': case '\'': |
484 token = ScanString(); | 491 token = ScanString(); |
485 break; | 492 break; |
486 | 493 |
487 case '<': | 494 case '<': |
488 // < <= << <<= <!-- | 495 // < <= << <<= <!-- |
489 Advance(); | 496 Advance(); |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
550 } else { | 557 } else { |
551 token = Token::ADD; | 558 token = Token::ADD; |
552 } | 559 } |
553 break; | 560 break; |
554 | 561 |
555 case '-': | 562 case '-': |
556 // - -- --> -= | 563 // - -- --> -= |
557 Advance(); | 564 Advance(); |
558 if (c0_ == '-') { | 565 if (c0_ == '-') { |
559 Advance(); | 566 Advance(); |
560 if (c0_ == '>' && has_line_terminator_before_next_) { | 567 if (c0_ == '>' && has_preceding_line_terminator_) { |
561 // For compatibility with SpiderMonkey, we skip lines that | 568 // For compatibility with SpiderMonkey, we skip lines that |
562 // start with an HTML comment end '-->'. | 569 // start with an HTML comment end '-->'. |
563 token = SkipSingleLineComment(); | 570 token = SkipSingleLineComment(); |
564 } else { | 571 } else { |
565 token = Token::DEC; | 572 token = Token::DEC; |
566 } | 573 } |
567 } else if (c0_ == '=') { | 574 } else if (c0_ == '=') { |
568 token = Select(Token::ASSIGN_SUB); | 575 token = Select(Token::ASSIGN_SUB); |
569 } else { | 576 } else { |
570 token = Token::SUB; | 577 token = Token::SUB; |
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
736 int current_pos = source_pos(); | 743 int current_pos = source_pos(); |
737 DCHECK_EQ(next_.location.end_pos, current_pos); | 744 DCHECK_EQ(next_.location.end_pos, current_pos); |
738 // Positions inside the lookahead token aren't supported. | 745 // Positions inside the lookahead token aren't supported. |
739 DCHECK(pos >= current_pos); | 746 DCHECK(pos >= current_pos); |
740 if (pos != current_pos) { | 747 if (pos != current_pos) { |
741 source_->SeekForward(pos - source_->pos()); | 748 source_->SeekForward(pos - source_->pos()); |
742 Advance(); | 749 Advance(); |
743 // This function is only called to seek to the location | 750 // This function is only called to seek to the location |
744 // of the end of a function (at the "}" token). It doesn't matter | 751 // of the end of a function (at the "}" token). It doesn't matter |
745 // whether there was a line terminator in the part we skip. | 752 // whether there was a line terminator in the part we skip. |
746 has_line_terminator_before_next_ = false; | 753 has_preceding_line_terminator_ = false; |
747 has_multiline_comment_before_next_ = false; | 754 has_preceding_multiline_comment_ = false; |
748 } | 755 } |
749 Scan(); | 756 Scan(); |
750 } | 757 } |
751 | 758 |
752 | 759 |
753 template <bool capture_raw, bool in_template_literal> | 760 template <bool capture_raw, bool in_template_literal> |
754 bool Scanner::ScanEscape() { | 761 bool Scanner::ScanEscape() { |
755 uc32 c = c0_; | 762 uc32 c = c0_; |
756 Advance<capture_raw>(); | 763 Advance<capture_raw>(); |
757 | 764 |
(...skipping 704 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1462 } | 1469 } |
1463 | 1470 |
1464 | 1471 |
1465 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { | 1472 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { |
1466 if (is_next_literal_one_byte()) { | 1473 if (is_next_literal_one_byte()) { |
1467 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); | 1474 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); |
1468 } | 1475 } |
1469 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); | 1476 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); |
1470 } | 1477 } |
1471 | 1478 |
| 1479 const AstRawString* Scanner::NextNextSymbol( |
| 1480 AstValueFactory* ast_value_factory) { |
| 1481 DCHECK(next_next_.token != Token::UNINITIALIZED); |
| 1482 LiteralBuffer* literal = next_next_.literal_chars; |
| 1483 if (literal->is_one_byte()) { |
| 1484 return ast_value_factory->GetOneByteString(literal->one_byte_literal()); |
| 1485 } |
| 1486 return ast_value_factory->GetTwoByteString(literal->two_byte_literal()); |
| 1487 } |
1472 | 1488 |
1473 const AstRawString* Scanner::CurrentRawSymbol( | 1489 const AstRawString* Scanner::CurrentRawSymbol( |
1474 AstValueFactory* ast_value_factory) { | 1490 AstValueFactory* ast_value_factory) { |
1475 if (is_raw_literal_one_byte()) { | 1491 if (is_raw_literal_one_byte()) { |
1476 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string()); | 1492 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string()); |
1477 } | 1493 } |
1478 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string()); | 1494 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string()); |
1479 } | 1495 } |
1480 | 1496 |
1481 | 1497 |
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1687 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1703 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
1688 } | 1704 } |
1689 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1705 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
1690 | 1706 |
1691 backing_store_.AddBlock(bytes); | 1707 backing_store_.AddBlock(bytes); |
1692 return backing_store_.EndSequence().start(); | 1708 return backing_store_.EndSequence().start(); |
1693 } | 1709 } |
1694 | 1710 |
1695 } // namespace internal | 1711 } // namespace internal |
1696 } // namespace v8 | 1712 } // namespace v8 |
OLD | NEW |