OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
8 | 8 |
9 #include <stdint.h> | 9 #include <stdint.h> |
10 | 10 |
(...skipping 322 matching lines...)
333 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 333 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
334 // Unicode character; this implies that in a Unicode context the | 334 // Unicode character; this implies that in a Unicode context the |
335 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 335 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
336 // character expressed in little-endian byte order (since it could | 336 // character expressed in little-endian byte order (since it could |
337 // not be a U+FFFE character expressed in big-endian byte | 337 // not be a U+FFFE character expressed in big-endian byte |
338 // order). Nevertheless, we check for it to be compatible with | 338 // order). Nevertheless, we check for it to be compatible with |
339 // Spidermonkey. | 339 // Spidermonkey. |
340 return c == 0xFFFE; | 340 return c == 0xFFFE; |
341 } | 341 } |
342 | 342 |
343 | |
344 bool Scanner::SkipWhiteSpace() { | 343 bool Scanner::SkipWhiteSpace() { |
345 int start_position = source_pos(); | 344 int start_position = source_pos(); |
346 | 345 |
347 while (true) { | 346 while (true) { |
348 while (true) { | 347 while (true) { |
349 // The unicode cache accepts unsigned inputs. | 348 // Don't skip behind the end of input. |
350 if (c0_ == kEndOfInput) break; | 349 if (c0_ == kEndOfInput) break; |
| 350 |
351 // Advance as long as character is a WhiteSpace or LineTerminator. | 351 // Advance as long as character is a WhiteSpace or LineTerminator. |
352 // Remember if the latter is the case. | 352 // Remember if the latter is the case. |
353 if (unicode_cache_->IsLineTerminator(c0_)) { | 353 if (unicode_cache_->IsLineTerminator(c0_)) { |
354 has_line_terminator_before_next_ = true; | 354 has_line_terminator_before_next_ = true; |
355 } else if (!unicode_cache_->IsWhiteSpace(c0_) && | 355 } else if (!unicode_cache_->IsWhiteSpace(c0_) && |
356 !IsLittleEndianByteOrderMark(c0_)) { | 356 !IsLittleEndianByteOrderMark(c0_)) { |
357 break; | 357 break; |
358 } | 358 } |
359 Advance(); | 359 Advance(); |
360 } | 360 } |
361 | 361 |
362 // If there is an HTML comment end '-->' at the beginning of a | 362 // If there is an HTML comment end '-->' at the beginning of a |
363 // line (with only whitespace in front of it), we treat the rest | 363 // line (with only whitespace in front of it), we treat the rest |
364 // of the line as a comment. This is in line with the way | 364 // of the line as a comment. This is in line with the way |
365 // SpiderMonkey handles it. | 365 // SpiderMonkey handles it. |
366 if (c0_ == '-' && has_line_terminator_before_next_) { | 366 if (c0_ != '-' || !has_line_terminator_before_next_) break; |
367 Advance(); | 367 |
368 if (c0_ == '-') { | 368 Advance(); |
369 Advance(); | 369 if (c0_ != '-') { |
370 if (c0_ == '>') { | |
371 // Treat the rest of the line as a comment. | |
372 SkipSingleLineComment(); | |
373 // Continue skipping white space after the comment. | |
374 continue; | |
375 } | |
376 PushBack('-'); // undo Advance() | |
377 } | |
378 PushBack('-'); // undo Advance() | 370 PushBack('-'); // undo Advance() |
| 371 break; |
379 } | 372 } |
380 // Return whether or not we skipped any characters. | 373 |
381 return source_pos() != start_position; | 374 Advance(); |
| 375 if (c0_ != '>') { |
| 376 PushBack2('-', '-'); // undo 2x Advance(); |
| 377 break; |
| 378 } |
| 379 |
| 380 // Treat the rest of the line as a comment. |
| 381 SkipSingleLineComment(); |
382 } | 382 } |
| 383 |
| 384 // Return whether or not we skipped any characters. |
| 385 return source_pos() != start_position; |
383 } | 386 } |
384 | 387 |
385 | |
386 Token::Value Scanner::SkipSingleLineComment() { | 388 Token::Value Scanner::SkipSingleLineComment() { |
387 Advance(); | 389 Advance(); |
388 | 390 |
389 // The line terminator at the end of the line is not considered | 391 // The line terminator at the end of the line is not considered |
390 // to be part of the single-line comment; it is recognized | 392 // to be part of the single-line comment; it is recognized |
391 // separately by the lexical grammar and becomes part of the | 393 // separately by the lexical grammar and becomes part of the |
392 // stream of input elements for the syntactic grammar (see | 394 // stream of input elements for the syntactic grammar (see |
393 // ECMA-262, section 7.4). | 395 // ECMA-262, section 7.4). |
394 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { | 396 while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { |
395 Advance(); | 397 Advance(); |
(...skipping 82 matching lines...)
478 if (ch == '*' && c0_ == '/') { | 480 if (ch == '*' && c0_ == '/') { |
479 c0_ = ' '; | 481 c0_ = ' '; |
480 return Token::WHITESPACE; | 482 return Token::WHITESPACE; |
481 } | 483 } |
482 } | 484 } |
483 | 485 |
484 // Unterminated multi-line comment. | 486 // Unterminated multi-line comment. |
485 return Token::ILLEGAL; | 487 return Token::ILLEGAL; |
486 } | 488 } |
487 | 489 |
488 | |
489 Token::Value Scanner::ScanHtmlComment() { | 490 Token::Value Scanner::ScanHtmlComment() { |
490 // Check for <!-- comments. | 491 // Check for <!-- comments. |
491 DCHECK(c0_ == '!'); | 492 DCHECK(c0_ == '!'); |
492 Advance(); | 493 Advance(); |
493 if (c0_ == '-') { | 494 if (c0_ != '-') { |
494 Advance(); | 495 PushBack('!'); // undo Advance() |
495 if (c0_ == '-') { | 496 return Token::LT; |
496 found_html_comment_ = true; | |
497 return SkipSingleLineComment(); | |
498 } | |
499 PushBack('-'); // undo Advance() | |
500 } | 497 } |
501 PushBack('!'); // undo Advance() | 498 |
502 DCHECK(c0_ == '!'); | 499 Advance(); |
503 return Token::LT; | 500 if (c0_ != '-') { |
| 501 PushBack2('-', '!'); // undo 2x Advance() |
| 502 return Token::LT; |
| 503 } |
| 504 |
| 505 found_html_comment_ = true; |
| 506 return SkipSingleLineComment(); |
504 } | 507 } |
505 | 508 |
506 | |
507 void Scanner::Scan() { | 509 void Scanner::Scan() { |
508 next_.literal_chars = NULL; | 510 next_.literal_chars = NULL; |
509 next_.raw_literal_chars = NULL; | 511 next_.raw_literal_chars = NULL; |
510 Token::Value token; | 512 Token::Value token; |
511 do { | 513 do { |
512 // Remember the position of the next token | 514 // Remember the position of the next token |
513 next_.location.beg_pos = source_pos(); | 515 next_.location.beg_pos = source_pos(); |
514 | 516 |
515 switch (c0_) { | 517 switch (c0_) { |
516 case ' ': | 518 case ' ': |
(...skipping 1117 matching lines...)
1634 // 2, reset the source to the desired position, | 1636 // 2, reset the source to the desired position, |
1635 source_->Seek(position); | 1637 source_->Seek(position); |
1636 // 3, re-scan, by scanning the look-ahead char + 1 token (next_). | 1638 // 3, re-scan, by scanning the look-ahead char + 1 token (next_). |
1637 c0_ = source_->Advance(); | 1639 c0_ = source_->Advance(); |
1638 Next(); | 1640 Next(); |
1639 DCHECK_EQ(next_.location.beg_pos, position); | 1641 DCHECK_EQ(next_.location.beg_pos, position); |
1640 } | 1642 } |
1641 | 1643 |
1642 } // namespace internal | 1644 } // namespace internal |
1643 } // namespace v8 | 1645 } // namespace v8 |
OLD | NEW |