OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 26 matching lines...) Expand all Loading... |
37 #include "hashmap.h" | 37 #include "hashmap.h" |
38 #include "list.h" | 38 #include "list.h" |
39 #include "token.h" | 39 #include "token.h" |
40 #include "unicode-inl.h" | 40 #include "unicode-inl.h" |
41 #include "utils.h" | 41 #include "utils.h" |
42 | 42 |
43 namespace v8 { | 43 namespace v8 { |
44 namespace internal { | 44 namespace internal { |
45 | 45 |
46 | 46 |
| 47 class ParserRecorder; |
| 48 |
| 49 |
47 // Returns the value (0 .. 15) of a hexadecimal character c. | 50 // Returns the value (0 .. 15) of a hexadecimal character c. |
48 // If c is not a legal hexadecimal character, returns a value < 0. | 51 // If c is not a legal hexadecimal character, returns a value < 0. |
49 inline int HexValue(uc32 c) { | 52 inline int HexValue(uc32 c) { |
50 c -= '0'; | 53 c -= '0'; |
51 if (static_cast<unsigned>(c) <= 9) return c; | 54 if (static_cast<unsigned>(c) <= 9) return c; |
52 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. | 55 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. |
53 if (static_cast<unsigned>(c) <= 5) return c + 10; | 56 if (static_cast<unsigned>(c) <= 5) return c + 10; |
54 return -1; | 57 return -1; |
55 } | 58 } |
56 | 59 |
(...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
363 | 366 |
364 void Initialize(Utf16CharacterStream* source); | 367 void Initialize(Utf16CharacterStream* source); |
365 | 368 |
366 // Returns the next token and advances input. | 369 // Returns the next token and advances input. |
367 Token::Value Next(); | 370 Token::Value Next(); |
368 // Returns the current token again. | 371 // Returns the current token again. |
369 Token::Value current_token() { return current_.token; } | 372 Token::Value current_token() { return current_.token; } |
370 // Returns the location information for the current token | 373 // Returns the location information for the current token |
371 // (the token last returned by Next()). | 374 // (the token last returned by Next()). |
372 Location location() const { return current_.location; } | 375 Location location() const { return current_.location; } |
373 // Returns the literal string, if any, for the current token (the | 376 |
374 // token last returned by Next()). The string is 0-terminated. | 377 // Similar functions for the upcoming token. |
375 // Literal strings are collected for identifiers, strings, and | 378 |
376 // numbers. | 379 // One token look-ahead (past the token returned by Next()). |
377 // These functions only give the correct result if the literal | 380 Token::Value peek() const { return next_.token; } |
378 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 381 |
379 Vector<const char> literal_one_byte_string() { | 382 Location peek_location() const { return next_.location; } |
380 ASSERT_NOT_NULL(current_.literal_chars); | |
381 return current_.literal_chars->one_byte_literal(); | |
382 } | |
383 Vector<const uc16> literal_utf16_string() { | |
384 ASSERT_NOT_NULL(current_.literal_chars); | |
385 return current_.literal_chars->utf16_literal(); | |
386 } | |
387 bool is_literal_one_byte() { | |
388 ASSERT_NOT_NULL(current_.literal_chars); | |
389 return current_.literal_chars->is_one_byte(); | |
390 } | |
391 bool is_literal_contextual_keyword(Vector<const char> keyword) { | |
392 ASSERT_NOT_NULL(current_.literal_chars); | |
393 return current_.literal_chars->is_contextual_keyword(keyword); | |
394 } | |
395 int literal_length() const { | |
396 ASSERT_NOT_NULL(current_.literal_chars); | |
397 return current_.literal_chars->length(); | |
398 } | |
399 | 383 |
400 bool literal_contains_escapes() const { | 384 bool literal_contains_escapes() const { |
401 Location location = current_.location; | 385 Location location = current_.location; |
402 int source_length = (location.end_pos - location.beg_pos); | 386 int source_length = (location.end_pos - location.beg_pos); |
403 if (current_.token == Token::STRING) { | 387 if (current_.token == Token::STRING) { |
404 // Subtract delimiters. | 388 // Subtract delimiters. |
405 source_length -= 2; | 389 source_length -= 2; |
406 } | 390 } |
407 return current_.literal_chars->length() != source_length; | 391 return current_.literal_chars->length() != source_length; |
408 } | 392 } |
409 | 393 bool is_literal_contextual_keyword(Vector<const char> keyword) { |
410 // Similar functions for the upcoming token. | 394 ASSERT_NOT_NULL(current_.literal_chars); |
411 | 395 return current_.literal_chars->is_contextual_keyword(keyword); |
412 // One token look-ahead (past the token returned by Next()). | |
413 Token::Value peek() const { return next_.token; } | |
414 | |
415 Location peek_location() const { return next_.location; } | |
416 | |
417 // Returns the literal string for the next token (the token that | |
418 // would be returned if Next() were called). | |
419 Vector<const char> next_literal_one_byte_string() { | |
420 ASSERT_NOT_NULL(next_.literal_chars); | |
421 return next_.literal_chars->one_byte_literal(); | |
422 } | |
423 Vector<const uc16> next_literal_utf16_string() { | |
424 ASSERT_NOT_NULL(next_.literal_chars); | |
425 return next_.literal_chars->utf16_literal(); | |
426 } | |
427 bool is_next_literal_one_byte() { | |
428 ASSERT_NOT_NULL(next_.literal_chars); | |
429 return next_.literal_chars->is_one_byte(); | |
430 } | 396 } |
431 bool is_next_contextual_keyword(Vector<const char> keyword) { | 397 bool is_next_contextual_keyword(Vector<const char> keyword) { |
432 ASSERT_NOT_NULL(next_.literal_chars); | 398 ASSERT_NOT_NULL(next_.literal_chars); |
433 return next_.literal_chars->is_contextual_keyword(keyword); | 399 return next_.literal_chars->is_contextual_keyword(keyword); |
434 } | 400 } |
435 int next_literal_length() const { | |
436 ASSERT_NOT_NULL(next_.literal_chars); | |
437 return next_.literal_chars->length(); | |
438 } | |
439 | 401 |
440 Handle<String> AllocateLiteralString(Isolate* isolate, PretenureFlag tenured); | |
441 Handle<String> AllocateNextLiteralString(Isolate* isolate, | 402 Handle<String> AllocateNextLiteralString(Isolate* isolate, |
442 PretenureFlag tenured); | 403 PretenureFlag tenured); |
443 Handle<String> AllocateInternalizedString(Isolate* isolate); | 404 Handle<String> AllocateInternalizedString(Isolate* isolate); |
444 | 405 |
445 double DoubleValue(); | 406 double DoubleValue(); |
446 bool UnescapedLiteralMatches(const char* data, int length) { | 407 bool UnescapedLiteralMatches(const char* data, int length) { |
447 if (is_literal_one_byte() && | 408 if (is_literal_one_byte() && |
448 literal_length() == length && | 409 literal_length() == length && |
449 !literal_contains_escapes()) { | 410 !literal_contains_escapes()) { |
450 return !strncmp(literal_one_byte_string().start(), data, length); | 411 return !strncmp(literal_one_byte_string().start(), data, length); |
451 } | 412 } |
452 return false; | 413 return false; |
453 } | 414 } |
454 void IsGetOrSet(bool* is_get, bool* is_set) { | 415 void IsGetOrSet(bool* is_get, bool* is_set) { |
455 if (is_literal_one_byte() && | 416 if (is_literal_one_byte() && |
456 literal_length() == 3 && | 417 literal_length() == 3 && |
457 !literal_contains_escapes()) { | 418 !literal_contains_escapes()) { |
458 const char* token = literal_one_byte_string().start(); | 419 const char* token = literal_one_byte_string().start(); |
459 *is_get = strncmp(token, "get", 3) == 0; | 420 *is_get = strncmp(token, "get", 3) == 0; |
460 *is_set = !*is_get && strncmp(token, "set", 3) == 0; | 421 *is_set = !*is_get && strncmp(token, "set", 3) == 0; |
461 } | 422 } |
462 } | 423 } |
463 | 424 |
| 425 int FindNumber(DuplicateFinder* finder, int value); |
| 426 int FindSymbol(DuplicateFinder* finder, int value); |
| 427 |
| 428 void LogSymbol(ParserRecorder* log, int position); |
| 429 |
464 UnicodeCache* unicode_cache() { return unicode_cache_; } | 430 UnicodeCache* unicode_cache() { return unicode_cache_; } |
465 | 431 |
466 static const int kCharacterLookaheadBufferSize = 1; | |
467 | |
468 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | |
469 uc32 ScanOctalEscape(uc32 c, int length); | |
470 | |
471 // Returns the location of the last seen octal literal. | 432 // Returns the location of the last seen octal literal. |
472 Location octal_position() const { return octal_pos_; } | 433 Location octal_position() const { return octal_pos_; } |
473 void clear_octal_position() { octal_pos_ = Location::invalid(); } | 434 void clear_octal_position() { octal_pos_ = Location::invalid(); } |
474 | 435 |
475 // Seek forward to the given position. This operation does not | 436 // Seek forward to the given position. This operation does not |
476 // work in general, for instance when there are pushed back | 437 // work in general, for instance when there are pushed back |
477 // characters, but works for seeking forward until simple delimiter | 438 // characters, but works for seeking forward until simple delimiter |
478 // tokens, which is what it is used for. | 439 // tokens, which is what it is used for. |
479 void SeekForward(int pos); | 440 void SeekForward(int pos); |
480 | 441 |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
512 bool ScanRegExpFlags(); | 473 bool ScanRegExpFlags(); |
513 | 474 |
514 private: | 475 private: |
515 // The current and look-ahead token. | 476 // The current and look-ahead token. |
516 struct TokenDesc { | 477 struct TokenDesc { |
517 Token::Value token; | 478 Token::Value token; |
518 Location location; | 479 Location location; |
519 LiteralBuffer* literal_chars; | 480 LiteralBuffer* literal_chars; |
520 }; | 481 }; |
521 | 482 |
| 483 static const int kCharacterLookaheadBufferSize = 1; |
| 484 |
| 485 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
| 486 uc32 ScanOctalEscape(uc32 c, int length); |
| 487 |
522 // Call this after setting source_ to the input. | 488 // Call this after setting source_ to the input. |
523 void Init() { | 489 void Init() { |
524 // Set c0_ (one character ahead) | 490 // Set c0_ (one character ahead) |
525 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); | 491 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); |
526 Advance(); | 492 Advance(); |
527 // Initialize current_ to not refer to a literal. | 493 // Initialize current_ to not refer to a literal. |
528 current_.literal_chars = NULL; | 494 current_.literal_chars = NULL; |
529 } | 495 } |
530 | 496 |
531 // Literal buffer support | 497 // Literal buffer support |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
572 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | 538 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
573 Advance(); | 539 Advance(); |
574 if (c0_ == next) { | 540 if (c0_ == next) { |
575 Advance(); | 541 Advance(); |
576 return then; | 542 return then; |
577 } else { | 543 } else { |
578 return else_; | 544 return else_; |
579 } | 545 } |
580 } | 546 } |
581 | 547 |
| 548 // Returns the literal string, if any, for the current token (the |
| 549 // token last returned by Next()). The string is 0-terminated. |
| 550 // Literal strings are collected for identifiers, strings, and |
| 551 // numbers. |
| 552 // These functions only give the correct result if the literal |
| 553 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
| 554 Vector<const char> literal_one_byte_string() { |
| 555 ASSERT_NOT_NULL(current_.literal_chars); |
| 556 return current_.literal_chars->one_byte_literal(); |
| 557 } |
| 558 Vector<const uc16> literal_utf16_string() { |
| 559 ASSERT_NOT_NULL(current_.literal_chars); |
| 560 return current_.literal_chars->utf16_literal(); |
| 561 } |
| 562 bool is_literal_one_byte() { |
| 563 ASSERT_NOT_NULL(current_.literal_chars); |
| 564 return current_.literal_chars->is_one_byte(); |
| 565 } |
| 566 int literal_length() const { |
| 567 ASSERT_NOT_NULL(current_.literal_chars); |
| 568 return current_.literal_chars->length(); |
| 569 } |
| 570 // Returns the literal string for the next token (the token that |
| 571 // would be returned if Next() were called). |
| 572 Vector<const char> next_literal_one_byte_string() { |
| 573 ASSERT_NOT_NULL(next_.literal_chars); |
| 574 return next_.literal_chars->one_byte_literal(); |
| 575 } |
| 576 Vector<const uc16> next_literal_utf16_string() { |
| 577 ASSERT_NOT_NULL(next_.literal_chars); |
| 578 return next_.literal_chars->utf16_literal(); |
| 579 } |
| 580 bool is_next_literal_one_byte() { |
| 581 ASSERT_NOT_NULL(next_.literal_chars); |
| 582 return next_.literal_chars->is_one_byte(); |
| 583 } |
| 584 int next_literal_length() const { |
| 585 ASSERT_NOT_NULL(next_.literal_chars); |
| 586 return next_.literal_chars->length(); |
| 587 } |
| 588 |
582 uc32 ScanHexNumber(int expected_length); | 589 uc32 ScanHexNumber(int expected_length); |
583 | 590 |
584 // Scans a single JavaScript token. | 591 // Scans a single JavaScript token. |
585 void Scan(); | 592 void Scan(); |
586 | 593 |
587 bool SkipWhiteSpace(); | 594 bool SkipWhiteSpace(); |
588 Token::Value SkipSingleLineComment(); | 595 Token::Value SkipSingleLineComment(); |
589 Token::Value SkipMultiLineComment(); | 596 Token::Value SkipMultiLineComment(); |
590 // Scans a possible HTML comment -- begins with '<!'. | 597 // Scans a possible HTML comment -- begins with '<!'. |
591 Token::Value ScanHtmlComment(); | 598 Token::Value ScanHtmlComment(); |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
644 bool harmony_scoping_; | 651 bool harmony_scoping_; |
645 // Whether we scan 'module', 'import', 'export' as keywords. | 652 // Whether we scan 'module', 'import', 'export' as keywords. |
646 bool harmony_modules_; | 653 bool harmony_modules_; |
647 // Whether we scan 0o777 and 0b111 as numbers. | 654 // Whether we scan 0o777 and 0b111 as numbers. |
648 bool harmony_numeric_literals_; | 655 bool harmony_numeric_literals_; |
649 }; | 656 }; |
650 | 657 |
651 } } // namespace v8::internal | 658 } } // namespace v8::internal |
652 | 659 |
653 #endif // V8_SCANNER_H_ | 660 #endif // V8_SCANNER_H_ |
OLD | NEW |