| OLD | NEW |
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 57 // Buffered stream of characters, using an internal UC16 buffer. | 57 // Buffered stream of characters, using an internal UC16 buffer. |
| 58 | 58 |
| 59 class UC16CharacterStream { | 59 class UC16CharacterStream { |
| 60 public: | 60 public: |
| 61 UC16CharacterStream() : pos_(0) { } | 61 UC16CharacterStream() : pos_(0) { } |
| 62 virtual ~UC16CharacterStream() { } | 62 virtual ~UC16CharacterStream() { } |
| 63 | 63 |
| 64 // Returns and advances past the next UC16 character in the input | 64 // Returns and advances past the next UC16 character in the input |
| 65 // stream. If there are no more characters, it returns a negative | 65 // stream. If there are no more characters, it returns a negative |
| 66 // value. | 66 // value. |
| 67 inline int32_t Advance() { | 67 inline uc32 Advance() { |
| 68 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { | 68 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { |
| 69 pos_++; | 69 pos_++; |
| 70 return *(buffer_cursor_++); | 70 return static_cast<uc32>(*(buffer_cursor_++)); |
| 71 } | 71 } |
| 72 // Note: currently the following increment is necessary to avoid a | 72 // Note: currently the following increment is necessary to avoid a |
| 73 // parser problem! The scanner treats the final kEndOfInput as | 73 // parser problem! The scanner treats the final kEndOfInput as |
| 74 // a character with a position, and does math relative to that | 74 // a character with a position, and does math relative to that |
| 75 // position. | 75 // position. |
| 76 pos_++; | 76 pos_++; |
| 77 | 77 |
| 78 return kEndOfInput; | 78 return kEndOfInput; |
| 79 } | 79 } |
| 80 | 80 |
| 81 // Return the current position in the character stream. | 81 // Return the current position in the character stream. |
| 82 // Starts at zero. | 82 // Starts at zero. |
| 83 inline unsigned pos() const { return pos_; } | 83 inline unsigned pos() const { return pos_; } |
| 84 | 84 |
| 85 // Skips forward past the next character_count UC16 characters | 85 // Skips forward past the next character_count UC16 characters |
| 86 // in the input, or until the end of input if that comes sooner. | 86 // in the input, or until the end of input if that comes sooner. |
| 87 // Returns the number of characters actually skipped. If less | 87 // Returns the number of characters actually skipped. If less |
| 88 // than character_count, the end of input was reached. | 88 // than character_count, the end of input was reached. |
| 89 inline unsigned SeekForward(unsigned character_count) { | 89 inline unsigned SeekForward(unsigned character_count) { |
| 90 unsigned buffered_chars = | 90 unsigned buffered_chars = |
| 91 static_cast<unsigned>(buffer_end_ - buffer_cursor_); | 91 static_cast<unsigned>(buffer_end_ - buffer_cursor_); |
| 92 if (character_count <= buffered_chars) { | 92 if (character_count <= buffered_chars) { |
| 93 buffer_cursor_ += character_count; | 93 buffer_cursor_ += character_count; |
| 94 pos_ += character_count; | 94 pos_ += character_count; |
| 95 return character_count; | 95 return character_count; |
| 96 } | 96 } |
| 97 return SlowSeekForward(character_count); | 97 return SlowSeekForward(character_count); |
| 98 } | 98 } |
| 99 | 99 |
| 100 // Pushes back the most recently read UC16 character, i.e., | 100 // Pushes back the most recently read UC16 character (or negative |
| 101 // the value returned by the most recent call to Advance. | 101 // value if at end of input), i.e., the value returned by the most recent |
| 102 // call to Advance. |
| 102 // Must not be used right after calling SeekForward. | 103 // Must not be used right after calling SeekForward. |
| 103 virtual void PushBack(uc16 character) = 0; | 104 virtual void PushBack(int32_t character) = 0; |
| 104 | 105 |
| 105 protected: | 106 protected: |
| 106 static const int32_t kEndOfInput = -1; | 107 static const uc32 kEndOfInput = -1; |
| 107 | 108 |
| 108 // Ensures that the buffer_cursor_ points to the character at | 109 // Ensures that the buffer_cursor_ points to the character at |
| 109 // position pos_ of the input, if possible. If the position | 110 // position pos_ of the input, if possible. If the position |
| 110 // is at or after the end of the input, return false. If there | 111 // is at or after the end of the input, return false. If there |
| 111 // are more characters available, return true. | 112 // are more characters available, return true. |
| 112 virtual bool ReadBlock() = 0; | 113 virtual bool ReadBlock() = 0; |
| 113 virtual unsigned SlowSeekForward(unsigned character_count) = 0; | 114 virtual unsigned SlowSeekForward(unsigned character_count) = 0; |
| 114 | 115 |
| 115 const uc16* buffer_cursor_; | 116 const uc16* buffer_cursor_; |
| 116 const uc16* buffer_end_; | 117 const uc16* buffer_end_; |
| (...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 239 Vector<byte> backing_store_; | 240 Vector<byte> backing_store_; |
| 240 }; | 241 }; |
| 241 | 242 |
| 242 | 243 |
| 243 // ---------------------------------------------------------------------------- | 244 // ---------------------------------------------------------------------------- |
| 244 // Scanner base-class. | 245 // Scanner base-class. |
| 245 | 246 |
| 246 // Generic functionality used by both JSON and JavaScript scanners. | 247 // Generic functionality used by both JSON and JavaScript scanners. |
| 247 class Scanner { | 248 class Scanner { |
| 248 public: | 249 public: |
| 250 // -1 is outside of the range of any real source code. |
| 251 static const int kNoOctalLocation = -1; |
| 252 |
| 249 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; | 253 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; |
| 250 | 254 |
| 251 class LiteralScope { | 255 class LiteralScope { |
| 252 public: | 256 public: |
| 253 explicit LiteralScope(Scanner* self); | 257 explicit LiteralScope(Scanner* self); |
| 254 ~LiteralScope(); | 258 ~LiteralScope(); |
| 255 void Complete(); | 259 void Complete(); |
| 256 | 260 |
| 257 private: | 261 private: |
| 258 Scanner* scanner_; | 262 Scanner* scanner_; |
| 259 bool complete_; | 263 bool complete_; |
| 260 }; | 264 }; |
| 261 | 265 |
| 262 Scanner(); | 266 Scanner(); |
| 263 | 267 |
| 264 // Returns the current token again. | 268 // Returns the current token again. |
| 265 Token::Value current_token() { return current_.token; } | 269 Token::Value current_token() { return current_.token; } |
| 266 | 270 |
| 267 // One token look-ahead (past the token returned by Next()). | 271 // One token look-ahead (past the token returned by Next()). |
| 268 Token::Value peek() const { return next_.token; } | 272 Token::Value peek() const { return next_.token; } |
| 269 | 273 |
| 270 struct Location { | 274 struct Location { |
| 271 Location(int b, int e) : beg_pos(b), end_pos(e) { } | 275 Location(int b, int e) : beg_pos(b), end_pos(e) { } |
| 272 Location() : beg_pos(0), end_pos(0) { } | 276 Location() : beg_pos(0), end_pos(0) { } |
| 277 |
| 278 bool IsValid() const { |
| 279 return beg_pos >= 0 && end_pos >= beg_pos; |
| 280 } |
| 281 |
| 273 int beg_pos; | 282 int beg_pos; |
| 274 int end_pos; | 283 int end_pos; |
| 275 }; | 284 }; |
| 276 | 285 |
| 286 static Location NoLocation() { |
| 287 return Location(-1, -1); |
| 288 } |
| 289 |
| 277 // Returns the location information for the current token | 290 // Returns the location information for the current token |
| 278 // (the token returned by Next()). | 291 // (the token returned by Next()). |
| 279 Location location() const { return current_.location; } | 292 Location location() const { return current_.location; } |
| 280 Location peek_location() const { return next_.location; } | 293 Location peek_location() const { return next_.location; } |
| 281 | 294 |
| 295 // Returns the start position of the last seen octal literal. |
| 296 int octal_position() const { return octal_pos_; } |
| 297 void clear_octal_position() { octal_pos_ = -1; } |
| 298 |
| 282 // Returns the literal string, if any, for the current token (the | 299 // Returns the literal string, if any, for the current token (the |
| 283 // token returned by Next()). The string is 0-terminated and in | 300 // token returned by Next()). The string is 0-terminated and in |
| 284 // UTF-8 format; they may contain 0-characters. Literal strings are | 301 // UTF-8 format; they may contain 0-characters. Literal strings are |
| 285 // collected for identifiers, strings, and numbers. | 302 // collected for identifiers, strings, and numbers. |
| 286 // These functions only give the correct result if the literal | 303 // These functions only give the correct result if the literal |
| 287 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 304 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
| 288 bool is_literal_ascii() { | 305 bool is_literal_ascii() { |
| 289 ASSERT_NOT_NULL(current_.literal_chars); | 306 ASSERT_NOT_NULL(current_.literal_chars); |
| 290 return current_.literal_chars->is_ascii(); | 307 return current_.literal_chars->is_ascii(); |
| 291 } | 308 } |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 385 Advance(); | 402 Advance(); |
| 386 if (c0_ == next) { | 403 if (c0_ == next) { |
| 387 Advance(); | 404 Advance(); |
| 388 return then; | 405 return then; |
| 389 } else { | 406 } else { |
| 390 return else_; | 407 return else_; |
| 391 } | 408 } |
| 392 } | 409 } |
| 393 | 410 |
| 394 uc32 ScanHexEscape(uc32 c, int length); | 411 uc32 ScanHexEscape(uc32 c, int length); |
| 412 |
| 413 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
| 395 uc32 ScanOctalEscape(uc32 c, int length); | 414 uc32 ScanOctalEscape(uc32 c, int length); |
| 396 | 415 |
| 397 // Return the current source position. | 416 // Return the current source position. |
| 398 int source_pos() { | 417 int source_pos() { |
| 399 return source_->pos() - kCharacterLookaheadBufferSize; | 418 return source_->pos() - kCharacterLookaheadBufferSize; |
| 400 } | 419 } |
| 401 | 420 |
| 402 // Buffers collecting literal strings, numbers, etc. | 421 // Buffers collecting literal strings, numbers, etc. |
| 403 LiteralBuffer literal_buffer1_; | 422 LiteralBuffer literal_buffer1_; |
| 404 LiteralBuffer literal_buffer2_; | 423 LiteralBuffer literal_buffer2_; |
| 405 | 424 |
| 406 TokenDesc current_; // desc for current token (as returned by Next()) | 425 TokenDesc current_; // desc for current token (as returned by Next()) |
| 407 TokenDesc next_; // desc for next token (one token look-ahead) | 426 TokenDesc next_; // desc for next token (one token look-ahead) |
| 408 | 427 |
| 409 // Input stream. Must be initialized to a UC16CharacterStream. | 428 // Input stream. Must be initialized to a UC16CharacterStream. |
| 410 UC16CharacterStream* source_; | 429 UC16CharacterStream* source_; |
| 411 | 430 |
| 431 // Start position of the octal literal last scanned. |
| 432 int octal_pos_; |
| 412 | 433 |
| 413 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 434 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| 414 uc32 c0_; | 435 uc32 c0_; |
| 415 }; | 436 }; |
| 416 | 437 |
| 417 // ---------------------------------------------------------------------------- | 438 // ---------------------------------------------------------------------------- |
| 418 // JavaScriptScanner - base logic for JavaScript scanning. | 439 // JavaScriptScanner - base logic for JavaScript scanning. |
| 419 | 440 |
| 420 class JavaScriptScanner : public Scanner { | 441 class JavaScriptScanner : public Scanner { |
| 421 public: | 442 public: |
| (...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 536 UNMATCHABLE, | 557 UNMATCHABLE, |
| 537 INITIAL, | 558 INITIAL, |
| 538 KEYWORD_PREFIX, | 559 KEYWORD_PREFIX, |
| 539 KEYWORD_MATCHED, | 560 KEYWORD_MATCHED, |
| 540 C, | 561 C, |
| 541 CA, | 562 CA, |
| 542 CO, | 563 CO, |
| 543 CON, | 564 CON, |
| 544 D, | 565 D, |
| 545 DE, | 566 DE, |
| 567 E, |
| 568 EX, |
| 546 F, | 569 F, |
| 547 I, | 570 I, |
| 571 IM, |
| 572 IMP, |
| 548 IN, | 573 IN, |
| 549 N, | 574 N, |
| 575 P, |
| 576 PR, |
| 577 S, |
| 550 T, | 578 T, |
| 551 TH, | 579 TH, |
| 552 TR, | 580 TR, |
| 553 V, | 581 V, |
| 554 W | 582 W |
| 555 }; | 583 }; |
| 556 | 584 |
| 557 struct FirstState { | 585 struct FirstState { |
| 558 const char* keyword; | 586 const char* keyword; |
| 559 State state; | 587 State state; |
| 560 Token::Value token; | 588 Token::Value token; |
| 561 }; | 589 }; |
| 562 | 590 |
| 563 // Range of possible first characters of a keyword. | 591 // Range of possible first characters of a keyword. |
| 564 static const unsigned int kFirstCharRangeMin = 'b'; | 592 static const unsigned int kFirstCharRangeMin = 'b'; |
| 565 static const unsigned int kFirstCharRangeMax = 'w'; | 593 static const unsigned int kFirstCharRangeMax = 'y'; |
| 566 static const unsigned int kFirstCharRangeLength = | 594 static const unsigned int kFirstCharRangeLength = |
| 567 kFirstCharRangeMax - kFirstCharRangeMin + 1; | 595 kFirstCharRangeMax - kFirstCharRangeMin + 1; |
| 568 // State map for first keyword character range. | 596 // State map for first keyword character range. |
| 569 static FirstState first_states_[kFirstCharRangeLength]; | 597 static FirstState first_states_[kFirstCharRangeLength]; |
| 570 | 598 |
| 571 // If input equals keyword's character at position, continue matching keyword | 599 // If input equals keyword's character at position, continue matching keyword |
| 572 // from that position. | 600 // from that position. |
| 573 inline bool MatchKeywordStart(unibrow::uchar input, | 601 inline bool MatchKeywordStart(unibrow::uchar input, |
| 574 const char* keyword, | 602 const char* keyword, |
| 575 int position, | 603 int position, |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 616 // keyword with the current prefix). | 644 // keyword with the current prefix). |
| 617 const char* keyword_; | 645 const char* keyword_; |
| 618 int counter_; | 646 int counter_; |
| 619 Token::Value keyword_token_; | 647 Token::Value keyword_token_; |
| 620 }; | 648 }; |
| 621 | 649 |
| 622 | 650 |
| 623 } } // namespace v8::internal | 651 } } // namespace v8::internal |
| 624 | 652 |
| 625 #endif // V8_SCANNER_BASE_H_ | 653 #endif // V8_SCANNER_BASE_H_ |
| OLD | NEW |