| OLD | NEW |
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 45 // Returns the value (0 .. 15) of a hexadecimal character c. | 45 // Returns the value (0 .. 15) of a hexadecimal character c. |
| 46 // If c is not a legal hexadecimal character, returns a value < 0. | 46 // If c is not a legal hexadecimal character, returns a value < 0. |
| 47 inline int HexValue(uc32 c) { | 47 inline int HexValue(uc32 c) { |
| 48 c -= '0'; | 48 c -= '0'; |
| 49 if (static_cast<unsigned>(c) <= 9) return c; | 49 if (static_cast<unsigned>(c) <= 9) return c; |
| 50 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. | 50 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. |
| 51 if (static_cast<unsigned>(c) <= 5) return c + 10; | 51 if (static_cast<unsigned>(c) <= 5) return c + 10; |
| 52 return -1; | 52 return -1; |
| 53 } | 53 } |
| 54 | 54 |
| 55 // ---------------------------------------------------------------------------- | |
| 56 // UTF16Buffer - scanner input source with pushback. | |
| 57 | 55 |
| 58 class UTF16Buffer { | 56 // --------------------------------------------------------------------- |
| 57 // Buffered stream of characters, using an internal UC16 buffer. |
| 58 |
| 59 class UC16CharacterStream { |
| 59 public: | 60 public: |
| 60 UTF16Buffer(); | 61 UC16CharacterStream() : pos_(0) { } |
| 61 virtual ~UTF16Buffer() {} | 62 virtual ~UC16CharacterStream() { } |
| 62 | 63 |
| 63 virtual void PushBack(uc32 ch) = 0; | 64 // Returns and advances past the next UC16 character in the input |
| 64 // Returns a value < 0 when the buffer end is reached. | 65 // stream. If there are no more characters, it returns a negative |
| 65 virtual uc32 Advance() = 0; | 66 // value. |
| 66 virtual void SeekForward(int pos) = 0; | 67 inline int32_t Advance() { |
| 68 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { |
| 69 pos_++; |
| 70 return *(buffer_cursor_++); |
| 71 } |
| 72 // Note: currently the following increment is necessary to avoid a |
| 73 // parser problem! The scanner treats the final kEndOfInput as |
| 74 // a character with a position, and does math relative to that |
| 75 // position. |
| 76 pos_++; |
| 67 | 77 |
| 68 int pos() const { return pos_; } | 78 return kEndOfInput; |
| 79 } |
| 69 | 80 |
| 70 static const int kNoEndPosition = 1; | 81 // Return the current position in the character stream. |
| 82 // Starts at zero. |
| 83 inline unsigned pos() const { return pos_; } |
| 84 |
| 85 // Skips forward past the next character_count UC16 characters |
| 86 // in the input, or until the end of input if that comes sooner. |
| 87 // Returns the number of characters actually skipped. If less |
| 88 // than character_count, the end of the input was reached. |
| 89 inline unsigned SeekForward(unsigned character_count) { |
| 90 unsigned buffered_chars = |
| 91 static_cast<unsigned>(buffer_end_ - buffer_cursor_); |
| 92 if (character_count <= buffered_chars) { |
| 93 buffer_cursor_ += character_count; |
| 94 pos_ += character_count; |
| 95 return character_count; |
| 96 } |
| 97 return SlowSeekForward(character_count); |
| 98 } |
| 99 |
| 100 // Pushes back the most recently read UC16 character, i.e., |
| 101 // the value returned by the most recent call to Advance. |
| 102 // Must not be used right after calling SeekForward. |
| 103 virtual void PushBack(uc16 character) = 0; |
| 71 | 104 |
| 72 protected: | 105 protected: |
| 73 // Initial value of end_ before the input stream is initialized. | 106 static const int32_t kEndOfInput = -1; |
| 74 | 107 |
| 75 int pos_; // Current position in the buffer. | 108 // Ensures that the buffer_cursor_ points to the character at |
| 76 int end_; // Position where scanning should stop (EOF). | 109 // position pos_ of the input, if possible. If the position |
| 110 // is at or after the end of the input, returns false. If there |
| 111 // are more characters available, returns true. |
| 112 virtual bool ReadBlock() = 0; |
| 113 virtual unsigned SlowSeekForward(unsigned character_count) = 0; |
| 114 |
| 115 const uc16* buffer_cursor_; |
| 116 const uc16* buffer_end_; |
| 117 unsigned pos_; |
| 77 }; | 118 }; |
| 78 | 119 |
| 79 | 120 |
| 121 // --------------------------------------------------------------------- |
| 122 // Constants used by scanners. |
| 123 |
| 80 class ScannerConstants : AllStatic { | 124 class ScannerConstants : AllStatic { |
| 81 public: | 125 public: |
| 82 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; | 126 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; |
| 83 | 127 |
| 84 static StaticResource<Utf8Decoder>* utf8_decoder() { | 128 static StaticResource<Utf8Decoder>* utf8_decoder() { |
| 85 return &utf8_decoder_; | 129 return &utf8_decoder_; |
| 86 } | 130 } |
| 87 | 131 |
| 88 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; | 132 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; |
| 89 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; | 133 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; |
| (...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 270 } | 314 } |
| 271 | 315 |
| 272 inline void AddLiteralCharAdvance() { | 316 inline void AddLiteralCharAdvance() { |
| 273 AddLiteralChar(c0_); | 317 AddLiteralChar(c0_); |
| 274 Advance(); | 318 Advance(); |
| 275 } | 319 } |
| 276 | 320 |
| 277 // Low-level scanning support. | 321 // Low-level scanning support. |
| 278 void Advance() { c0_ = source_->Advance(); } | 322 void Advance() { c0_ = source_->Advance(); } |
| 279 void PushBack(uc32 ch) { | 323 void PushBack(uc32 ch) { |
| 280 source_->PushBack(ch); | 324 source_->PushBack(c0_); |
| 281 c0_ = ch; | 325 c0_ = ch; |
| 282 } | 326 } |
| 283 | 327 |
| 284 inline Token::Value Select(Token::Value tok) { | 328 inline Token::Value Select(Token::Value tok) { |
| 285 Advance(); | 329 Advance(); |
| 286 return tok; | 330 return tok; |
| 287 } | 331 } |
| 288 | 332 |
| 289 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | 333 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
| 290 Advance(); | 334 Advance(); |
| 291 if (c0_ == next) { | 335 if (c0_ == next) { |
| 292 Advance(); | 336 Advance(); |
| 293 return then; | 337 return then; |
| 294 } else { | 338 } else { |
| 295 return else_; | 339 return else_; |
| 296 } | 340 } |
| 297 } | 341 } |
| 298 | 342 |
| 299 uc32 ScanHexEscape(uc32 c, int length); | 343 uc32 ScanHexEscape(uc32 c, int length); |
| 300 uc32 ScanOctalEscape(uc32 c, int length); | 344 uc32 ScanOctalEscape(uc32 c, int length); |
| 301 | 345 |
| 302 // Return the current source position. | 346 // Return the current source position. |
| 303 int source_pos() { | 347 int source_pos() { |
| 304 return source_->pos() - kCharacterLookaheadBufferSize; | 348 return source_->pos() - kCharacterLookaheadBufferSize; |
| 305 } | 349 } |
| 306 | 350 |
| 307 TokenDesc current_; // desc for current token (as returned by Next()) | 351 TokenDesc current_; // desc for current token (as returned by Next()) |
| 308 TokenDesc next_; // desc for next token (one token look-ahead) | 352 TokenDesc next_; // desc for next token (one token look-ahead) |
| 309 | 353 |
| 310 // Input stream. Must be initialized to a UTF16Buffer. | 354 // Input stream. Must be initialized to a UC16CharacterStream. |
| 311 UTF16Buffer* source_; | 355 UC16CharacterStream* source_; |
| 312 | 356 |
| 313 // Buffer to hold literal values (identifiers, strings, numbers) | 357 // Buffer to hold literal values (identifiers, strings, numbers) |
| 314 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. | 358 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. |
| 315 LiteralCollector literal_buffer_; | 359 LiteralCollector literal_buffer_; |
| 316 | 360 |
| 317 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 361 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| 318 uc32 c0_; | 362 uc32 c0_; |
| 319 }; | 363 }; |
| 320 | 364 |
| 321 // ---------------------------------------------------------------------------- | 365 // ---------------------------------------------------------------------------- |
| (...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 540 // keyword with the current prefix). | 584 // keyword with the current prefix). |
| 541 const char* keyword_; | 585 const char* keyword_; |
| 542 int counter_; | 586 int counter_; |
| 543 Token::Value keyword_token_; | 587 Token::Value keyword_token_; |
| 544 }; | 588 }; |
| 545 | 589 |
| 546 | 590 |
| 547 } } // namespace v8::internal | 591 } } // namespace v8::internal |
| 548 | 592 |
| 549 #endif // V8_SCANNER_BASE_H_ | 593 #endif // V8_SCANNER_BASE_H_ |
| OLD | NEW |