| OLD | NEW |
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 45 // Returns the value (0 .. 15) of a hexadecimal character c. | 45 // Returns the value (0 .. 15) of a hexadecimal character c. |
| 46 // If c is not a legal hexadecimal character, returns a value < 0. | 46 // If c is not a legal hexadecimal character, returns a value < 0. |
| 47 inline int HexValue(uc32 c) { | 47 inline int HexValue(uc32 c) { |
| 48 c -= '0'; | 48 c -= '0'; |
| 49 if (static_cast<unsigned>(c) <= 9) return c; | 49 if (static_cast<unsigned>(c) <= 9) return c; |
| 50 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. | 50 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. |
| 51 if (static_cast<unsigned>(c) <= 5) return c + 10; | 51 if (static_cast<unsigned>(c) <= 5) return c + 10; |
| 52 return -1; | 52 return -1; |
| 53 } | 53 } |
| 54 | 54 |
| 55 // ---------------------------------------------------------------------------- | |
| 56 // UTF16Buffer - scanner input source with pushback. | |
| 57 | 55 |
| 58 class UTF16Buffer { | 56 // --------------------------------------------------------------------- |
| 57 // Buffered stream of characters, using an internal UC16 buffer. |
| 58 |
| 59 class UC16CharacterStream { |
| 59 public: | 60 public: |
| 60 UTF16Buffer(); | 61 UC16CharacterStream() : pos_(0) { } |
| 61 virtual ~UTF16Buffer() {} | 62 virtual ~UC16CharacterStream() { } |
| 62 | 63 |
| 63 virtual void PushBack(uc32 ch) = 0; | 64 // Returns and advances past the next UC16 character in the input |
| 64 // Returns a value < 0 when the buffer end is reached. | 65 // stream. If there are no more characters, it returns a negative |
| 65 virtual uc32 Advance() = 0; | 66 // value. |
| 66 virtual void SeekForward(int pos) = 0; | 67 inline int32_t Advance() { |
| 68 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { |
| 69 pos_++; |
| 70 return *(buffer_cursor_++); |
| 71 } |
| 72 // Note: currently the following increment is necessary to avoid a |
| 73 // parser problem! The scanner treats the final kEndOfInput as |
| 74 // a character with a position, and does math relative to that |
| 75 // position. |
| 76 pos_++; |
| 67 | 77 |
| 68 int pos() const { return pos_; } | 78 return kEndOfInput; |
| 79 } |
| 69 | 80 |
| 70 static const int kNoEndPosition = 1; | 81 // Return the current position in the character stream. |
| 82 // Starts at zero. |
| 83 inline unsigned pos() const { return pos_; } |
| 84 |
| 85 // Skips forward past the next character_count UC16 characters |
| 86 // in the input, or until the end of input if that comes sooner. |
| 87 // Returns the number of characters actually skipped. If less |
| 88 // than character_count, the end of the input was reached. |
| 89 inline unsigned SeekForward(unsigned character_count) { |
| 90 unsigned buffered_chars = |
| 91 static_cast<unsigned>(buffer_end_ - buffer_cursor_); |
| 92 if (character_count <= buffered_chars) { |
| 93 buffer_cursor_ += character_count; |
| 94 pos_ += character_count; |
| 95 return character_count; |
| 96 } |
| 97 return SlowSeekForward(character_count); |
| 98 } |
| 99 |
| 100 // Pushes back the most recently read UC16 character, i.e., |
| 101 // the value returned by the most recent call to Advance. |
| 102 // Must not be used right after calling SeekForward. |
| 103 virtual void PushBack(uc16 character) = 0; |
| 71 | 104 |
| 72 protected: | 105 protected: |
| 73 // Initial value of end_ before the input stream is initialized. | 106 static const int32_t kEndOfInput = -1; |
| 74 | 107 |
| 75 int pos_; // Current position in the buffer. | 108 // Ensures that the buffer_cursor_ points to the character at |
| 76 int end_; // Position where scanning should stop (EOF). | 109 // position pos_ of the input, if possible. If the position |
| 110 // is at or after the end of the input, return false. If there |
| 111 // are more characters available, return true. |
| 112 virtual bool ReadBlock() = 0; |
| 113 virtual unsigned SlowSeekForward(unsigned character_count) = 0; |
| 114 |
| 115 const uc16* buffer_cursor_; |
| 116 const uc16* buffer_end_; |
| 117 unsigned pos_; |
| 77 }; | 118 }; |
| 78 | 119 |
| 79 | 120 |
| 80 class ScannerConstants { | 121 class ScannerConstants { |
| 122 // --------------------------------------------------------------------- |
| 123 // Constants used by scanners. |
| 81 public: | 124 public: |
| 82 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; | 125 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; |
| 83 | 126 |
| 84 StaticResource<Utf8Decoder>* utf8_decoder() { | 127 StaticResource<Utf8Decoder>* utf8_decoder() { |
| 85 return &utf8_decoder_; | 128 return &utf8_decoder_; |
| 86 } | 129 } |
| 87 | 130 |
| 88 bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); } | 131 bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); } |
| 89 bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); } | 132 bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); } |
| 90 bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); } | 133 bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); } |
| (...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 173 public: | 216 public: |
| 174 explicit LiteralScope(Scanner* self); | 217 explicit LiteralScope(Scanner* self); |
| 175 ~LiteralScope(); | 218 ~LiteralScope(); |
| 176 void Complete(); | 219 void Complete(); |
| 177 | 220 |
| 178 private: | 221 private: |
| 179 Scanner* scanner_; | 222 Scanner* scanner_; |
| 180 bool complete_; | 223 bool complete_; |
| 181 }; | 224 }; |
| 182 | 225 |
| 183 Scanner(); | 226 explicit Scanner(Isolate* isolate); |
| 184 | 227 |
| 185 // Returns the current token again. | 228 // Returns the current token again. |
| 186 Token::Value current_token() { return current_.token; } | 229 Token::Value current_token() { return current_.token; } |
| 187 | 230 |
| 188 // One token look-ahead (past the token returned by Next()). | 231 // One token look-ahead (past the token returned by Next()). |
| 189 Token::Value peek() const { return next_.token; } | 232 Token::Value peek() const { return next_.token; } |
| 190 | 233 |
| 191 struct Location { | 234 struct Location { |
| 192 Location(int b, int e) : beg_pos(b), end_pos(e) { } | 235 Location(int b, int e) : beg_pos(b), end_pos(e) { } |
| 193 Location() : beg_pos(0), end_pos(0) { } | 236 Location() : beg_pos(0), end_pos(0) { } |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 279 } | 322 } |
| 280 | 323 |
| 281 inline void AddLiteralCharAdvance() { | 324 inline void AddLiteralCharAdvance() { |
| 282 AddLiteralChar(c0_); | 325 AddLiteralChar(c0_); |
| 283 Advance(); | 326 Advance(); |
| 284 } | 327 } |
| 285 | 328 |
| 286 // Low-level scanning support. | 329 // Low-level scanning support. |
| 287 void Advance() { c0_ = source_->Advance(); } | 330 void Advance() { c0_ = source_->Advance(); } |
| 288 void PushBack(uc32 ch) { | 331 void PushBack(uc32 ch) { |
| 289 source_->PushBack(ch); | 332 source_->PushBack(c0_); |
| 290 c0_ = ch; | 333 c0_ = ch; |
| 291 } | 334 } |
| 292 | 335 |
| 293 inline Token::Value Select(Token::Value tok) { | 336 inline Token::Value Select(Token::Value tok) { |
| 294 Advance(); | 337 Advance(); |
| 295 return tok; | 338 return tok; |
| 296 } | 339 } |
| 297 | 340 |
| 298 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | 341 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
| 299 Advance(); | 342 Advance(); |
| (...skipping 11 matching lines...) Expand all Loading... |
| 311 // Return the current source position. | 354 // Return the current source position. |
| 312 int source_pos() { | 355 int source_pos() { |
| 313 return source_->pos() - kCharacterLookaheadBufferSize; | 356 return source_->pos() - kCharacterLookaheadBufferSize; |
| 314 } | 357 } |
| 315 | 358 |
| 316 ScannerConstants* scanner_constants_; | 359 ScannerConstants* scanner_constants_; |
| 317 | 360 |
| 318 TokenDesc current_; // desc for current token (as returned by Next()) | 361 TokenDesc current_; // desc for current token (as returned by Next()) |
| 319 TokenDesc next_; // desc for next token (one token look-ahead) | 362 TokenDesc next_; // desc for next token (one token look-ahead) |
| 320 | 363 |
| 321 // Input stream. Must be initialized to a UTF16Buffer. | 364 // Input stream. Must be initialized to a UC16CharacterStream. |
| 322 UTF16Buffer* source_; | 365 UC16CharacterStream* source_; |
| 323 | 366 |
| 324 // Buffer to hold literal values (identifiers, strings, numbers) | 367 // Buffer to hold literal values (identifiers, strings, numbers) |
| 325 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. | 368 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. |
| 326 LiteralCollector literal_buffer_; | 369 LiteralCollector literal_buffer_; |
| 327 | 370 |
| 328 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 371 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| 329 uc32 c0_; | 372 uc32 c0_; |
| 330 }; | 373 }; |
| 331 | 374 |
| 332 // ---------------------------------------------------------------------------- | 375 // ---------------------------------------------------------------------------- |
| (...skipping 30 matching lines...) Expand all Loading... |
| 363 void Complete() { | 406 void Complete() { |
| 364 scanner_->TerminateLiteral(); | 407 scanner_->TerminateLiteral(); |
| 365 complete_ = true; | 408 complete_ = true; |
| 366 } | 409 } |
| 367 | 410 |
| 368 private: | 411 private: |
| 369 JavaScriptScanner* scanner_; | 412 JavaScriptScanner* scanner_; |
| 370 bool complete_; | 413 bool complete_; |
| 371 }; | 414 }; |
| 372 | 415 |
| 373 explicit JavaScriptScanner(ScannerConstants* scanner_constants); | 416 explicit JavaScriptScanner(Isolate* isolate); |
| 374 | 417 |
| 375 // Returns the next token. | 418 // Returns the next token. |
| 376 Token::Value Next(); | 419 Token::Value Next(); |
| 377 | 420 |
| 378 // Returns true if there was a line terminator before the peek'ed token. | 421 // Returns true if there was a line terminator before the peek'ed token. |
| 379 bool has_line_terminator_before_next() const { | 422 bool has_line_terminator_before_next() const { |
| 380 return has_line_terminator_before_next_; | 423 return has_line_terminator_before_next_; |
| 381 } | 424 } |
| 382 | 425 |
| 383 // Scans the input as a regular expression pattern, previous | 426 // Scans the input as a regular expression pattern, previous |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 418 void ScanEscape(); | 461 void ScanEscape(); |
| 419 Token::Value ScanString(); | 462 Token::Value ScanString(); |
| 420 | 463 |
| 421 // Scans a possible HTML comment -- begins with '<!'. | 464 // Scans a possible HTML comment -- begins with '<!'. |
| 422 Token::Value ScanHtmlComment(); | 465 Token::Value ScanHtmlComment(); |
| 423 | 466 |
| 424 // Decodes a unicode escape-sequence which is part of an identifier. | 467 // Decodes a unicode escape-sequence which is part of an identifier. |
| 425 // If the escape sequence cannot be decoded the result is kBadChar. | 468 // If the escape sequence cannot be decoded the result is kBadChar. |
| 426 uc32 ScanIdentifierUnicodeEscape(); | 469 uc32 ScanIdentifierUnicodeEscape(); |
| 427 | 470 |
| 428 ScannerConstants* scanner_constants_; | |
| 429 int literal_flags_; | 471 int literal_flags_; |
| 430 bool has_line_terminator_before_next_; | 472 bool has_line_terminator_before_next_; |
| 431 }; | 473 }; |
| 432 | 474 |
| 433 | 475 |
| 434 // ---------------------------------------------------------------------------- | 476 // ---------------------------------------------------------------------------- |
| 435 // Keyword matching state machine. | 477 // Keyword matching state machine. |
| 436 | 478 |
| 437 class KeywordMatcher { | 479 class KeywordMatcher { |
| 438 // Incrementally recognize keywords. | 480 // Incrementally recognize keywords. |
| (...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 552 // keyword with the current prefix). | 594 // keyword with the current prefix). |
| 553 const char* keyword_; | 595 const char* keyword_; |
| 554 int counter_; | 596 int counter_; |
| 555 Token::Value keyword_token_; | 597 Token::Value keyword_token_; |
| 556 }; | 598 }; |
| 557 | 599 |
| 558 | 600 |
| 559 } } // namespace v8::internal | 601 } } // namespace v8::internal |
| 560 | 602 |
| 561 #endif // V8_SCANNER_BASE_H_ | 603 #endif // V8_SCANNER_BASE_H_ |
| OLD | NEW |