| OLD | NEW |
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 57 // Buffered stream of characters, using an internal UC16 buffer. | 57 // Buffered stream of characters, using an internal UC16 buffer. |
| 58 | 58 |
| 59 class UC16CharacterStream { | 59 class UC16CharacterStream { |
| 60 public: | 60 public: |
| 61 UC16CharacterStream() : pos_(0) { } | 61 UC16CharacterStream() : pos_(0) { } |
| 62 virtual ~UC16CharacterStream() { } | 62 virtual ~UC16CharacterStream() { } |
| 63 | 63 |
| 64 // Returns and advances past the next UC16 character in the input | 64 // Returns and advances past the next UC16 character in the input |
| 65 // stream. If there are no more characters, it returns a negative | 65 // stream. If there are no more characters, it returns a negative |
| 66 // value. | 66 // value. |
| 67 inline int32_t Advance() { | 67 inline uc32 Advance() { |
| 68 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { | 68 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { |
| 69 pos_++; | 69 pos_++; |
| 70 return *(buffer_cursor_++); | 70 return static_cast<uc32>(*(buffer_cursor_++)); |
| 71 } | 71 } |
| 72 // Note: currently the following increment is necessary to avoid a | 72 // Note: currently the following increment is necessary to avoid a |
| 73 // parser problem! The scanner treats the final kEndOfInput as | 73 // parser problem! The scanner treats the final kEndOfInput as |
| 74 // a character with a position, and does math relative to that | 74 // a character with a position, and does math relative to that |
| 75 // position. | 75 // position. |
| 76 pos_++; | 76 pos_++; |
| 77 | 77 |
| 78 return kEndOfInput; | 78 return kEndOfInput; |
| 79 } | 79 } |
| 80 | 80 |
| 81 // Return the current position in the character stream. | 81 // Return the current position in the character stream. |
| 82 // Starts at zero. | 82 // Starts at zero. |
| 83 inline unsigned pos() const { return pos_; } | 83 inline unsigned pos() const { return pos_; } |
| 84 | 84 |
| 85 // Skips forward past the next character_count UC16 characters | 85 // Skips forward past the next character_count UC16 characters |
| 86 // in the input, or until the end of input if that comes sooner. | 86 // in the input, or until the end of input if that comes sooner. |
| 87 // Returns the number of characters actually skipped. If less | 87 // Returns the number of characters actually skipped. If less |
| 88 // than character_count, the end of input was reached. | 88 // than character_count, the end of input was reached. |
| 89 inline unsigned SeekForward(unsigned character_count) { | 89 inline unsigned SeekForward(unsigned character_count) { |
| 90 unsigned buffered_chars = | 90 unsigned buffered_chars = |
| 91 static_cast<unsigned>(buffer_end_ - buffer_cursor_); | 91 static_cast<unsigned>(buffer_end_ - buffer_cursor_); |
| 92 if (character_count <= buffered_chars) { | 92 if (character_count <= buffered_chars) { |
| 93 buffer_cursor_ += character_count; | 93 buffer_cursor_ += character_count; |
| 94 pos_ += character_count; | 94 pos_ += character_count; |
| 95 return character_count; | 95 return character_count; |
| 96 } | 96 } |
| 97 return SlowSeekForward(character_count); | 97 return SlowSeekForward(character_count); |
| 98 } | 98 } |
| 99 | 99 |
| 100 // Pushes back the most recently read UC16 character, i.e., | 100 // Pushes back the most recently read UC16 character (or negative |
| 101 // the value returned by the most recent call to Advance. | 101 // value if at end of input), i.e., the value returned by the most recent |
| 102 // call to Advance. |
| 102 // Must not be used right after calling SeekForward. | 103 // Must not be used right after calling SeekForward. |
| 103 virtual void PushBack(uc16 character) = 0; | 104 virtual void PushBack(int32_t character) = 0; |
| 104 | 105 |
| 105 protected: | 106 protected: |
| 106 static const int32_t kEndOfInput = -1; | 107 static const uc32 kEndOfInput = -1; |
| 107 | 108 |
| 108 // Ensures that the buffer_cursor_ points to the character at | 109 // Ensures that the buffer_cursor_ points to the character at |
| 109 // position pos_ of the input, if possible. If the position | 110 // position pos_ of the input, if possible. If the position |
| 110 // is at or after the end of the input, return false. If there | 111 // is at or after the end of the input, return false. If there |
| 111 // are more characters available, return true. | 112 // are more characters available, return true. |
| 112 virtual bool ReadBlock() = 0; | 113 virtual bool ReadBlock() = 0; |
| 113 virtual unsigned SlowSeekForward(unsigned character_count) = 0; | 114 virtual unsigned SlowSeekForward(unsigned character_count) = 0; |
| 114 | 115 |
| 115 const uc16* buffer_cursor_; | 116 const uc16* buffer_cursor_; |
| 116 const uc16* buffer_end_; | 117 const uc16* buffer_end_; |
| (...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 247 Vector<byte> backing_store_; | 248 Vector<byte> backing_store_; |
| 248 }; | 249 }; |
| 249 | 250 |
| 250 | 251 |
| 251 // ---------------------------------------------------------------------------- | 252 // ---------------------------------------------------------------------------- |
| 252 // Scanner base-class. | 253 // Scanner base-class. |
| 253 | 254 |
| 254 // Generic functionality used by both JSON and JavaScript scanners. | 255 // Generic functionality used by both JSON and JavaScript scanners. |
| 255 class Scanner { | 256 class Scanner { |
| 256 public: | 257 public: |
| 258 // -1 is outside of the range of any real source code. |
| 259 static const int kNoOctalLocation = -1; |
| 260 |
| 257 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; | 261 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; |
| 258 | 262 |
| 259 class LiteralScope { | 263 class LiteralScope { |
| 260 public: | 264 public: |
| 261 explicit LiteralScope(Scanner* self); | 265 explicit LiteralScope(Scanner* self); |
| 262 ~LiteralScope(); | 266 ~LiteralScope(); |
| 263 void Complete(); | 267 void Complete(); |
| 264 | 268 |
| 265 private: | 269 private: |
| 266 Scanner* scanner_; | 270 Scanner* scanner_; |
| 267 bool complete_; | 271 bool complete_; |
| 268 }; | 272 }; |
| 269 | 273 |
| 270 explicit Scanner(Isolate* isolate); | 274 explicit Scanner(Isolate* isolate); |
| 271 | 275 |
| 272 // Returns the current token again. | 276 // Returns the current token again. |
| 273 Token::Value current_token() { return current_.token; } | 277 Token::Value current_token() { return current_.token; } |
| 274 | 278 |
| 275 // One token look-ahead (past the token returned by Next()). | 279 // One token look-ahead (past the token returned by Next()). |
| 276 Token::Value peek() const { return next_.token; } | 280 Token::Value peek() const { return next_.token; } |
| 277 | 281 |
| 278 struct Location { | 282 struct Location { |
| 279 Location(int b, int e) : beg_pos(b), end_pos(e) { } | 283 Location(int b, int e) : beg_pos(b), end_pos(e) { } |
| 280 Location() : beg_pos(0), end_pos(0) { } | 284 Location() : beg_pos(0), end_pos(0) { } |
| 285 |
| 286 bool IsValid() const { |
| 287 return beg_pos >= 0 && end_pos >= beg_pos; |
| 288 } |
| 289 |
| 281 int beg_pos; | 290 int beg_pos; |
| 282 int end_pos; | 291 int end_pos; |
| 283 }; | 292 }; |
| 284 | 293 |
| 294 static Location NoLocation() { |
| 295 return Location(-1, -1); |
| 296 } |
| 297 |
| 285 // Returns the location information for the current token | 298 // Returns the location information for the current token |
| 286 // (the token returned by Next()). | 299 // (the token returned by Next()). |
| 287 Location location() const { return current_.location; } | 300 Location location() const { return current_.location; } |
| 288 Location peek_location() const { return next_.location; } | 301 Location peek_location() const { return next_.location; } |
| 289 | 302 |
| 303 // Returns the start position of the last seen octal literal. |
| 304 int octal_position() const { return octal_pos_; } |
| 305 void clear_octal_position() { octal_pos_ = -1; } |
| 306 |
| 290 // Returns the literal string, if any, for the current token (the | 307 // Returns the literal string, if any, for the current token (the |
| 291 // token returned by Next()). The string is 0-terminated and in | 308 // token returned by Next()). The string is 0-terminated and in |
| 292 // UTF-8 format; they may contain 0-characters. Literal strings are | 309 // UTF-8 format; they may contain 0-characters. Literal strings are |
| 293 // collected for identifiers, strings, and numbers. | 310 // collected for identifiers, strings, and numbers. |
| 294 // These functions only give the correct result if the literal | 311 // These functions only give the correct result if the literal |
| 295 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 312 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
| 296 bool is_literal_ascii() { | 313 bool is_literal_ascii() { |
| 297 ASSERT_NOT_NULL(current_.literal_chars); | 314 ASSERT_NOT_NULL(current_.literal_chars); |
| 298 return current_.literal_chars->is_ascii(); | 315 return current_.literal_chars->is_ascii(); |
| 299 } | 316 } |
| (...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 412 // Buffers collecting literal strings, numbers, etc. | 429 // Buffers collecting literal strings, numbers, etc. |
| 413 LiteralBuffer literal_buffer1_; | 430 LiteralBuffer literal_buffer1_; |
| 414 LiteralBuffer literal_buffer2_; | 431 LiteralBuffer literal_buffer2_; |
| 415 | 432 |
| 416 TokenDesc current_; // desc for current token (as returned by Next()) | 433 TokenDesc current_; // desc for current token (as returned by Next()) |
| 417 TokenDesc next_; // desc for next token (one token look-ahead) | 434 TokenDesc next_; // desc for next token (one token look-ahead) |
| 418 | 435 |
| 419 // Input stream. Must be initialized to a UC16CharacterStream. | 436 // Input stream. Must be initialized to a UC16CharacterStream. |
| 420 UC16CharacterStream* source_; | 437 UC16CharacterStream* source_; |
| 421 | 438 |
| 439 // Start position of the octal literal last scanned. |
| 440 int octal_pos_; |
| 422 | 441 |
| 423 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 442 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| 424 uc32 c0_; | 443 uc32 c0_; |
| 425 }; | 444 }; |
| 426 | 445 |
| 427 // ---------------------------------------------------------------------------- | 446 // ---------------------------------------------------------------------------- |
| 428 // JavaScriptScanner - base logic for JavaScript scanning. | 447 // JavaScriptScanner - base logic for JavaScript scanning. |
| 429 | 448 |
| 430 class JavaScriptScanner : public Scanner { | 449 class JavaScriptScanner : public Scanner { |
| 431 public: | 450 public: |
| (...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 626 // keyword with the current prefix). | 645 // keyword with the current prefix). |
| 627 const char* keyword_; | 646 const char* keyword_; |
| 628 int counter_; | 647 int counter_; |
| 629 Token::Value keyword_token_; | 648 Token::Value keyword_token_; |
| 630 }; | 649 }; |
| 631 | 650 |
| 632 | 651 |
| 633 } } // namespace v8::internal | 652 } } // namespace v8::internal |
| 634 | 653 |
| 635 #endif // V8_SCANNER_BASE_H_ | 654 #endif // V8_SCANNER_BASE_H_ |
| OLD | NEW |