Chromium Code Reviews | Index: src/scanner.h |
| diff --git a/src/scanner.h b/src/scanner.h |
| index adeea9b23a00d50d7f07a9203dcc311fd50cebe4..d23d7c736697f2e8f1c75b3bf181ff6e446522d8 100644 |
| --- a/src/scanner.h |
| +++ b/src/scanner.h |
| @@ -35,67 +35,95 @@ |
| namespace v8 { |
| namespace internal { |
| -// UTF16 buffer to read characters from a character stream. |
| -class CharacterStreamUTF16Buffer: public UTF16Buffer { |
| +// A buffered character stream based on a random access character |
| +// source (ReadBlock can be called with pos_ pointing to any position, |
| +// even positions before the current). |
| +class BufferedUC16CharacterStream: public UC16CharacterStream { |
| public: |
| - CharacterStreamUTF16Buffer(); |
| - virtual ~CharacterStreamUTF16Buffer() {} |
| - void Initialize(Handle<String> data, |
| - unibrow::CharacterStream* stream, |
| - int start_position, |
| - int end_position); |
| - virtual void PushBack(uc32 ch); |
| - virtual uc32 Advance(); |
| - virtual void SeekForward(int pos); |
| - |
| - private: |
| - List<uc32> pushback_buffer_; |
| - uc32 last_; |
| - unibrow::CharacterStream* stream_; |
| - |
| - List<uc32>* pushback_buffer() { return &pushback_buffer_; } |
| + BufferedUC16CharacterStream(); |
| + virtual ~BufferedUC16CharacterStream(); |
| + |
| + virtual void PushBack(uc16 character); |
|
Erik Corry
2010/12/07 12:27:30
Missing blank line.
Lasse Reichstein
2010/12/07 14:05:54
Done.
|
| + protected: |
| + static const unsigned kBufferSize = 512; |
| + static const unsigned kPushBackStepSize = 16; |
| + |
| + virtual unsigned SlowSeekForward(unsigned delta); |
| + virtual bool ReadBlock(); |
| + virtual void SlowPushBack(uc16 character); |
| + |
| + virtual unsigned BufferSeekForward(unsigned delta) = 0; |
| + virtual unsigned FillBuffer(unsigned position, unsigned length) = 0; |
| + |
| + const uc16* pushback_limit_; |
| + uc16 buffer_[kBufferSize]; |
| }; |
| -// UTF16 buffer to read characters from an external string. |
| -template <typename StringType, typename CharType> |
| -class ExternalStringUTF16Buffer: public UTF16Buffer { |
| +// Generic string stream. |
| +class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream { |
| + public: |
| + GenericStringUC16CharacterStream(Handle<String> data, |
| + unsigned start_position, |
| + unsigned end_position); |
| + virtual ~GenericStringUC16CharacterStream(); |
| + |
| + protected: |
| + virtual unsigned BufferSeekForward(unsigned delta); |
| + virtual unsigned FillBuffer(unsigned position, unsigned length); |
| + |
| + Handle<String> string_; |
| + unsigned start_position_; |
| + unsigned length_; |
| +}; |
| + |
| + |
| +// UC16 stream based on a literal UTF-8 string. |
| +class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream { |
| public: |
| - ExternalStringUTF16Buffer(); |
| - virtual ~ExternalStringUTF16Buffer() {} |
| - void Initialize(Handle<StringType> data, |
| - int start_position, |
| - int end_position); |
| - virtual void PushBack(uc32 ch); |
| - virtual uc32 Advance(); |
| - virtual void SeekForward(int pos); |
| - |
| - private: |
| - const CharType* raw_data_; // Pointer to the actual array of characters. |
| + Utf8ToUC16CharacterStream(const byte* data, unsigned length); |
| + virtual ~Utf8ToUC16CharacterStream(); |
|
Erik Corry
2010/12/07 12:27:30
Missing blank line.
Lasse Reichstein
2010/12/07 14:05:54
Done.
|
| + protected: |
| + virtual unsigned BufferSeekForward(unsigned delta); |
| + virtual unsigned FillBuffer(unsigned char_position, unsigned length); |
| + void SetRawPosition(unsigned char_position); |
| + |
| + const byte* raw_data_; |
| + unsigned raw_data_length_; // Not the number of characters! |
|
Erik Corry
2010/12/07 12:27:30
Measured in bytes?
Lasse Reichstein
2010/12/07 14:05:54
Yes. Reworded to say so.
|
| + unsigned raw_data_pos_; |
| + // The character position of the character at raw_data[raw_data_pos_]. |
| + // Not necessarily the same as pos_. |
| + unsigned raw_character_position_; |
| }; |
| -// Initializes a UTF16Buffer as input stream, using one of a number |
| -// of strategies depending on the available character sources. |
| -class StreamInitializer { |
| +// UTF16 buffer to read characters from an external string. |
| +class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream { |
| public: |
| - UTF16Buffer* Init(Handle<String> source, |
| - unibrow::CharacterStream* stream, |
| - int start_position, |
| - int end_position); |
| - private: |
| - // Different UTF16 buffers used to pull characters from. Based on input one of |
| - // these will be initialized as the actual data source. |
| - CharacterStreamUTF16Buffer char_stream_buffer_; |
| - ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t> |
| - two_byte_string_buffer_; |
| - ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; |
| - |
| - // Used to convert the source string into a character stream when a stream |
| - // is not passed to the scanner. |
| - SafeStringInputBuffer safe_string_input_buffer_; |
| + ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data, |
| + int start_position, |
| + int end_position); |
| + virtual ~ExternalTwoByteStringUC16CharacterStream(); |
| + |
| + virtual void PushBack(uc16 character) { |
| + ASSERT(buffer_cursor_ > raw_data_); |
| + buffer_cursor_--; |
| + pos_--; |
| + } |
| + protected: |
| + virtual unsigned SlowSeekForward(unsigned delta) { |
| + // Fast case always handles seeking. |
| + return 0; |
| + } |
| + virtual bool ReadBlock() { |
| + // Entire string is read at start. |
| + return false; |
| + } |
| + Handle<ExternalTwoByteString> source_; |
| + const uc16* raw_data_; // Pointer to the actual array of characters. |
| }; |
| + |
| // ---------------------------------------------------------------------------- |
| // V8JavaScriptScanner |
| // JavaScript scanner getting its input from either a V8 String or a unicode |
| @@ -103,28 +131,17 @@ class StreamInitializer { |
| class V8JavaScriptScanner : public JavaScriptScanner { |
| public: |
| - V8JavaScriptScanner() {} |
| - |
| - // Initialize the Scanner to scan source. |
| - void Initialize(Handle<String> source, int literal_flags = kAllLiterals); |
| - void Initialize(Handle<String> source, |
| - unibrow::CharacterStream* stream, |
| + V8JavaScriptScanner(); |
| + void Initialize(UC16CharacterStream* source, |
| int literal_flags = kAllLiterals); |
| - void Initialize(Handle<String> source, |
| - int start_position, int end_position, |
| - int literal_flags = kAllLiterals); |
| - |
| - protected: |
| - StreamInitializer stream_initializer_; |
| }; |
| class JsonScanner : public Scanner { |
| public: |
| - JsonScanner(); |
| + explicit JsonScanner(); |
|
Erik Corry
2010/12/07 12:27:30
No explicit.
|
| - // Initialize the Scanner to scan source. |
| - void Initialize(Handle<String> source); |
| + void Initialize(UC16CharacterStream* source); |
| // Returns the next token. |
| Token::Value Next(); |
| @@ -138,7 +155,7 @@ class JsonScanner : public Scanner { |
| // Recognizes all of the single-character tokens directly, or calls a function |
| // to scan a number, string or identifier literal. |
| // The only allowed whitespace characters between tokens are tab, |
| - // carrige-return, newline and space. |
| + // carriage-return, newline and space. |
| void ScanJson(); |
| // A JSON number (production JSONNumber) is a subset of the valid JavaScript |
| @@ -159,60 +176,8 @@ class JsonScanner : public Scanner { |
| // are the only valid JSON identifiers (productions JSONBooleanLiteral, |
| // JSONNullLiteral). |
| Token::Value ScanJsonIdentifier(const char* text, Token::Value token); |
| - |
| - StreamInitializer stream_initializer_; |
| }; |
| - |
| -// ExternalStringUTF16Buffer |
| -template <typename StringType, typename CharType> |
| -ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer() |
| - : raw_data_(NULL) { } |
| - |
| - |
| -template <typename StringType, typename CharType> |
| -void ExternalStringUTF16Buffer<StringType, CharType>::Initialize( |
| - Handle<StringType> data, |
| - int start_position, |
| - int end_position) { |
| - ASSERT(!data.is_null()); |
| - raw_data_ = data->resource()->data(); |
| - |
| - ASSERT(end_position <= data->length()); |
| - if (start_position > 0) { |
| - SeekForward(start_position); |
| - } |
| - end_ = |
| - end_position != kNoEndPosition ? end_position : data->length(); |
| -} |
| - |
| - |
| -template <typename StringType, typename CharType> |
| -uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() { |
| - if (pos_ < end_) { |
| - return raw_data_[pos_++]; |
| - } else { |
| - // note: currently the following increment is necessary to avoid a |
| - // test-parser problem! |
| - pos_++; |
| - return static_cast<uc32>(-1); |
| - } |
| -} |
| - |
| - |
| -template <typename StringType, typename CharType> |
| -void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) { |
| - pos_--; |
| - ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); |
| - ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); |
| -} |
| - |
| - |
| -template <typename StringType, typename CharType> |
| -void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) { |
| - pos_ = pos; |
| -} |
| - |
| } } // namespace v8::internal |
| #endif // V8_SCANNER_H_ |