| OLD | NEW |
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 17 matching lines...) |
| 28 #ifndef V8_SCANNER_H_ | 28 #ifndef V8_SCANNER_H_ |
| 29 #define V8_SCANNER_H_ | 29 #define V8_SCANNER_H_ |
| 30 | 30 |
| 31 #include "token.h" | 31 #include "token.h" |
| 32 #include "char-predicates-inl.h" | 32 #include "char-predicates-inl.h" |
| 33 #include "scanner-base.h" | 33 #include "scanner-base.h" |
| 34 | 34 |
| 35 namespace v8 { | 35 namespace v8 { |
| 36 namespace internal { | 36 namespace internal { |
| 37 | 37 |
| 38 // UTF16 buffer to read characters from a character stream. | 38 // A buffered character stream based on a random access character |
| 39 class CharacterStreamUTF16Buffer: public UTF16Buffer { | 39 // source (ReadBlock can be called with pos_ pointing to any position, |
| 40 // even positions before the current). |
| 41 class BufferedUC16CharacterStream: public UC16CharacterStream { |
| 40 public: | 42 public: |
| 41 CharacterStreamUTF16Buffer(); | 43 BufferedUC16CharacterStream(); |
| 42 virtual ~CharacterStreamUTF16Buffer() {} | 44 virtual ~BufferedUC16CharacterStream(); |
| 43 void Initialize(Handle<String> data, | |
| 44 unibrow::CharacterStream* stream, | |
| 45 int start_position, | |
| 46 int end_position); | |
| 47 virtual void PushBack(uc32 ch); | |
| 48 virtual uc32 Advance(); | |
| 49 virtual void SeekForward(int pos); | |
| 50 | 45 |
| 51 private: | 46 virtual void PushBack(uc16 character); |
| 52 List<uc32> pushback_buffer_; | |
| 53 uc32 last_; | |
| 54 unibrow::CharacterStream* stream_; | |
| 55 | 47 |
| 56 List<uc32>* pushback_buffer() { return &pushback_buffer_; } | 48 protected: |
| 49 static const unsigned kBufferSize = 512; |
| 50 static const unsigned kPushBackStepSize = 16; |
| 51 |
| 52 virtual unsigned SlowSeekForward(unsigned delta); |
| 53 virtual bool ReadBlock(); |
| 54 virtual void SlowPushBack(uc16 character); |
| 55 |
| 56 virtual unsigned BufferSeekForward(unsigned delta) = 0; |
| 57 virtual unsigned FillBuffer(unsigned position, unsigned length) = 0; |
| 58 |
| 59 const uc16* pushback_limit_; |
| 60 uc16 buffer_[kBufferSize]; |
| 61 }; |
| 62 |
| 63 |
| 64 // Generic string stream. |
| 65 class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream { |
| 66 public: |
| 67 GenericStringUC16CharacterStream(Handle<String> data, |
| 68 unsigned start_position, |
| 69 unsigned end_position); |
| 70 virtual ~GenericStringUC16CharacterStream(); |
| 71 |
| 72 protected: |
| 73 virtual unsigned BufferSeekForward(unsigned delta); |
| 74 virtual unsigned FillBuffer(unsigned position, unsigned length); |
| 75 |
| 76 Handle<String> string_; |
| 77 unsigned start_position_; |
| 78 unsigned length_; |
| 79 }; |
| 80 |
| 81 |
| 82 // UC16 stream based on a literal UTF-8 string. |
| 83 class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream { |
| 84 public: |
| 85 Utf8ToUC16CharacterStream(const byte* data, unsigned length); |
| 86 virtual ~Utf8ToUC16CharacterStream(); |
| 87 |
| 88 protected: |
| 89 virtual unsigned BufferSeekForward(unsigned delta); |
| 90 virtual unsigned FillBuffer(unsigned char_position, unsigned length); |
| 91 void SetRawPosition(unsigned char_position); |
| 92 |
| 93 const byte* raw_data_; |
| 94 unsigned raw_data_length_; // Measured in bytes, not characters. |
| 95 unsigned raw_data_pos_; |
| 96 // The character position of the character at raw_data[raw_data_pos_]. |
| 97 // Not necessarily the same as pos_. |
| 98 unsigned raw_character_position_; |
| 57 }; | 99 }; |
| 58 | 100 |
| 59 | 101 |
| 60 // UTF16 buffer to read characters from an external string. | 102 // UTF16 buffer to read characters from an external string. |
| 61 template <typename StringType, typename CharType> | 103 class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream { |
| 62 class ExternalStringUTF16Buffer: public UTF16Buffer { | |
| 63 public: | 104 public: |
| 64 ExternalStringUTF16Buffer(); | 105 ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data, |
| 65 virtual ~ExternalStringUTF16Buffer() {} | 106 int start_position, |
| 66 void Initialize(Handle<StringType> data, | 107 int end_position); |
| 67 int start_position, | 108 virtual ~ExternalTwoByteStringUC16CharacterStream(); |
| 68 int end_position); | |
| 69 virtual void PushBack(uc32 ch); | |
| 70 virtual uc32 Advance(); | |
| 71 virtual void SeekForward(int pos); | |
| 72 | 109 |
| 73 private: | 110 virtual void PushBack(uc16 character) { |
| 74 const CharType* raw_data_; // Pointer to the actual array of characters. | 111 ASSERT(buffer_cursor_ > raw_data_); |
| 112 buffer_cursor_--; |
| 113 pos_--; |
| 114 } |
| 115 protected: |
| 116 virtual unsigned SlowSeekForward(unsigned delta) { |
| 117 // Fast case always handles seeking. |
| 118 return 0; |
| 119 } |
| 120 virtual bool ReadBlock() { |
| 121 // Entire string is read at start. |
| 122 return false; |
| 123 } |
| 124 Handle<ExternalTwoByteString> source_; |
| 125 const uc16* raw_data_; // Pointer to the actual array of characters. |
| 75 }; | 126 }; |
| 76 | 127 |
| 77 | 128 |
| 78 // Initializes a UTF16Buffer as input stream, using one of a number | |
| 79 // of strategies depending on the available character sources. | |
| 80 class StreamInitializer { | |
| 81 public: | |
| 82 UTF16Buffer* Init(Handle<String> source, | |
| 83 unibrow::CharacterStream* stream, | |
| 84 int start_position, | |
| 85 int end_position); | |
| 86 private: | |
| 87 // Different UTF16 buffers used to pull characters from. Based on input one of | |
| 88 // these will be initialized as the actual data source. | |
| 89 CharacterStreamUTF16Buffer char_stream_buffer_; | |
| 90 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t> | |
| 91 two_byte_string_buffer_; | |
| 92 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; | |
| 93 | |
| 94 // Used to convert the source string into a character stream when a stream | |
| 95 // is not passed to the scanner. | |
| 96 SafeStringInputBuffer safe_string_input_buffer_; | |
| 97 }; | |
| 98 | |
| 99 // ---------------------------------------------------------------------------- | 129 // ---------------------------------------------------------------------------- |
| 100 // V8JavaScriptScanner | 130 // V8JavaScriptScanner |
| 101 // JavaScript scanner getting its input from either a V8 String or a unicode | 131 // JavaScript scanner getting its input from either a V8 String or a unicode |
| 102 // CharacterStream. | 132 // CharacterStream. |
| 103 | 133 |
| 104 class V8JavaScriptScanner : public JavaScriptScanner { | 134 class V8JavaScriptScanner : public JavaScriptScanner { |
| 105 public: | 135 public: |
| 106 explicit V8JavaScriptScanner(Isolate* isolate) | 136 explicit V8JavaScriptScanner(Isolate* isolate) |
| 107 : JavaScriptScanner(isolate->scanner_constants()) {} | 137 : JavaScriptScanner(isolate) {} |
| 108 | 138 |
| 109 // Initialize the Scanner to scan source. | 139 void Initialize(UC16CharacterStream* source, |
| 110 void Initialize(Handle<String> source, int literal_flags = kAllLiterals); | |
| 111 void Initialize(Handle<String> source, | |
| 112 unibrow::CharacterStream* stream, | |
| 113 int literal_flags = kAllLiterals); | 140 int literal_flags = kAllLiterals); |
| 114 void Initialize(Handle<String> source, | |
| 115 int start_position, int end_position, | |
| 116 int literal_flags = kAllLiterals); | |
| 117 | |
| 118 protected: | |
| 119 StreamInitializer stream_initializer_; | |
| 120 }; | 141 }; |
| 121 | 142 |
| 122 | 143 |
| 123 class JsonScanner : public Scanner { | 144 class JsonScanner : public Scanner { |
| 124 public: | 145 public: |
| 125 JsonScanner(); | 146 JsonScanner(Isolate* isolate); |
| 126 | 147 |
| 127 // Initialize the Scanner to scan source. | 148 void Initialize(UC16CharacterStream* source); |
| 128 void Initialize(Handle<String> source); | |
| 129 | 149 |
| 130 // Returns the next token. | 150 // Returns the next token. |
| 131 Token::Value Next(); | 151 Token::Value Next(); |
| 132 | 152 |
| 133 protected: | 153 protected: |
| 134 // Skip past JSON whitespace (only space, tab, newline and carriage-return). | 154 // Skip past JSON whitespace (only space, tab, newline and carriage-return). |
| 135 bool SkipJsonWhiteSpace(); | 155 bool SkipJsonWhiteSpace(); |
| 136 | 156 |
| 137 // Scan a single JSON token. The JSON lexical grammar is specified in the | 157 // Scan a single JSON token. The JSON lexical grammar is specified in the |
| 138 // ECMAScript 5 standard, section 15.12.1.1. | 158 // ECMAScript 5 standard, section 15.12.1.1. |
| 139 // Recognizes all of the single-character tokens directly, or calls a function | 159 // Recognizes all of the single-character tokens directly, or calls a function |
| 140 // to scan a number, string or identifier literal. | 160 // to scan a number, string or identifier literal. |
| 141 // The only allowed whitespace characters between tokens are tab, | 161 // The only allowed whitespace characters between tokens are tab, |
| 142 // carrige-return, newline and space. | 162 // carriage-return, newline and space. |
| 143 void ScanJson(); | 163 void ScanJson(); |
| 144 | 164 |
| 145 // A JSON number (production JSONNumber) is a subset of the valid JavaScript | 165 // A JSON number (production JSONNumber) is a subset of the valid JavaScript |
| 146 // decimal number literals. | 166 // decimal number literals. |
| 147 // It includes an optional minus sign, must have at least one | 167 // It includes an optional minus sign, must have at least one |
| 148 // digit before and after a decimal point, may not have prefixed zeros (unless | 168 // digit before and after a decimal point, may not have prefixed zeros (unless |
| 149 // the integer part is zero), and may include an exponent part (e.g., "e-10"). | 169 // the integer part is zero), and may include an exponent part (e.g., "e-10"). |
| 150 // Hexadecimal and octal numbers are not allowed. | 170 // Hexadecimal and octal numbers are not allowed. |
| 151 Token::Value ScanJsonNumber(); | 171 Token::Value ScanJsonNumber(); |
| 152 | 172 |
| 153 // A JSON string (production JSONString) is a subset of valid JavaScript string | 173 // A JSON string (production JSONString) is a subset of valid JavaScript string |
| 154 // literals. The string must only be double-quoted (not single-quoted), and | 174 // literals. The string must only be double-quoted (not single-quoted), and |
| 155 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and | 175 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and |
| 156 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. | 176 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. |
| 157 Token::Value ScanJsonString(); | 177 Token::Value ScanJsonString(); |
| 158 | 178 |
| 159 // Used to recognize one of the literals "true", "false", or "null". These | 179 // Used to recognize one of the literals "true", "false", or "null". These |
| 160 // are the only valid JSON identifiers (productions JSONBooleanLiteral, | 180 // are the only valid JSON identifiers (productions JSONBooleanLiteral, |
| 161 // JSONNullLiteral). | 181 // JSONNullLiteral). |
| 162 Token::Value ScanJsonIdentifier(const char* text, Token::Value token); | 182 Token::Value ScanJsonIdentifier(const char* text, Token::Value token); |
| 163 | |
| 164 StreamInitializer stream_initializer_; | |
| 165 }; | 183 }; |
| 166 | 184 |
| 167 | |
| 168 // ExternalStringUTF16Buffer | |
| 169 template <typename StringType, typename CharType> | |
| 170 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer() | |
| 171 : raw_data_(NULL) { } | |
| 172 | |
| 173 | |
| 174 template <typename StringType, typename CharType> | |
| 175 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize( | |
| 176 Handle<StringType> data, | |
| 177 int start_position, | |
| 178 int end_position) { | |
| 179 ASSERT(!data.is_null()); | |
| 180 raw_data_ = data->resource()->data(); | |
| 181 | |
| 182 ASSERT(end_position <= data->length()); | |
| 183 if (start_position > 0) { | |
| 184 SeekForward(start_position); | |
| 185 } | |
| 186 end_ = | |
| 187 end_position != kNoEndPosition ? end_position : data->length(); | |
| 188 } | |
| 189 | |
| 190 | |
| 191 template <typename StringType, typename CharType> | |
| 192 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() { | |
| 193 if (pos_ < end_) { | |
| 194 return raw_data_[pos_++]; | |
| 195 } else { | |
| 196 // note: currently the following increment is necessary to avoid a | |
| 197 // test-parser problem! | |
| 198 pos_++; | |
| 199 return static_cast<uc32>(-1); | |
| 200 } | |
| 201 } | |
| 202 | |
| 203 | |
| 204 template <typename StringType, typename CharType> | |
| 205 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) { | |
| 206 pos_--; | |
| 207 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); | |
| 208 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); | |
| 209 } | |
| 210 | |
| 211 | |
| 212 template <typename StringType, typename CharType> | |
| 213 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) { | |
| 214 pos_ = pos; | |
| 215 } | |
| 216 | |
| 217 } } // namespace v8::internal | 185 } } // namespace v8::internal |
| 218 | 186 |
| 219 #endif // V8_SCANNER_H_ | 187 #endif // V8_SCANNER_H_ |
| OLD | NEW |