OLD | NEW |
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 17 matching lines...) Expand all Loading... |
28 #ifndef V8_SCANNER_H_ | 28 #ifndef V8_SCANNER_H_ |
29 #define V8_SCANNER_H_ | 29 #define V8_SCANNER_H_ |
30 | 30 |
31 #include "token.h" | 31 #include "token.h" |
32 #include "char-predicates-inl.h" | 32 #include "char-predicates-inl.h" |
33 #include "scanner-base.h" | 33 #include "scanner-base.h" |
34 | 34 |
35 namespace v8 { | 35 namespace v8 { |
36 namespace internal { | 36 namespace internal { |
37 | 37 |
38 // A buffered character stream based on a random access character | 38 // UTF16 buffer to read characters from a unibrow::CharacterStream. |
39 // source (ReadBlock can be called with pos_ pointing to any position, | 39 class CharacterStreamUTF16Buffer: public UTF16Buffer { |
40 // even positions before the current). | |
41 class BufferedUC16CharacterStream: public UC16CharacterStream { | |
42 public: | 40 public: |
43 BufferedUC16CharacterStream(); | 41 CharacterStreamUTF16Buffer(); |
44 virtual ~BufferedUC16CharacterStream(); | 42 virtual ~CharacterStreamUTF16Buffer() {} |
| 43 void Initialize(Handle<String> data, |
| 44 unibrow::CharacterStream* stream, |
| 45 int start_position, |
| 46 int end_position); |
| 47 virtual void PushBack(uc32 ch); |
| 48 virtual uc32 Advance(); |
| 49 virtual void SeekForward(int pos); |
45 | 50 |
46 virtual void PushBack(uc16 character); | 51 private: |
| 52 List<uc32> pushback_buffer_; |
| 53 uc32 last_; |
| 54 unibrow::CharacterStream* stream_; |
47 | 55 |
48 protected: | 56 List<uc32>* pushback_buffer() { return &pushback_buffer_; } |
49 static const unsigned kBufferSize = 512; | |
50 static const unsigned kPushBackStepSize = 16; | |
51 | |
52 virtual unsigned SlowSeekForward(unsigned delta); | |
53 virtual bool ReadBlock(); | |
54 virtual void SlowPushBack(uc16 character); | |
55 | |
56 virtual unsigned BufferSeekForward(unsigned delta) = 0; | |
57 virtual unsigned FillBuffer(unsigned position, unsigned length) = 0; | |
58 | |
59 const uc16* pushback_limit_; | |
60 uc16 buffer_[kBufferSize]; | |
61 }; | |
62 | |
63 | |
64 // Generic string stream, constructed from a Handle<String> plus a start and end position. | |
65 class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream { | |
66 public: | |
67 GenericStringUC16CharacterStream(Handle<String> data, | |
68 unsigned start_position, | |
69 unsigned end_position); | |
70 virtual ~GenericStringUC16CharacterStream(); | |
71 | |
72 protected: | |
73 virtual unsigned BufferSeekForward(unsigned delta); | |
74 virtual unsigned FillBuffer(unsigned position, unsigned length); | |
75 | |
76 Handle<String> string_; | |
77 unsigned start_position_; | |
78 unsigned length_; | |
79 }; | |
80 | |
81 | |
82 // UC16 stream based on a literal UTF-8 string, passed as a byte pointer plus a length. | |
83 class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream { | |
84 public: | |
85 Utf8ToUC16CharacterStream(const byte* data, unsigned length); | |
86 virtual ~Utf8ToUC16CharacterStream(); | |
87 | |
88 protected: | |
89 virtual unsigned BufferSeekForward(unsigned delta); | |
90 virtual unsigned FillBuffer(unsigned char_position, unsigned length); | |
91 void SetRawPosition(unsigned char_position); | |
92 | |
93 const byte* raw_data_; | |
94 unsigned raw_data_length_; // Measured in bytes, not characters. | |
95 unsigned raw_data_pos_; | |
96 // The character position of the character at raw_data_[raw_data_pos_]. | |
97 // Not necessarily the same as pos_. | |
98 unsigned raw_character_position_; | |
99 }; | 57 }; |
100 | 58 |
101 | 59 |
102 // UTF16 buffer to read characters from an external string. | 60 // UTF16 buffer to read characters from an external string of type StringType whose raw characters are CharType. |
103 class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream { | 61 template <typename StringType, typename CharType> |
| 62 class ExternalStringUTF16Buffer: public UTF16Buffer { |
104 public: | 63 public: |
105 ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data, | 64 ExternalStringUTF16Buffer(); |
106 int start_position, | 65 virtual ~ExternalStringUTF16Buffer() {} |
107 int end_position); | 66 void Initialize(Handle<StringType> data, |
108 virtual ~ExternalTwoByteStringUC16CharacterStream(); | 67 int start_position, |
| 68 int end_position); |
| 69 virtual void PushBack(uc32 ch); |
| 70 virtual uc32 Advance(); |
| 71 virtual void SeekForward(int pos); |
109 | 72 |
110 virtual void PushBack(uc16 character) { | 73 private: |
111 ASSERT(buffer_cursor_ > raw_data_); | 74 const CharType* raw_data_; // Pointer to the actual array of characters. |
112 buffer_cursor_--; | |
113 pos_--; | |
114 } | |
115 protected: | |
116 virtual unsigned SlowSeekForward(unsigned delta) { | |
117 // Fast case always handles seeking. | |
118 return 0; | |
119 } | |
120 virtual bool ReadBlock() { | |
121 // Entire string is read at start. | |
122 return false; | |
123 } | |
124 Handle<ExternalTwoByteString> source_; | |
125 const uc16* raw_data_; // Pointer to the actual array of characters. | |
126 }; | 75 }; |
127 | 76 |
128 | 77 |
| 78 // Initializes a UTF16Buffer as input stream, using one of a number |
| 79 // of strategies depending on the available character sources. |
| 80 class StreamInitializer { |
| 81 public: |
| 82 UTF16Buffer* Init(Handle<String> source, |
| 83 unibrow::CharacterStream* stream, |
| 84 int start_position, |
| 85 int end_position); |
| 86 private: |
| 87 // The UTF16 buffers that characters can be pulled from. Based on the input, one |
| 88 // of these will be initialized as the actual data source. |
| 89 CharacterStreamUTF16Buffer char_stream_buffer_; |
| 90 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t> |
| 91 two_byte_string_buffer_; |
| 92 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; |
| 93 |
| 94 // Used to convert the source string into a character stream when a stream |
| 95 // is not passed to the scanner. |
| 96 SafeStringInputBuffer safe_string_input_buffer_; |
| 97 }; |
| 98 |
129 // ---------------------------------------------------------------------------- | 99 // ---------------------------------------------------------------------------- |
130 // V8JavaScriptScanner | 100 // V8JavaScriptScanner |
131 // JavaScript scanner getting its input from either a V8 String or a unicode | 101 // JavaScript scanner getting its input from either a V8 String or a unicode |
132 // CharacterStream. | 102 // CharacterStream. |
133 | 103 |
134 class V8JavaScriptScanner : public JavaScriptScanner { | 104 class V8JavaScriptScanner : public JavaScriptScanner { |
135 public: | 105 public: |
136 V8JavaScriptScanner(); | 106 V8JavaScriptScanner() {} |
137 void Initialize(UC16CharacterStream* source, | 107 |
| 108 // Initialize the Scanner to scan source. Overloads additionally accept a character stream or a start and end position. |
| 109 void Initialize(Handle<String> source, int literal_flags = kAllLiterals); |
| 110 void Initialize(Handle<String> source, |
| 111 unibrow::CharacterStream* stream, |
138 int literal_flags = kAllLiterals); | 112 int literal_flags = kAllLiterals); |
| 113 void Initialize(Handle<String> source, |
| 114 int start_position, int end_position, |
| 115 int literal_flags = kAllLiterals); |
| 116 |
| 117 protected: |
| 118 StreamInitializer stream_initializer_; |
139 }; | 119 }; |
140 | 120 |
141 | 121 |
142 class JsonScanner : public Scanner { | 122 class JsonScanner : public Scanner { |
143 public: | 123 public: |
144 JsonScanner(); | 124 JsonScanner(); |
145 | 125 |
146 void Initialize(UC16CharacterStream* source); | 126 // Initialize the Scanner to scan source. |
| 127 void Initialize(Handle<String> source); |
147 | 128 |
148 // Returns the next token. | 129 // Returns the next token. |
149 Token::Value Next(); | 130 Token::Value Next(); |
150 | 131 |
151 protected: | 132 protected: |
152 // Skip past JSON whitespace (only space, tab, newline and carriage-return). | 133 // Skip past JSON whitespace (only space, tab, newline and carriage-return). |
153 bool SkipJsonWhiteSpace(); | 134 bool SkipJsonWhiteSpace(); |
154 | 135 |
155 // Scan a single JSON token. The JSON lexical grammar is specified in the | 136 // Scan a single JSON token. The JSON lexical grammar is specified in the |
156 // ECMAScript 5 standard, section 15.12.1.1. | 137 // ECMAScript 5 standard, section 15.12.1.1. |
157 // Recognizes all of the single-character tokens directly, or calls a function | 138 // Recognizes all of the single-character tokens directly, or calls a function |
158 // to scan a number, string or identifier literal. | 139 // to scan a number, string or identifier literal. |
159 // The only allowed whitespace characters between tokens are tab, | 140 // The only allowed whitespace characters between tokens are tab, |
160 // carriage-return, newline and space. | 141 // carriage-return, newline and space. |
161 void ScanJson(); | 142 void ScanJson(); |
162 | 143 |
163 // A JSON number (production JSONNumber) is a subset of the valid JavaScript | 144 // A JSON number (production JSONNumber) is a subset of the valid JavaScript |
164 // decimal number literals. | 145 // decimal number literals. |
165 // It includes an optional minus sign, must have at least one | 146 // It includes an optional minus sign, must have at least one |
166 // digit before and after a decimal point, may not have prefixed zeros (unless | 147 // digit before and after a decimal point, may not have prefixed zeros (unless |
167 // the integer part is zero), and may include an exponent part (e.g., "e-10"). | 148 // the integer part is zero), and may include an exponent part (e.g., "e-10"). |
168 // Hexadecimal and octal numbers are not allowed. | 149 // Hexadecimal and octal numbers are not allowed. |
169 Token::Value ScanJsonNumber(); | 150 Token::Value ScanJsonNumber(); |
170 | 151 |
171 // A JSON string (production JSONString) is a subset of valid JavaScript string | 152 // A JSON string (production JSONString) is a subset of valid JavaScript string |
172 // literals. The string must only be double-quoted (not single-quoted), and | 153 // literals. The string must only be double-quoted (not single-quoted), and |
173 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and | 154 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and |
174 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. | 155 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. |
175 Token::Value ScanJsonString(); | 156 Token::Value ScanJsonString(); |
176 | 157 |
177 // Used to recognize one of the literals "true", "false", or "null". These | 158 // Used to recognize one of the literals "true", "false", or "null". These |
178 // are the only valid JSON identifiers (productions JSONBooleanLiteral, | 159 // are the only valid JSON identifiers (productions JSONBooleanLiteral, |
179 // JSONNullLiteral). | 160 // JSONNullLiteral). |
180 Token::Value ScanJsonIdentifier(const char* text, Token::Value token); | 161 Token::Value ScanJsonIdentifier(const char* text, Token::Value token); |
| 162 |
| 163 StreamInitializer stream_initializer_; |
181 }; | 164 }; |
182 | 165 |
| 166 |
| 167 // ExternalStringUTF16Buffer member function definitions. |
| 168 template <typename StringType, typename CharType> |
| 169 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer() |
| 170 : raw_data_(NULL) { } |
| 171 |
| 172 |
| 173 template <typename StringType, typename CharType> |
| 174 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize( |
| 175 Handle<StringType> data, |
| 176 int start_position, |
| 177 int end_position) { |
| 178 ASSERT(!data.is_null()); |
| 179 raw_data_ = data->resource()->data(); |
| 180 |
| 181 ASSERT(end_position <= data->length()); |
| 182 if (start_position > 0) { |
| 183 SeekForward(start_position); |
| 184 } |
| 185 end_ = |
| 186 end_position != kNoEndPosition ? end_position : data->length(); |
| 187 } |
| 188 |
| 189 |
| 190 template <typename StringType, typename CharType> |
| 191 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() { |
| 192 if (pos_ < end_) { |
| 193 return raw_data_[pos_++]; |
| 194 } else { |
| 195 // Note: pos_ is incremented even at end of input; currently this is |
| 196 // necessary to avoid a test-parser problem! |
| 197 pos_++; |
| 198 return static_cast<uc32>(-1); |
| 199 } |
| 200 } |
| 201 |
| 202 |
| 203 template <typename StringType, typename CharType> |
| 204 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) { |
| 205 pos_--; |
| 206 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); |
| 207 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); |
| 208 } |
| 209 |
| 210 |
| 211 template <typename StringType, typename CharType> |
| 212 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) { |
| 213 pos_ = pos; |
| 214 } |
| 215 |
183 } } // namespace v8::internal | 216 } } // namespace v8::internal |
184 | 217 |
185 #endif // V8_SCANNER_H_ | 218 #endif // V8_SCANNER_H_ |
OLD | NEW |