OLD | NEW |
---|---|
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 17 matching lines...) Expand all Loading... | |
28 #ifndef V8_SCANNER_H_ | 28 #ifndef V8_SCANNER_H_ |
29 #define V8_SCANNER_H_ | 29 #define V8_SCANNER_H_ |
30 | 30 |
31 #include "token.h" | 31 #include "token.h" |
32 #include "char-predicates-inl.h" | 32 #include "char-predicates-inl.h" |
33 #include "scanner-base.h" | 33 #include "scanner-base.h" |
34 | 34 |
35 namespace v8 { | 35 namespace v8 { |
36 namespace internal { | 36 namespace internal { |
37 | 37 |
38 // UTF16 buffer to read characters from a character stream. | 38 // A buffered character stream based on a random access character |
39 class CharacterStreamUTF16Buffer: public UTF16Buffer { | 39 // source (ReadBlock can be called with pos_ pointing to any position, |
40 // even positions before the current). | |
41 class BufferedUC16CharacterStream: public UC16CharacterStream { | |
40 public: | 42 public: |
41 CharacterStreamUTF16Buffer(); | 43 BufferedUC16CharacterStream(); |
42 virtual ~CharacterStreamUTF16Buffer() {} | 44 virtual ~BufferedUC16CharacterStream(); |
43 void Initialize(Handle<String> data, | |
44 unibrow::CharacterStream* stream, | |
45 int start_position, | |
46 int end_position); | |
47 virtual void PushBack(uc32 ch); | |
48 virtual uc32 Advance(); | |
49 virtual void SeekForward(int pos); | |
50 | 45 |
51 private: | 46 virtual void PushBack(uc16 character); |
Erik Corry
2010/12/07 12:27:30
Missing blank line.
Lasse Reichstein
2010/12/07 14:05:54
Done.
| |
52 List<uc32> pushback_buffer_; | 47 protected: |
53 uc32 last_; | 48 static const unsigned kBufferSize = 512; |
54 unibrow::CharacterStream* stream_; | 49 static const unsigned kPushBackStepSize = 16; |
55 | 50 |
56 List<uc32>* pushback_buffer() { return &pushback_buffer_; } | 51 virtual unsigned SlowSeekForward(unsigned delta); |
52 virtual bool ReadBlock(); | |
53 virtual void SlowPushBack(uc16 character); | |
54 | |
55 virtual unsigned BufferSeekForward(unsigned delta) = 0; | |
56 virtual unsigned FillBuffer(unsigned position, unsigned length) = 0; | |
57 | |
58 const uc16* pushback_limit_; | |
59 uc16 buffer_[kBufferSize]; | |
60 }; | |
61 | |
62 | |
63 // Generic string stream. | |
64 class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream { | |
65 public: | |
66 GenericStringUC16CharacterStream(Handle<String> data, | |
67 unsigned start_position, | |
68 unsigned end_position); | |
69 virtual ~GenericStringUC16CharacterStream(); | |
70 | |
71 protected: | |
72 virtual unsigned BufferSeekForward(unsigned delta); | |
73 virtual unsigned FillBuffer(unsigned position, unsigned length); | |
74 | |
75 Handle<String> string_; | |
76 unsigned start_position_; | |
77 unsigned length_; | |
78 }; | |
79 | |
80 | |
81 // UC16 stream based on a literal UTF-8 string. | |
82 class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream { | |
83 public: | |
84 Utf8ToUC16CharacterStream(const byte* data, unsigned length); | |
85 virtual ~Utf8ToUC16CharacterStream(); | |
Erik Corry
2010/12/07 12:27:30
Missing blank line
Lasse Reichstein
2010/12/07 14:05:54
Done.
| |
86 protected: | |
87 virtual unsigned BufferSeekForward(unsigned delta); | |
88 virtual unsigned FillBuffer(unsigned char_position, unsigned length); | |
89 void SetRawPosition(unsigned char_position); | |
90 | |
91 const byte* raw_data_; | |
92 unsigned raw_data_length_; // Not the number of characters! | |
Erik Corry
2010/12/07 12:27:30
Measured in bytes?
Lasse Reichstein
2010/12/07 14:05:54
Yes. Reworded to say so.
| |
93 unsigned raw_data_pos_; | |
94 // The character position of the character at raw_data[raw_data_pos_]. | |
95 // Not necessarily the same as pos_. | |
96 unsigned raw_character_position_; | |
57 }; | 97 }; |
58 | 98 |
59 | 99 |
60 // UTF16 buffer to read characters from an external string. | 100 // UTF16 buffer to read characters from an external string. |
61 template <typename StringType, typename CharType> | 101 class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream { |
62 class ExternalStringUTF16Buffer: public UTF16Buffer { | |
63 public: | 102 public: |
64 ExternalStringUTF16Buffer(); | 103 ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data, |
65 virtual ~ExternalStringUTF16Buffer() {} | 104 int start_position, |
66 void Initialize(Handle<StringType> data, | 105 int end_position); |
67 int start_position, | 106 virtual ~ExternalTwoByteStringUC16CharacterStream(); |
68 int end_position); | |
69 virtual void PushBack(uc32 ch); | |
70 virtual uc32 Advance(); | |
71 virtual void SeekForward(int pos); | |
72 | 107 |
73 private: | 108 virtual void PushBack(uc16 character) { |
74 const CharType* raw_data_; // Pointer to the actual array of characters. | 109 ASSERT(buffer_cursor_ > raw_data_); |
110 buffer_cursor_--; | |
111 pos_--; | |
112 } | |
113 protected: | |
114 virtual unsigned SlowSeekForward(unsigned delta) { | |
115 // Fast case always handles seeking. | |
116 return 0; | |
117 } | |
118 virtual bool ReadBlock() { | |
119 // Entire string is read at start. | |
120 return false; | |
121 } | |
122 Handle<ExternalTwoByteString> source_; | |
123 const uc16* raw_data_; // Pointer to the actual array of characters. | |
75 }; | 124 }; |
76 | 125 |
77 | 126 |
78 // Initializes a UTF16Buffer as input stream, using one of a number | |
79 // of strategies depending on the available character sources. | |
80 class StreamInitializer { | |
81 public: | |
82 UTF16Buffer* Init(Handle<String> source, | |
83 unibrow::CharacterStream* stream, | |
84 int start_position, | |
85 int end_position); | |
86 private: | |
87 // Different UTF16 buffers used to pull characters from. Based on input one of | |
88 // these will be initialized as the actual data source. | |
89 CharacterStreamUTF16Buffer char_stream_buffer_; | |
90 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t> | |
91 two_byte_string_buffer_; | |
92 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; | |
93 | |
94 // Used to convert the source string into a character stream when a stream | |
95 // is not passed to the scanner. | |
96 SafeStringInputBuffer safe_string_input_buffer_; | |
97 }; | |
98 | |
99 // ---------------------------------------------------------------------------- | 127 // ---------------------------------------------------------------------------- |
100 // V8JavaScriptScanner | 128 // V8JavaScriptScanner |
101 // JavaScript scanner getting its input from either a V8 String or a unicode | 129 // JavaScript scanner getting its input from either a V8 String or a unicode |
102 // CharacterStream. | 130 // CharacterStream. |
103 | 131 |
104 class V8JavaScriptScanner : public JavaScriptScanner { | 132 class V8JavaScriptScanner : public JavaScriptScanner { |
105 public: | 133 public: |
106 V8JavaScriptScanner() {} | 134 V8JavaScriptScanner(); |
107 | 135 void Initialize(UC16CharacterStream* source, |
108 // Initialize the Scanner to scan source. | |
109 void Initialize(Handle<String> source, int literal_flags = kAllLiterals); | |
110 void Initialize(Handle<String> source, | |
111 unibrow::CharacterStream* stream, | |
112 int literal_flags = kAllLiterals); | 136 int literal_flags = kAllLiterals); |
113 void Initialize(Handle<String> source, | |
114 int start_position, int end_position, | |
115 int literal_flags = kAllLiterals); | |
116 | |
117 protected: | |
118 StreamInitializer stream_initializer_; | |
119 }; | 137 }; |
120 | 138 |
121 | 139 |
122 class JsonScanner : public Scanner { | 140 class JsonScanner : public Scanner { |
123 public: | 141 public: |
124 JsonScanner(); | 142 explicit JsonScanner(); |
Erik Corry
2010/12/07 12:27:30
No explicit.
| |
125 | 143 |
126 // Initialize the Scanner to scan source. | 144 void Initialize(UC16CharacterStream* source); |
127 void Initialize(Handle<String> source); | |
128 | 145 |
129 // Returns the next token. | 146 // Returns the next token. |
130 Token::Value Next(); | 147 Token::Value Next(); |
131 | 148 |
132 protected: | 149 protected: |
133 // Skip past JSON whitespace (only space, tab, newline and carriage-return). | 150 // Skip past JSON whitespace (only space, tab, newline and carriage-return). |
134 bool SkipJsonWhiteSpace(); | 151 bool SkipJsonWhiteSpace(); |
135 | 152 |
136 // Scan a single JSON token. The JSON lexical grammar is specified in the | 153 // Scan a single JSON token. The JSON lexical grammar is specified in the |
137 // ECMAScript 5 standard, section 15.12.1.1. | 154 // ECMAScript 5 standard, section 15.12.1.1. |
138 // Recognizes all of the single-character tokens directly, or calls a function | 155 // Recognizes all of the single-character tokens directly, or calls a function |
139 // to scan a number, string or identifier literal. | 156 // to scan a number, string or identifier literal. |
140 // The only allowed whitespace characters between tokens are tab, | 157 // The only allowed whitespace characters between tokens are tab, |
141 // carrige-return, newline and space. | 158 // carriage-return, newline and space. |
142 void ScanJson(); | 159 void ScanJson(); |
143 | 160 |
144 // A JSON number (production JSONNumber) is a subset of the valid JavaScript | 161 // A JSON number (production JSONNumber) is a subset of the valid JavaScript |
145 // decimal number literals. | 162 // decimal number literals. |
146 // It includes an optional minus sign, must have at least one | 163 // It includes an optional minus sign, must have at least one |
147 // digit before and after a decimal point, may not have prefixed zeros (unless | 164 // digit before and after a decimal point, may not have prefixed zeros (unless |
148 // the integer part is zero), and may include an exponent part (e.g., "e-10"). | 165 // the integer part is zero), and may include an exponent part (e.g., "e-10"). |
149 // Hexadecimal and octal numbers are not allowed. | 166 // Hexadecimal and octal numbers are not allowed. |
150 Token::Value ScanJsonNumber(); | 167 Token::Value ScanJsonNumber(); |
151 | 168 |
152 // A JSON string (production JSONString) is a subset of valid JavaScript string | 169 // A JSON string (production JSONString) is a subset of valid JavaScript string |
153 // literals. The string must only be double-quoted (not single-quoted), and | 170 // literals. The string must only be double-quoted (not single-quoted), and |
154 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and | 171 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and |
155 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. | 172 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. |
156 Token::Value ScanJsonString(); | 173 Token::Value ScanJsonString(); |
157 | 174 |
158 // Used to recognize one of the literals "true", "false", or "null". These | 175 // Used to recognize one of the literals "true", "false", or "null". These |
159 // are the only valid JSON identifiers (productions JSONBooleanLiteral, | 176 // are the only valid JSON identifiers (productions JSONBooleanLiteral, |
160 // JSONNullLiteral). | 177 // JSONNullLiteral). |
161 Token::Value ScanJsonIdentifier(const char* text, Token::Value token); | 178 Token::Value ScanJsonIdentifier(const char* text, Token::Value token); |
162 | |
163 StreamInitializer stream_initializer_; | |
164 }; | 179 }; |
165 | 180 |
166 | |
167 // ExternalStringUTF16Buffer | |
168 template <typename StringType, typename CharType> | |
169 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer() | |
170 : raw_data_(NULL) { } | |
171 | |
172 | |
173 template <typename StringType, typename CharType> | |
174 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize( | |
175 Handle<StringType> data, | |
176 int start_position, | |
177 int end_position) { | |
178 ASSERT(!data.is_null()); | |
179 raw_data_ = data->resource()->data(); | |
180 | |
181 ASSERT(end_position <= data->length()); | |
182 if (start_position > 0) { | |
183 SeekForward(start_position); | |
184 } | |
185 end_ = | |
186 end_position != kNoEndPosition ? end_position : data->length(); | |
187 } | |
188 | |
189 | |
190 template <typename StringType, typename CharType> | |
191 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() { | |
192 if (pos_ < end_) { | |
193 return raw_data_[pos_++]; | |
194 } else { | |
195 // note: currently the following increment is necessary to avoid a | |
196 // test-parser problem! | |
197 pos_++; | |
198 return static_cast<uc32>(-1); | |
199 } | |
200 } | |
201 | |
202 | |
203 template <typename StringType, typename CharType> | |
204 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) { | |
205 pos_--; | |
206 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); | |
207 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); | |
208 } | |
209 | |
210 | |
211 template <typename StringType, typename CharType> | |
212 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) { | |
213 pos_ = pos; | |
214 } | |
215 | |
216 } } // namespace v8::internal | 181 } } // namespace v8::internal |
217 | 182 |
218 #endif // V8_SCANNER_H_ | 183 #endif // V8_SCANNER_H_ |
OLD | NEW |