OLD | NEW |
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
77 } | 77 } |
78 | 78 |
// ComputeLimit() returns the address that lies kMaxEncodedSize bytes before
// the end of the buffer (data + capacity). Identical in the OLD and NEW columns.
// NOTE(review): presumably the limit leaves room for one more maximally-sized
// UTF-8 encoded character -- confirm against the code that writes to the buffer.
79 static char* ComputeLimit(char* data, int capacity) { | 79 static char* ComputeLimit(char* data, int capacity) { |
80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize; | 80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize; |
81 } | 81 } |
82 | 82 |
83 void AddCharSlow(uc32 c); | 83 void AddCharSlow(uc32 c); |
84 }; | 84 }; |
85 | 85 |
86 | 86 |
// UTF16Buffer is an abstract base class: PushBack(), Advance() and
// SeekForward() are pure virtual and are re-declared by the buffer classes
// that derive from it in this header.
// This change removes size(), SubString(), data_ and size_ (OLD column only)
// and adds the protected end_ member (NEW column only).
| 87 // Interface through which the scanner reads characters from the input source. |
87 class UTF16Buffer { | 88 class UTF16Buffer { |
88 public: | 89 public: |
89 UTF16Buffer(); | 90 UTF16Buffer(); |
90 virtual ~UTF16Buffer() {} | 91 virtual ~UTF16Buffer() {} |
91 | 92 |
92 virtual void PushBack(uc32 ch) = 0; | 93 virtual void PushBack(uc32 ch) = 0; |
93 // returns a value < 0 when the buffer end is reached | 94 // Returns a value < 0 when the buffer end is reached. |
94 virtual uc32 Advance() = 0; | 95 virtual uc32 Advance() = 0; |
95 virtual void SeekForward(int pos) = 0; | 96 virtual void SeekForward(int pos) = 0; |
96 | 97 |
// pos() returns the current value of pos_. In the NEW column it is the only
// public accessor; end_ is reachable from derived classes only.
97 int pos() const { return pos_; } | 98 int pos() const { return pos_; } |
98 int size() const { return size_; } | |
99 Handle<String> SubString(int start, int end); | |
100 | 99 |
101 protected: | 100 protected: |
102 Handle<String> data_; | 101 int pos_; // Current position in the buffer. |
103 int pos_; | 102 int end_; // Position where scanning should stop (EOF). |
104 int size_; | |
105 }; | 103 }; |
106 | 104 |
107 | 105 |
// Concrete UTF16Buffer that holds a unibrow::CharacterStream pointer
// (stream_), a list of pushed-back characters (pushback_buffer_) and last_.
// In the NEW column Initialize() gains two parameters, start_position and
// end_position, in addition to the string handle and the character stream.
// NOTE(review): presumably these delimit the range to scan and feed the
// inherited pos_/end_ members -- confirm in the implementation.
| 106 // UTF16 buffer to read characters from a character stream. |
108 class CharacterStreamUTF16Buffer: public UTF16Buffer { | 107 class CharacterStreamUTF16Buffer: public UTF16Buffer { |
109 public: | 108 public: |
110 CharacterStreamUTF16Buffer(); | 109 CharacterStreamUTF16Buffer(); |
111 virtual ~CharacterStreamUTF16Buffer() {} | 110 virtual ~CharacterStreamUTF16Buffer() {} |
112 void Initialize(Handle<String> data, unibrow::CharacterStream* stream); | 111 void Initialize(Handle<String> data, |
| 112 unibrow::CharacterStream* stream, |
| 113 int start_position, |
| 114 int end_position); |
113 virtual void PushBack(uc32 ch); | 115 virtual void PushBack(uc32 ch); |
114 virtual uc32 Advance(); | 116 virtual uc32 Advance(); |
115 virtual void SeekForward(int pos); | 117 virtual void SeekForward(int pos); |
116 | 118 |
117 private: | 119 private: |
118 List<uc32> pushback_buffer_; | 120 List<uc32> pushback_buffer_; |
119 uc32 last_; | 121 uc32 last_; |
120 unibrow::CharacterStream* stream_; | 122 unibrow::CharacterStream* stream_; |
121 | 123 |
// pushback_buffer() returns a pointer to the pushback_buffer_ member.
122 List<uc32>* pushback_buffer() { return &pushback_buffer_; } | 124 List<uc32>* pushback_buffer() { return &pushback_buffer_; } |
123 }; | 125 }; |
124 | 126 |
125 | 127 |
// The OLD TwoByteStringUTF16Buffer (fixed to Handle<ExternalTwoByteString> and
// const uint16_t*) is replaced in the NEW column by a class template that is
// parameterized on the string type and on its character type.
// Initialize() also gains start_position and end_position parameters.
// NOTE(review): only declarations appear in this header. Scanner instantiates
// the template for <ExternalTwoByteString, uint16_t> and
// <ExternalAsciiString, char>; confirm that the member definitions are visible
// to, or explicitly instantiated for, both instantiations.
126 class TwoByteStringUTF16Buffer: public UTF16Buffer { | 128 // UTF16 buffer to read characters from an external string. |
| 129 template <typename StringType, typename CharType> |
| 130 class ExternalStringUTF16Buffer: public UTF16Buffer { |
127 public: | 131 public: |
128 TwoByteStringUTF16Buffer(); | 132 ExternalStringUTF16Buffer(); |
129 virtual ~TwoByteStringUTF16Buffer() {} | 133 virtual ~ExternalStringUTF16Buffer() {} |
130 void Initialize(Handle<ExternalTwoByteString> data); | 134 void Initialize(Handle<StringType> data, |
| 135 int start_position, |
| 136 int end_position); |
131 virtual void PushBack(uc32 ch); | 137 virtual void PushBack(uc32 ch); |
132 virtual uc32 Advance(); | 138 virtual uc32 Advance(); |
133 virtual void SeekForward(int pos); | 139 virtual void SeekForward(int pos); |
134 | 140 |
135 private: | 141 private: |
136 const uint16_t* raw_data_; | 142 const CharType* raw_data_; // Pointer to the actual array of characters. |
137 }; | 143 }; |
138 | 144 |
139 | 145 |
140 class KeywordMatcher { | 146 class KeywordMatcher { |
141 // Incrementally recognize keywords. | 147 // Incrementally recognize keywords. |
142 // | 148 // |
143 // Recognized keywords: | 149 // Recognized keywords: |
144 // break case catch const* continue debugger* default delete do else | 150 // break case catch const* continue debugger* default delete do else |
145 // finally false for function if in instanceof native* new null | 151 // finally false for function if in instanceof native* new null |
146 // return switch this throw true try typeof var void while with | 152 // return switch this throw true try typeof var void while with |
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// ParserLanguage has two enumerators, JAVASCRIPT and JSON. Values of this type
// are passed as the `language` argument when a Scanner is initialized.
// Unchanged between the OLD and NEW columns.
256 enum ParserLanguage { JAVASCRIPT, JSON }; | 262 enum ParserLanguage { JAVASCRIPT, JSON }; |
257 | 263 |
258 | 264 |
259 class Scanner { | 265 class Scanner { |
260 public: | 266 public: |
261 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; | 267 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; |
262 | 268 |
263 // Construction | 269 // Construction |
264 explicit Scanner(ParserMode parse_mode); | 270 explicit Scanner(ParserMode parse_mode); |
265 | 271 |
// OLD column: one public Init(source, stream, position, language).
// NEW column: three public Initialize() overloads, taking
//   1. (source, language)
//   2. (source, stream, language)
//   3. (source, start_position, end_position, language)
// NOTE(review): in the NEW column no public overload accepts both a stream
// and a position range; that combination exists only on the private Init().
// Confirm that no caller of the OLD Init() relied on passing both.
266 // Initialize the Scanner to scan source: | 272 // Initialize the Scanner to scan source. |
267 void Init(Handle<String> source, | 273 void Initialize(Handle<String> source, |
268 unibrow::CharacterStream* stream, | 274 ParserLanguage language); |
269 int position, | 275 void Initialize(Handle<String> source, |
270 ParserLanguage language); | 276 unibrow::CharacterStream* stream, |
| 277 ParserLanguage language); |
| 278 void Initialize(Handle<String> source, |
| 279 int start_position, int end_position, |
| 280 ParserLanguage language); |
271 | 281 |
272 // Returns the next token. | 282 // Returns the next token. |
273 Token::Value Next(); | 283 Token::Value Next(); |
274 | 284 |
275 // One token look-ahead (past the token returned by Next()). | 285 // One token look-ahead (past the token returned by Next()). |
276 Token::Value peek() const { return next_.token; } | 286 Token::Value peek() const { return next_.token; } |
277 | 287 |
278 // Returns true if there was a line terminator before the peek'ed token. | 288 // Returns true if there was a line terminator before the peek'ed token. |
279 bool has_line_terminator_before_next() const { | 289 bool has_line_terminator_before_next() const { |
280 return has_line_terminator_before_next_; | 290 return has_line_terminator_before_next_; |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
328 // Returns true if regexp flags are scanned (always since flags can | 338 // Returns true if regexp flags are scanned (always since flags can |
329 // be empty). | 339 // be empty). |
330 bool ScanRegExpFlags(); | 340 bool ScanRegExpFlags(); |
331 | 341 |
// SeekForward() is unchanged between the OLD and NEW columns.
332 // Seek forward to the given position. This operation does not | 342 // Seek forward to the given position. This operation does not |
333 // work in general, for instance when there are pushed back | 343 // work in general, for instance when there are pushed back |
334 // characters, but works for seeking forward until simple delimiter | 344 // characters, but works for seeking forward until simple delimiter |
335 // tokens, which is what it is used for. | 345 // tokens, which is what it is used for. |
336 void SeekForward(int pos); | 346 void SeekForward(int pos); |
337 | 347 |
// Scanner::SubString() exists in the OLD column only; this change removes it,
// as it also removes UTF16Buffer::SubString().
338 Handle<String> SubString(int start_pos, int end_pos); | |
// stack_overflow() returns the stack_overflow_ flag.
339 bool stack_overflow() { return stack_overflow_; } | 348 bool stack_overflow() { return stack_overflow_; } |
340 | 349 |
// utf8_decoder() returns the address of the static utf8_decoder_ member.
341 static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; } | 350 static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; } |
342 | 351 |
343 // Tells whether the buffer contains an identifier (no escapes). | 352 // Tells whether the buffer contains an identifier (no escapes). |
344 // Used for checking if a property name is an identifier. | 353 // Used for checking if a property name is an identifier. |
345 static bool IsIdentifier(unibrow::CharacterStream* buffer); | 354 static bool IsIdentifier(unibrow::CharacterStream* buffer); |
346 | 355 |
347 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; | 356 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; |
348 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; | 357 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; |
349 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; | 358 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; |
350 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; | 359 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; |
351 | 360 |
// kCharacterLookaheadBufferSize is the amount subtracted from source_->pos()
// in source_pos().
// kNoEndPosition is introduced by this change (NEW column only).
// NOTE(review): presumably a sentinel meaning "no explicit end position was
// given". Its value, 1, is also a valid non-negative source position, so an
// explicit end_position of 1 could not be told apart from the sentinel.
// Consider a negative value -- confirm against how the implementation
// compares end positions with this constant.
352 static const int kCharacterLookaheadBufferSize = 1; | 361 static const int kCharacterLookaheadBufferSize = 1; |
| 362 static const int kNoEndPosition = 1; |
353 | 363 |
354 private: | 364 private: |
// Private in the NEW column (it follows the `private:` label). Takes the
// source string, a character stream, a start/end position pair and the
// language.
// NOTE(review): presumably the shared back end of the public Initialize()
// overloads -- confirm in the implementation.
| 365 void Init(Handle<String> source, |
| 366 unibrow::CharacterStream* stream, |
| 367 int start_position, int end_position, |
| 368 ParserLanguage language); |
| 369 |
| 370 |
// Member changes made by this diff:
//   - two_byte_string_buffer_ changes type from TwoByteStringUTF16Buffer to
//     ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>;
//   - ascii_string_buffer_ and safe_string_input_buffer_ are added;
//   - position_ is removed.
| 371 // Different UTF16 buffers used to pull characters from. Based on input one of |
| 372 // these will be initialized as the actual data source. |
355 CharacterStreamUTF16Buffer char_stream_buffer_; | 373 CharacterStreamUTF16Buffer char_stream_buffer_; |
356 TwoByteStringUTF16Buffer two_byte_string_buffer_; | 374 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t> |
| 375 two_byte_string_buffer_; |
| 376 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; |
357 | 377 |
358 // Source. | 378 // Source. Will point to one of the buffers declared above. |
359 UTF16Buffer* source_; | 379 UTF16Buffer* source_; |
360 int position_; | 380 |
| 381 // Used to convert the source string into a character stream when a stream |
| 382 // is not passed to the scanner. |
| 383 SafeStringInputBuffer safe_string_input_buffer_; |
361 | 384 |
362 // Buffer to hold literal values (identifiers, strings, numbers) | 385 // Buffer to hold literal values (identifiers, strings, numbers) |
363 // using 0-terminated UTF-8 encoding. | 386 // using 0-terminated UTF-8 encoding. |
364 UTF8Buffer literal_buffer_1_; | 387 UTF8Buffer literal_buffer_1_; |
365 UTF8Buffer literal_buffer_2_; | 388 UTF8Buffer literal_buffer_2_; |
366 | 389 |
367 bool stack_overflow_; | 390 bool stack_overflow_; |
368 static StaticResource<Utf8Decoder> utf8_decoder_; | 391 static StaticResource<Utf8Decoder> utf8_decoder_; |
369 | 392 |
370 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 393 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
453 uc32 ScanHexEscape(uc32 c, int length); | 476 uc32 ScanHexEscape(uc32 c, int length); |
454 uc32 ScanOctalEscape(uc32 c, int length); | 477 uc32 ScanOctalEscape(uc32 c, int length); |
455 void ScanEscape(); | 478 void ScanEscape(); |
456 Token::Value ScanString(); | 479 Token::Value ScanString(); |
457 | 480 |
458 // Scans a possible HTML comment -- begins with '<!'. | 481 // Scans a possible HTML comment -- begins with '<!'. |
459 Token::Value ScanHtmlComment(); | 482 Token::Value ScanHtmlComment(); |
460 | 483 |
// NEW column: returns source_->pos() minus kCharacterLookaheadBufferSize.
// OLD column: additionally added position_, a member this change removes.
// NOTE(review): presumably the buffers now track absolute positions
// themselves, since their Initialize() functions take a start_position --
// confirm in the implementation.
461 // Return the current source position. | 484 // Return the current source position. |
462 int source_pos() { | 485 int source_pos() { |
463 return source_->pos() - kCharacterLookaheadBufferSize + position_; | 486 return source_->pos() - kCharacterLookaheadBufferSize; |
464 } | 487 } |
465 | 488 |
466 // Decodes a unicode escape-sequence which is part of an identifier. | 489 // Decodes a unicode escape-sequence which is part of an identifier. |
467 // If the escape sequence cannot be decoded the result is kBadRune. | 490 // If the escape sequence cannot be decoded the result is kBadRune. |
468 uc32 ScanIdentifierUnicodeEscape(); | 491 uc32 ScanIdentifierUnicodeEscape(); |
469 }; | 492 }; |
470 | 493 |
471 } } // namespace v8::internal | 494 } } // namespace v8::internal |
472 | 495 |
473 #endif // V8_SCANNER_H_ | 496 #endif // V8_SCANNER_H_ |
OLD | NEW |