| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 66 matching lines...) | |
| 77 } | 77 } |
| 78 | 78 |
| 79 static char* ComputeLimit(char* data, int capacity) { | 79 static char* ComputeLimit(char* data, int capacity) { |
| 80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize; | 80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize; |
| 81 } | 81 } |
| 82 | 82 |
| 83 void AddCharSlow(uc32 c); | 83 void AddCharSlow(uc32 c); |
| 84 }; | 84 }; |
| 85 | 85 |
| 86 | 86 |
| 87 // Interface through which the scanner reads characters from the input source. |
| 87 class UTF16Buffer { | 88 class UTF16Buffer { |
| 88 public: | 89 public: |
| 89 UTF16Buffer(); | 90 UTF16Buffer(); |
| 90 virtual ~UTF16Buffer() {} | 91 virtual ~UTF16Buffer() {} |
| 91 | 92 |
| 92 virtual void PushBack(uc32 ch) = 0; | 93 virtual void PushBack(uc32 ch) = 0; |
| 93 // returns a value < 0 when the buffer end is reached | 94 // Returns a value < 0 when the buffer end is reached. |
| 94 virtual uc32 Advance() = 0; | 95 virtual uc32 Advance() = 0; |
| 95 virtual void SeekForward(int pos) = 0; | 96 virtual void SeekForward(int pos) = 0; |
| 96 | 97 |
| 97 int pos() const { return pos_; } | 98 int pos() const { return pos_; } |
| 98 int size() const { return size_; } | |
| 99 Handle<String> SubString(int start, int end); | |
| 100 | 99 |
| 101 protected: | 100 protected: |
| 102 Handle<String> data_; | 101 int pos_; // Current position in the buffer. |
| 103 int pos_; | 102 int end_; // Position where scanning should stop (EOF). |
| 104 int size_; | |
| 105 }; | 103 }; |
| 106 | 104 |
| 107 | 105 |
| 106 // UTF16 buffer to read characters from a character stream. |
| 108 class CharacterStreamUTF16Buffer: public UTF16Buffer { | 107 class CharacterStreamUTF16Buffer: public UTF16Buffer { |
| 109 public: | 108 public: |
| 110 CharacterStreamUTF16Buffer(); | 109 CharacterStreamUTF16Buffer(); |
| 111 virtual ~CharacterStreamUTF16Buffer() {} | 110 virtual ~CharacterStreamUTF16Buffer() {} |
| 112 void Initialize(Handle<String> data, unibrow::CharacterStream* stream); | 111 void Initialize(Handle<String> data, |
| 112 unibrow::CharacterStream* stream, |
| 113 int start_position, |
| 114 int end_position); |
| 113 virtual void PushBack(uc32 ch); | 115 virtual void PushBack(uc32 ch); |
| 114 virtual uc32 Advance(); | 116 virtual uc32 Advance(); |
| 115 virtual void SeekForward(int pos); | 117 virtual void SeekForward(int pos); |
| 116 | 118 |
| 117 private: | 119 private: |
| 118 List<uc32> pushback_buffer_; | 120 List<uc32> pushback_buffer_; |
| 119 uc32 last_; | 121 uc32 last_; |
| 120 unibrow::CharacterStream* stream_; | 122 unibrow::CharacterStream* stream_; |
| 121 | 123 |
| 122 List<uc32>* pushback_buffer() { return &pushback_buffer_; } | 124 List<uc32>* pushback_buffer() { return &pushback_buffer_; } |
| 123 }; | 125 }; |
| 124 | 126 |
| 125 | 127 |
| 126 class TwoByteStringUTF16Buffer: public UTF16Buffer { | 128 // UTF16 buffer to read characters from an external string. |
| 129 template <typename StringType, typename CharType> |
| 130 class ExternalStringUTF16Buffer: public UTF16Buffer { |
| 127 public: | 131 public: |
| 128 TwoByteStringUTF16Buffer(); | 132 ExternalStringUTF16Buffer(); |
| 129 virtual ~TwoByteStringUTF16Buffer() {} | 133 virtual ~ExternalStringUTF16Buffer() {} |
| 130 void Initialize(Handle<ExternalTwoByteString> data); | 134 void Initialize(Handle<StringType> data, |
| 135 int start_position, |
| 136 int end_position); |
| 131 virtual void PushBack(uc32 ch); | 137 virtual void PushBack(uc32 ch); |
| 132 virtual uc32 Advance(); | 138 virtual uc32 Advance(); |
| 133 virtual void SeekForward(int pos); | 139 virtual void SeekForward(int pos); |
| 134 | 140 |
| 135 private: | 141 private: |
| 136 const uint16_t* raw_data_; | 142 const CharType* raw_data_; // Pointer to the actual array of characters. |
| 137 }; | 143 }; |
| 138 | 144 |
| 139 | 145 |
| 140 class KeywordMatcher { | 146 class KeywordMatcher { |
| 141 // Incrementally recognize keywords. | 147 // Incrementally recognize keywords. |
| 142 // | 148 // |
| 143 // Recognized keywords: | 149 // Recognized keywords: |
| 144 // break case catch const* continue debugger* default delete do else | 150 // break case catch const* continue debugger* default delete do else |
| 145 // finally false for function if in instanceof native* new null | 151 // finally false for function if in instanceof native* new null |
| 146 // return switch this throw true try typeof var void while with | 152 // return switch this throw true try typeof var void while with |
| (...skipping 109 matching lines...) | |
| 256 enum ParserLanguage { JAVASCRIPT, JSON }; | 262 enum ParserLanguage { JAVASCRIPT, JSON }; |
| 257 | 263 |
| 258 | 264 |
| 259 class Scanner { | 265 class Scanner { |
| 260 public: | 266 public: |
| 261 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; | 267 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; |
| 262 | 268 |
| 263 // Construction | 269 // Construction |
| 264 explicit Scanner(ParserMode parse_mode); | 270 explicit Scanner(ParserMode parse_mode); |
| 265 | 271 |
| 266 // Initialize the Scanner to scan source: | 272 // Initialize the Scanner to scan source. |
| 267 void Init(Handle<String> source, | 273 void Initialize(Handle<String> source, |
| 268 unibrow::CharacterStream* stream, | 274 ParserLanguage language); |
| 269 int position, | 275 void Initialize(Handle<String> source, |
| 270 ParserLanguage language); | 276 unibrow::CharacterStream* stream, |
| 277 ParserLanguage language); |
| 278 void Initialize(Handle<String> source, |
| 279 int start_position, int end_position, |
| 280 ParserLanguage language); |
| 271 | 281 |
| 272 // Returns the next token. | 282 // Returns the next token. |
| 273 Token::Value Next(); | 283 Token::Value Next(); |
| 274 | 284 |
| 275 // One token look-ahead (past the token returned by Next()). | 285 // One token look-ahead (past the token returned by Next()). |
| 276 Token::Value peek() const { return next_.token; } | 286 Token::Value peek() const { return next_.token; } |
| 277 | 287 |
| 278 // Returns true if there was a line terminator before the peek'ed token. | 288 // Returns true if there was a line terminator before the peek'ed token. |
| 279 bool has_line_terminator_before_next() const { | 289 bool has_line_terminator_before_next() const { |
| 280 return has_line_terminator_before_next_; | 290 return has_line_terminator_before_next_; |
| (...skipping 47 matching lines...) | |
| 328 // Returns true if regexp flags are scanned (always since flags can | 338 // Returns true if regexp flags are scanned (always since flags can |
| 329 // be empty). | 339 // be empty). |
| 330 bool ScanRegExpFlags(); | 340 bool ScanRegExpFlags(); |
| 331 | 341 |
| 332 // Seek forward to the given position. This operation does not | 342 // Seek forward to the given position. This operation does not |
| 333 // work in general, for instance when there are pushed back | 343 // work in general, for instance when there are pushed back |
| 334 // characters, but works for seeking forward until simple delimiter | 344 // characters, but works for seeking forward until simple delimiter |
| 335 // tokens, which is what it is used for. | 345 // tokens, which is what it is used for. |
| 336 void SeekForward(int pos); | 346 void SeekForward(int pos); |
| 337 | 347 |
| 338 Handle<String> SubString(int start_pos, int end_pos); | |
| 339 bool stack_overflow() { return stack_overflow_; } | 348 bool stack_overflow() { return stack_overflow_; } |
| 340 | 349 |
| 341 static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; } | 350 static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; } |
| 342 | 351 |
| 343 // Tells whether the buffer contains an identifier (no escapes). | 352 // Tells whether the buffer contains an identifier (no escapes). |
| 344 // Used for checking if a property name is an identifier. | 353 // Used for checking if a property name is an identifier. |
| 345 static bool IsIdentifier(unibrow::CharacterStream* buffer); | 354 static bool IsIdentifier(unibrow::CharacterStream* buffer); |
| 346 | 355 |
| 347 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; | 356 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; |
| 348 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; | 357 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; |
| 349 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; | 358 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; |
| 350 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; | 359 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; |
| 351 | 360 |
| 352 static const int kCharacterLookaheadBufferSize = 1; | 361 static const int kCharacterLookaheadBufferSize = 1; |
| 362 static const int kNoEndPosition = 1; |
| 353 | 363 |
| 354 private: | 364 private: |
| 365 void Init(Handle<String> source, |
| 366 unibrow::CharacterStream* stream, |
| 367 int start_position, int end_position, |
| 368 ParserLanguage language); |
| 369 |
| 370 |
| 371 // Different UTF16 buffers used to pull characters from. Based on input one of |
| 372 // these will be initialized as the actual data source. |
| 355 CharacterStreamUTF16Buffer char_stream_buffer_; | 373 CharacterStreamUTF16Buffer char_stream_buffer_; |
| 356 TwoByteStringUTF16Buffer two_byte_string_buffer_; | 374 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t> |
| 375 two_byte_string_buffer_; |
| 376 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; |
| 357 | 377 |
| 358 // Source. | 378 // Source. Will point to one of the buffers declared above. |
| 359 UTF16Buffer* source_; | 379 UTF16Buffer* source_; |
| 360 int position_; | 380 |
| 381 // Used to convert the source string into a character stream when a stream |
| 382 // is not passed to the scanner. |
| 383 SafeStringInputBuffer safe_string_input_buffer_; |
| 361 | 384 |
| 362 // Buffer to hold literal values (identifiers, strings, numbers) | 385 // Buffer to hold literal values (identifiers, strings, numbers) |
| 363 // using 0-terminated UTF-8 encoding. | 386 // using 0-terminated UTF-8 encoding. |
| 364 UTF8Buffer literal_buffer_1_; | 387 UTF8Buffer literal_buffer_1_; |
| 365 UTF8Buffer literal_buffer_2_; | 388 UTF8Buffer literal_buffer_2_; |
| 366 | 389 |
| 367 bool stack_overflow_; | 390 bool stack_overflow_; |
| 368 static StaticResource<Utf8Decoder> utf8_decoder_; | 391 static StaticResource<Utf8Decoder> utf8_decoder_; |
| 369 | 392 |
| 370 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 393 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| (...skipping 82 matching lines...) | |
| 453 uc32 ScanHexEscape(uc32 c, int length); | 476 uc32 ScanHexEscape(uc32 c, int length); |
| 454 uc32 ScanOctalEscape(uc32 c, int length); | 477 uc32 ScanOctalEscape(uc32 c, int length); |
| 455 void ScanEscape(); | 478 void ScanEscape(); |
| 456 Token::Value ScanString(); | 479 Token::Value ScanString(); |
| 457 | 480 |
| 458 // Scans a possible HTML comment -- begins with '<!'. | 481 // Scans a possible HTML comment -- begins with '<!'. |
| 459 Token::Value ScanHtmlComment(); | 482 Token::Value ScanHtmlComment(); |
| 460 | 483 |
| 461 // Return the current source position. | 484 // Return the current source position. |
| 462 int source_pos() { | 485 int source_pos() { |
| 463 return source_->pos() - kCharacterLookaheadBufferSize + position_; | 486 return source_->pos() - kCharacterLookaheadBufferSize; |
| 464 } | 487 } |
| 465 | 488 |
| 466 // Decodes a unicode escape-sequence which is part of an identifier. | 489 // Decodes a unicode escape-sequence which is part of an identifier. |
| 467 // If the escape sequence cannot be decoded the result is kBadRune. | 490 // If the escape sequence cannot be decoded the result is kBadRune. |
| 468 uc32 ScanIdentifierUnicodeEscape(); | 491 uc32 ScanIdentifierUnicodeEscape(); |
| 469 }; | 492 }; |
| 470 | 493 |
| 471 } } // namespace v8::internal | 494 } } // namespace v8::internal |
| 472 | 495 |
| 473 #endif // V8_SCANNER_H_ | 496 #endif // V8_SCANNER_H_ |
| OLD | NEW |