| OLD | NEW |
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 24 matching lines...) Expand all Loading... |
| 35 namespace v8 { | 35 namespace v8 { |
| 36 namespace internal { | 36 namespace internal { |
| 37 | 37 |
| 38 | 38 |
| 39 class UTF8Buffer { | 39 class UTF8Buffer { |
| 40 public: | 40 public: |
| 41 UTF8Buffer(); | 41 UTF8Buffer(); |
| 42 ~UTF8Buffer(); | 42 ~UTF8Buffer(); |
| 43 | 43 |
| 44 inline void AddChar(uc32 c) { | 44 inline void AddChar(uc32 c) { |
| 45 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { | 45 if (recording_) { |
| 46 buffer_.Add(static_cast<char>(c)); | 46 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { |
| 47 } else { | 47 buffer_.Add(static_cast<char>(c)); |
| 48 AddCharSlow(c); | 48 } else { |
| 49 AddCharSlow(c); |
| 50 } |
| 49 } | 51 } |
| 50 } | 52 } |
| 51 | 53 |
| 52 void StartLiteral() { | 54 void StartLiteral() { |
| 53 buffer_.StartSequence(); | 55 buffer_.StartSequence(); |
| 56 recording_ = true; |
| 54 } | 57 } |
| 55 | 58 |
| 56 Vector<const char> EndLiteral() { | 59 Vector<const char> EndLiteral() { |
| 57 buffer_.Add(kEndMarker); | 60 if (recording_) { |
| 58 Vector<char> sequence = buffer_.EndSequence(); | 61 recording_ = false; |
| 59 return Vector<const char>(sequence.start(), sequence.length()); | 62 buffer_.Add(kEndMarker); |
| 63 Vector<char> sequence = buffer_.EndSequence(); |
| 64 return Vector<const char>(sequence.start(), sequence.length()); |
| 65 } |
| 66 return Vector<const char>(); |
| 60 } | 67 } |
| 61 | 68 |
| 62 void DropLiteral() { | 69 void DropLiteral() { |
| 63 buffer_.DropSequence(); | 70 if (recording_) { |
| 71 recording_ = false; |
| 72 buffer_.DropSequence(); |
| 73 } |
| 64 } | 74 } |
| 65 | 75 |
| 66 void Reset() { | 76 void Reset() { |
| 67 buffer_.Reset(); | 77 buffer_.Reset(); |
| 68 } | 78 } |
| 69 | 79 |
| 70 // The end marker added after a parsed literal. | 80 // The end marker added after a parsed literal. |
| 71 // Using zero allows the usage of strlen and similar functions on | 81 // Using zero allows the usage of strlen and similar functions on |
| 72 // identifiers and numbers (but not strings, since they may contain zero | 82 // identifiers and numbers (but not strings, since they may contain zero |
| 73 // bytes). | 83 // bytes). |
| 74 // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside | 84 // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside |
| 75 // an utf-8 string. This requires changes in all places that uses | 85 // an utf-8 string. This requires changes in all places that uses |
| 76 // str-functions on the literals, but allows a single pointer to represent | 86 // str-functions on the literals, but allows a single pointer to represent |
| 77 // the literal, even if it contains embedded zeros. | 87 // the literal, even if it contains embedded zeros. |
| 78 static const char kEndMarker = '\x00'; | 88 static const char kEndMarker = '\x00'; |
| 79 private: | 89 private: |
| 80 static const int kInitialCapacity = 256; | 90 static const int kInitialCapacity = 256; |
| 81 SequenceCollector<char, 4> buffer_; | 91 SequenceCollector<char, 4> buffer_; |
| 82 | 92 bool recording_; |
| 83 void AddCharSlow(uc32 c); | 93 void AddCharSlow(uc32 c); |
| 84 }; | 94 }; |
| 85 | 95 |
| 86 | 96 |
| 87 // Interface through which the scanner reads characters from the input source. | |
| 88 class UTF16Buffer { | |
| 89 public: | |
| 90 UTF16Buffer(); | |
| 91 virtual ~UTF16Buffer() {} | |
| 92 | |
| 93 virtual void PushBack(uc32 ch) = 0; | |
| 94 // Returns a value < 0 when the buffer end is reached. | |
| 95 virtual uc32 Advance() = 0; | |
| 96 virtual void SeekForward(int pos) = 0; | |
| 97 | |
| 98 int pos() const { return pos_; } | |
| 99 | |
| 100 protected: | |
| 101 int pos_; // Current position in the buffer. | |
| 102 int end_; // Position where scanning should stop (EOF). | |
| 103 }; | |
| 104 | |
| 105 | |
| 106 // UTF16 buffer to read characters from a character stream. | 97 // UTF16 buffer to read characters from a character stream. |
| 107 class CharacterStreamUTF16Buffer: public UTF16Buffer { | 98 class CharacterStreamUTF16Buffer: public UTF16Buffer { |
| 108 public: | 99 public: |
| 109 CharacterStreamUTF16Buffer(); | 100 CharacterStreamUTF16Buffer(); |
| 110 virtual ~CharacterStreamUTF16Buffer() {} | 101 virtual ~CharacterStreamUTF16Buffer() {} |
| 111 void Initialize(Handle<String> data, | 102 void Initialize(Handle<String> data, |
| 112 unibrow::CharacterStream* stream, | 103 unibrow::CharacterStream* stream, |
| 113 int start_position, | 104 int start_position, |
| 114 int end_position); | 105 int end_position); |
| 115 virtual void PushBack(uc32 ch); | 106 virtual void PushBack(uc32 ch); |
| (...skipping 266 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 382 // Buffer to hold literal values (identifiers, strings, numbers) | 373 // Buffer to hold literal values (identifiers, strings, numbers) |
| 383 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. | 374 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. |
| 384 UTF8Buffer literal_buffer_; | 375 UTF8Buffer literal_buffer_; |
| 385 | 376 |
| 386 bool stack_overflow_; | 377 bool stack_overflow_; |
| 387 | 378 |
| 388 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 379 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| 389 uc32 c0_; | 380 uc32 c0_; |
| 390 }; | 381 }; |
| 391 | 382 |
| 383 |
| 384 // ExternalStringUTF16Buffer |
| 385 template <typename StringType, typename CharType> |
| 386 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer() |
| 387 : raw_data_(NULL) { } |
| 388 |
| 389 |
| 390 template <typename StringType, typename CharType> |
| 391 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize( |
| 392 Handle<StringType> data, |
| 393 int start_position, |
| 394 int end_position) { |
| 395 ASSERT(!data.is_null()); |
| 396 raw_data_ = data->resource()->data(); |
| 397 |
| 398 ASSERT(end_position <= data->length()); |
| 399 if (start_position > 0) { |
| 400 SeekForward(start_position); |
| 401 } |
| 402 end_ = |
| 403 end_position != Scanner::kNoEndPosition ? end_position : data->length(); |
| 404 } |
| 405 |
| 406 |
| 407 template <typename StringType, typename CharType> |
| 408 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() { |
| 409 if (pos_ < end_) { |
| 410 return raw_data_[pos_++]; |
| 411 } else { |
| 412 // note: currently the following increment is necessary to avoid a |
| 413 // test-parser problem! |
| 414 pos_++; |
| 415 return static_cast<uc32>(-1); |
| 416 } |
| 417 } |
| 418 |
| 419 |
| 420 template <typename StringType, typename CharType> |
| 421 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) { |
| 422 pos_--; |
| 423 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); |
| 424 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); |
| 425 } |
| 426 |
| 427 |
| 428 template <typename StringType, typename CharType> |
| 429 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) { |
| 430 pos_ = pos; |
| 431 } |
| 432 |
| 392 } } // namespace v8::internal | 433 } } // namespace v8::internal |
| 393 | 434 |
| 394 #endif // V8_SCANNER_H_ | 435 #endif // V8_SCANNER_H_ |
| OLD | NEW |