OLD | NEW |
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 24 matching lines...) Expand all Loading... |
35 namespace v8 { | 35 namespace v8 { |
36 namespace internal { | 36 namespace internal { |
37 | 37 |
38 | 38 |
39 class UTF8Buffer { | 39 class UTF8Buffer { |
40 public: | 40 public: |
41 UTF8Buffer(); | 41 UTF8Buffer(); |
42 ~UTF8Buffer(); | 42 ~UTF8Buffer(); |
43 | 43 |
44 inline void AddChar(uc32 c) { | 44 inline void AddChar(uc32 c) { |
45 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { | 45 if (recording_) { |
46 buffer_.Add(static_cast<char>(c)); | 46 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { |
47 } else { | 47 buffer_.Add(static_cast<char>(c)); |
48 AddCharSlow(c); | 48 } else { |
| 49 AddCharSlow(c); |
| 50 } |
49 } | 51 } |
50 } | 52 } |
51 | 53 |
52 void StartLiteral() { | 54 void StartLiteral() { |
53 buffer_.StartSequence(); | 55 buffer_.StartSequence(); |
| 56 recording_ = true; |
54 } | 57 } |
55 | 58 |
56 Vector<const char> EndLiteral() { | 59 Vector<const char> EndLiteral() { |
57 buffer_.Add(kEndMarker); | 60 if (recording_) { |
58 Vector<char> sequence = buffer_.EndSequence(); | 61 recording_ = false; |
59 return Vector<const char>(sequence.start(), sequence.length()); | 62 buffer_.Add(kEndMarker); |
| 63 Vector<char> sequence = buffer_.EndSequence(); |
| 64 return Vector<const char>(sequence.start(), sequence.length()); |
| 65 } |
| 66 return Vector<const char>(); |
60 } | 67 } |
61 | 68 |
62 void DropLiteral() { | 69 void DropLiteral() { |
63 buffer_.DropSequence(); | 70 if (recording_) { |
| 71 recording_ = false; |
| 72 buffer_.DropSequence(); |
| 73 } |
64 } | 74 } |
65 | 75 |
66 void Reset() { | 76 void Reset() { |
67 buffer_.Reset(); | 77 buffer_.Reset(); |
68 } | 78 } |
69 | 79 |
70 // The end marker added after a parsed literal. | 80 // The end marker added after a parsed literal. |
71 // Using zero allows the usage of strlen and similar functions on | 81 // Using zero allows the usage of strlen and similar functions on |
72 // identifiers and numbers (but not strings, since they may contain zero | 82 // identifiers and numbers (but not strings, since they may contain zero |
73 // bytes). | 83 // bytes). |
74 // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside | 84 // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside |
75 // an utf-8 string. This requires changes in all places that uses | 85 // an utf-8 string. This requires changes in all places that uses |
76 // str-functions on the literals, but allows a single pointer to represent | 86 // str-functions on the literals, but allows a single pointer to represent |
77 // the literal, even if it contains embedded zeros. | 87 // the literal, even if it contains embedded zeros. |
78 static const char kEndMarker = '\x00'; | 88 static const char kEndMarker = '\x00'; |
79 private: | 89 private: |
80 static const int kInitialCapacity = 256; | 90 static const int kInitialCapacity = 256; |
81 SequenceCollector<char, 4> buffer_; | 91 SequenceCollector<char, 4> buffer_; |
82 | 92 bool recording_; |
83 void AddCharSlow(uc32 c); | 93 void AddCharSlow(uc32 c); |
84 }; | 94 }; |
85 | 95 |
86 | 96 |
87 // Interface through which the scanner reads characters from the input source. | |
88 class UTF16Buffer { | |
89 public: | |
90 UTF16Buffer(); | |
91 virtual ~UTF16Buffer() {} | |
92 | |
93 virtual void PushBack(uc32 ch) = 0; | |
94 // Returns a value < 0 when the buffer end is reached. | |
95 virtual uc32 Advance() = 0; | |
96 virtual void SeekForward(int pos) = 0; | |
97 | |
98 int pos() const { return pos_; } | |
99 | |
100 protected: | |
101 int pos_; // Current position in the buffer. | |
102 int end_; // Position where scanning should stop (EOF). | |
103 }; | |
104 | |
105 | |
106 // UTF16 buffer to read characters from a character stream. | 97 // UTF16 buffer to read characters from a character stream. |
107 class CharacterStreamUTF16Buffer: public UTF16Buffer { | 98 class CharacterStreamUTF16Buffer: public UTF16Buffer { |
108 public: | 99 public: |
109 CharacterStreamUTF16Buffer(); | 100 CharacterStreamUTF16Buffer(); |
110 virtual ~CharacterStreamUTF16Buffer() {} | 101 virtual ~CharacterStreamUTF16Buffer() {} |
111 void Initialize(Handle<String> data, | 102 void Initialize(Handle<String> data, |
112 unibrow::CharacterStream* stream, | 103 unibrow::CharacterStream* stream, |
113 int start_position, | 104 int start_position, |
114 int end_position); | 105 int end_position); |
115 virtual void PushBack(uc32 ch); | 106 virtual void PushBack(uc32 ch); |
(...skipping 266 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
382 // Buffer to hold literal values (identifiers, strings, numbers) | 373 // Buffer to hold literal values (identifiers, strings, numbers) |
383 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. | 374 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. |
384 UTF8Buffer literal_buffer_; | 375 UTF8Buffer literal_buffer_; |
385 | 376 |
386 bool stack_overflow_; | 377 bool stack_overflow_; |
387 | 378 |
388 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 379 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
389 uc32 c0_; | 380 uc32 c0_; |
390 }; | 381 }; |
391 | 382 |
| 383 |
| 384 // ExternalStringUTF16Buffer |
| 385 template <typename StringType, typename CharType> |
| 386 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer() |
| 387 : raw_data_(NULL) { } |
| 388 |
| 389 |
| 390 template <typename StringType, typename CharType> |
| 391 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize( |
| 392 Handle<StringType> data, |
| 393 int start_position, |
| 394 int end_position) { |
| 395 ASSERT(!data.is_null()); |
| 396 raw_data_ = data->resource()->data(); |
| 397 |
| 398 ASSERT(end_position <= data->length()); |
| 399 if (start_position > 0) { |
| 400 SeekForward(start_position); |
| 401 } |
| 402 end_ = |
| 403 end_position != Scanner::kNoEndPosition ? end_position : data->length(); |
| 404 } |
| 405 |
| 406 |
| 407 template <typename StringType, typename CharType> |
| 408 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() { |
| 409 if (pos_ < end_) { |
| 410 return raw_data_[pos_++]; |
| 411 } else { |
| 412 // note: currently the following increment is necessary to avoid a |
| 413 // test-parser problem! |
| 414 pos_++; |
| 415 return static_cast<uc32>(-1); |
| 416 } |
| 417 } |
| 418 |
| 419 |
| 420 template <typename StringType, typename CharType> |
| 421 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) { |
| 422 pos_--; |
| 423 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); |
| 424 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); |
| 425 } |
| 426 |
| 427 |
| 428 template <typename StringType, typename CharType> |
| 429 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) { |
| 430 pos_ = pos; |
| 431 } |
| 432 |
392 } } // namespace v8::internal | 433 } } // namespace v8::internal |
393 | 434 |
394 #endif // V8_SCANNER_H_ | 435 #endif // V8_SCANNER_H_ |
OLD | NEW |