OLD | NEW |
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 24 matching lines...) |
35 namespace v8 { | 35 namespace v8 { |
36 namespace internal { | 36 namespace internal { |
37 | 37 |
38 | 38 |
39 class UTF8Buffer { | 39 class UTF8Buffer { |
40 public: | 40 public: |
41 UTF8Buffer(); | 41 UTF8Buffer(); |
42 ~UTF8Buffer(); | 42 ~UTF8Buffer(); |
43 | 43 |
44 inline void AddChar(uc32 c) { | 44 inline void AddChar(uc32 c) { |
45 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { | 45 if (recording_) { |
46 buffer_.Add(static_cast<char>(c)); | 46 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { |
47 } else { | 47 buffer_.Add(static_cast<char>(c)); |
48 AddCharSlow(c); | 48 } else { |
| 49 AddCharSlow(c); |
| 50 } |
49 } | 51 } |
50 } | 52 } |
51 | 53 |
52 void StartLiteral() { | 54 void StartLiteral() { |
53 buffer_.StartSequence(); | 55 buffer_.StartSequence(); |
| 56 recording_ = true; |
54 } | 57 } |
55 | 58 |
56 Vector<const char> EndLiteral() { | 59 Vector<const char> EndLiteral() { |
57 buffer_.Add(kEndMarker); | 60 if (recording_) { |
58 Vector<char> sequence = buffer_.EndSequence(); | 61 recording_ = false; |
59 return Vector<const char>(sequence.start(), sequence.length()); | 62 buffer_.Add(kEndMarker); |
| 63 Vector<char> sequence = buffer_.EndSequence(); |
| 64 return Vector<const char>(sequence.start(), sequence.length()); |
| 65 } |
| 66 return Vector<const char>(); |
60 } | 67 } |
61 | 68 |
62 void DropLiteral() { | 69 void DropLiteral() { |
63 buffer_.DropSequence(); | 70 if (recording_) { |
| 71 recording_ = false; |
| 72 buffer_.DropSequence(); |
| 73 } |
64 } | 74 } |
65 | 75 |
66 void Reset() { | 76 void Reset() { |
67 buffer_.Reset(); | 77 buffer_.Reset(); |
68 } | 78 } |
69 | 79 |
70 // The end marker added after a parsed literal. | 80 // The end marker added after a parsed literal. |
71 // Using zero allows the usage of strlen and similar functions on | 81 // Using zero allows the usage of strlen and similar functions on |
72 // identifiers and numbers (but not strings, since they may contain zero | 82 // identifiers and numbers (but not strings, since they may contain zero |
73 // bytes). | 83 // bytes). |
74 // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside | 84 // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside |
75 // a UTF-8 string. This requires changes in all places that use | 85 // a UTF-8 string. This requires changes in all places that use |
76 // str-functions on the literals, but allows a single pointer to represent | 86 // str-functions on the literals, but allows a single pointer to represent |
77 // the literal, even if it contains embedded zeros. | 87 // the literal, even if it contains embedded zeros. |
78 static const char kEndMarker = '\x00'; | 88 static const char kEndMarker = '\x00'; |
79 private: | 89 private: |
80 static const int kInitialCapacity = 256; | 90 static const int kInitialCapacity = 256; |
81 SequenceCollector<char, 4> buffer_; | 91 SequenceCollector<char, 4> buffer_; |
82 | 92 bool recording_; |
83 void AddCharSlow(uc32 c); | 93 void AddCharSlow(uc32 c); |
84 }; | 94 }; |
85 | 95 |
86 | 96 |
87 // Interface through which the scanner reads characters from the input source. | |
88 class UTF16Buffer { | |
89 public: | |
90 UTF16Buffer(); | |
91 virtual ~UTF16Buffer() {} | |
92 | |
93 virtual void PushBack(uc32 ch) = 0; | |
94 // Returns a value < 0 when the buffer end is reached. | |
95 virtual uc32 Advance() = 0; | |
96 virtual void SeekForward(int pos) = 0; | |
97 | |
98 int pos() const { return pos_; } | |
99 | |
100 protected: | |
101 int pos_; // Current position in the buffer. | |
102 int end_; // Position where scanning should stop (EOF). | |
103 }; | |
104 | |
105 | |
106 // UTF16 buffer to read characters from a character stream. | 97 // UTF16 buffer to read characters from a character stream. |
107 class CharacterStreamUTF16Buffer: public UTF16Buffer { | 98 class CharacterStreamUTF16Buffer: public UTF16Buffer { |
108 public: | 99 public: |
109 CharacterStreamUTF16Buffer(); | 100 CharacterStreamUTF16Buffer(); |
110 virtual ~CharacterStreamUTF16Buffer() {} | 101 virtual ~CharacterStreamUTF16Buffer() {} |
111 void Initialize(Handle<String> data, | 102 void Initialize(Handle<String> data, |
112 unibrow::CharacterStream* stream, | 103 unibrow::CharacterStream* stream, |
113 int start_position, | 104 int start_position, |
114 int end_position); | 105 int end_position); |
115 virtual void PushBack(uc32 ch); | 106 virtual void PushBack(uc32 ch); |
(...skipping 128 matching lines...) |
244 bool ScanRegExpFlags(); | 235 bool ScanRegExpFlags(); |
245 | 236 |
246 // Seek forward to the given position. This operation does not | 237 // Seek forward to the given position. This operation does not |
247 // work in general, for instance when there are pushed back | 238 // work in general, for instance when there are pushed back |
248 // characters, but works for seeking forward until simple delimiter | 239 // characters, but works for seeking forward until simple delimiter |
249 // tokens, which is what it is used for. | 240 // tokens, which is what it is used for. |
250 void SeekForward(int pos); | 241 void SeekForward(int pos); |
251 | 242 |
252 bool stack_overflow() { return stack_overflow_; } | 243 bool stack_overflow() { return stack_overflow_; } |
253 | 244 |
254 static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; } | |
255 | |
256 // Tells whether the buffer contains an identifier (no escapes). | 245 // Tells whether the buffer contains an identifier (no escapes). |
257 // Used for checking if a property name is an identifier. | 246 // Used for checking if a property name is an identifier. |
258 static bool IsIdentifier(unibrow::CharacterStream* buffer); | 247 static bool IsIdentifier(unibrow::CharacterStream* buffer); |
259 | 248 |
260 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; | |
261 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; | |
262 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; | |
263 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; | |
264 | |
265 static const int kCharacterLookaheadBufferSize = 1; | 249 static const int kCharacterLookaheadBufferSize = 1; |
266 static const int kNoEndPosition = 1; | 250 static const int kNoEndPosition = 1; |
267 | 251 |
268 private: | 252 private: |
269 // The current and look-ahead token. | 253 // The current and look-ahead token. |
270 struct TokenDesc { | 254 struct TokenDesc { |
271 Token::Value token; | 255 Token::Value token; |
272 Location location; | 256 Location location; |
273 Vector<const char> literal_chars; | 257 Vector<const char> literal_chars; |
274 }; | 258 }; |
275 | 259 |
276 void Init(Handle<String> source, | 260 void Init(Handle<String> source, |
277 unibrow::CharacterStream* stream, | 261 unibrow::CharacterStream* stream, |
278 int start_position, int end_position, | 262 int start_position, int end_position, |
279 ParserLanguage language); | 263 ParserLanguage language); |
280 | 264 |
281 // Literal buffer support | 265 // Literal buffer support |
282 inline void StartLiteral(); | 266 inline void StartLiteral(); |
283 inline void AddChar(uc32 ch); | 267 inline void AddLiteralChar(uc32 ch); |
284 inline void AddCharAdvance(); | 268 inline void AddLiteralCharAdvance(); |
285 inline void TerminateLiteral(); | 269 inline void TerminateLiteral(); |
286 // Stops scanning of a literal, e.g., due to an encountered error. | 270 // Stops scanning of a literal, e.g., due to an encountered error. |
287 inline void DropLiteral(); | 271 inline void DropLiteral(); |
288 | 272 |
289 // Low-level scanning support. | 273 // Low-level scanning support. |
290 void Advance() { c0_ = source_->Advance(); } | 274 void Advance() { c0_ = source_->Advance(); } |
291 void PushBack(uc32 ch) { | 275 void PushBack(uc32 ch) { |
292 source_->PushBack(ch); | 276 source_->PushBack(ch); |
293 c0_ = ch; | 277 c0_ = ch; |
294 } | 278 } |
(...skipping 89 matching lines...) |
384 | 368 |
385 // Used to convert the source string into a character stream when a stream | 369 // Used to convert the source string into a character stream when a stream |
386 // is not passed to the scanner. | 370 // is not passed to the scanner. |
387 SafeStringInputBuffer safe_string_input_buffer_; | 371 SafeStringInputBuffer safe_string_input_buffer_; |
388 | 372 |
389 // Buffer to hold literal values (identifiers, strings, numbers) | 373 // Buffer to hold literal values (identifiers, strings, numbers) |
390 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. | 374 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. |
391 UTF8Buffer literal_buffer_; | 375 UTF8Buffer literal_buffer_; |
392 | 376 |
393 bool stack_overflow_; | 377 bool stack_overflow_; |
394 static StaticResource<Utf8Decoder> utf8_decoder_; | |
395 | 378 |
396 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 379 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
397 uc32 c0_; | 380 uc32 c0_; |
398 }; | 381 }; |
399 | 382 |
| 383 |
| 384 // ExternalStringUTF16Buffer |
| 385 template <typename StringType, typename CharType> |
| 386 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer() |
| 387 : raw_data_(NULL) { } |
| 388 |
| 389 |
| 390 template <typename StringType, typename CharType> |
| 391 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize( |
| 392 Handle<StringType> data, |
| 393 int start_position, |
| 394 int end_position) { |
| 395 ASSERT(!data.is_null()); |
| 396 raw_data_ = data->resource()->data(); |
| 397 |
| 398 ASSERT(end_position <= data->length()); |
| 399 if (start_position > 0) { |
| 400 SeekForward(start_position); |
| 401 } |
| 402 end_ = |
| 403 end_position != Scanner::kNoEndPosition ? end_position : data->length(); |
| 404 } |
| 405 |
| 406 |
| 407 template <typename StringType, typename CharType> |
| 408 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() { |
| 409 if (pos_ < end_) { |
| 410 return raw_data_[pos_++]; |
| 411 } else { |
| 412 // note: currently the following increment is necessary to avoid a |
| 413 // test-parser problem! |
| 414 pos_++; |
| 415 return static_cast<uc32>(-1); |
| 416 } |
| 417 } |
| 418 |
| 419 |
| 420 template <typename StringType, typename CharType> |
| 421 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) { |
| 422 pos_--; |
| 423 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); |
| 424 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); |
| 425 } |
| 426 |
| 427 |
| 428 template <typename StringType, typename CharType> |
| 429 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) { |
| 430 pos_ = pos; |
| 431 } |
| 432 |
400 } } // namespace v8::internal | 433 } } // namespace v8::internal |
401 | 434 |
402 #endif // V8_SCANNER_H_ | 435 #endif // V8_SCANNER_H_ |
OLD | NEW |