| OLD | NEW |
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 24 matching lines...) |
| 35 namespace v8 { | 35 namespace v8 { |
| 36 namespace internal { | 36 namespace internal { |
| 37 | 37 |
| 38 | 38 |
| 39 class UTF8Buffer { | 39 class UTF8Buffer { |
| 40 public: | 40 public: |
| 41 UTF8Buffer(); | 41 UTF8Buffer(); |
| 42 ~UTF8Buffer(); | 42 ~UTF8Buffer(); |
| 43 | 43 |
| 44 inline void AddChar(uc32 c) { | 44 inline void AddChar(uc32 c) { |
| 45 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { | 45 if (recording_) { |
| 46 buffer_.Add(static_cast<char>(c)); | 46 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { |
| 47 } else { | 47 buffer_.Add(static_cast<char>(c)); |
| 48 AddCharSlow(c); | 48 } else { |
| 49 AddCharSlow(c); |
| 50 } |
| 49 } | 51 } |
| 50 } | 52 } |
| 51 | 53 |
| 52 void StartLiteral() { | 54 void StartLiteral() { |
| 53 buffer_.StartSequence(); | 55 buffer_.StartSequence(); |
| 56 recording_ = true; |
| 54 } | 57 } |
| 55 | 58 |
| 56 Vector<const char> EndLiteral() { | 59 Vector<const char> EndLiteral() { |
| 57 buffer_.Add(kEndMarker); | 60 if (recording_) { |
| 58 Vector<char> sequence = buffer_.EndSequence(); | 61 recording_ = false; |
| 59 return Vector<const char>(sequence.start(), sequence.length()); | 62 buffer_.Add(kEndMarker); |
| 63 Vector<char> sequence = buffer_.EndSequence(); |
| 64 return Vector<const char>(sequence.start(), sequence.length()); |
| 65 } |
| 66 return Vector<const char>(); |
| 60 } | 67 } |
| 61 | 68 |
| 62 void DropLiteral() { | 69 void DropLiteral() { |
| 63 buffer_.DropSequence(); | 70 if (recording_) { |
| 71 recording_ = false; |
| 72 buffer_.DropSequence(); |
| 73 } |
| 64 } | 74 } |
| 65 | 75 |
| 66 void Reset() { | 76 void Reset() { |
| 67 buffer_.Reset(); | 77 buffer_.Reset(); |
| 68 } | 78 } |
| 69 | 79 |
| 70 // The end marker added after a parsed literal. | 80 // The end marker added after a parsed literal. |
| 71 // Using zero allows the usage of strlen and similar functions on | 81 // Using zero allows the usage of strlen and similar functions on |
| 72 // identifiers and numbers (but not strings, since they may contain zero | 82 // identifiers and numbers (but not strings, since they may contain zero |
| 73 // bytes). | 83 // bytes). |
| 74 // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside | 84 // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside |
| 75 // a UTF-8 string. This requires changes in all places that use | 85 // a UTF-8 string. This requires changes in all places that use |
| 76 // str-functions on the literals, but allows a single pointer to represent | 86 // str-functions on the literals, but allows a single pointer to represent |
| 77 // the literal, even if it contains embedded zeros. | 87 // the literal, even if it contains embedded zeros. |
| 78 static const char kEndMarker = '\x00'; | 88 static const char kEndMarker = '\x00'; |
| 79 private: | 89 private: |
| 80 static const int kInitialCapacity = 256; | 90 static const int kInitialCapacity = 256; |
| 81 SequenceCollector<char, 4> buffer_; | 91 SequenceCollector<char, 4> buffer_; |
| 82 | 92 bool recording_; |
| 83 void AddCharSlow(uc32 c); | 93 void AddCharSlow(uc32 c); |
| 84 }; | 94 }; |
| 85 | 95 |
| 86 | 96 |
| 87 // Interface through which the scanner reads characters from the input source. | |
| 88 class UTF16Buffer { | |
| 89 public: | |
| 90 UTF16Buffer(); | |
| 91 virtual ~UTF16Buffer() {} | |
| 92 | |
| 93 virtual void PushBack(uc32 ch) = 0; | |
| 94 // Returns a value < 0 when the buffer end is reached. | |
| 95 virtual uc32 Advance() = 0; | |
| 96 virtual void SeekForward(int pos) = 0; | |
| 97 | |
| 98 int pos() const { return pos_; } | |
| 99 | |
| 100 protected: | |
| 101 int pos_; // Current position in the buffer. | |
| 102 int end_; // Position where scanning should stop (EOF). | |
| 103 }; | |
| 104 | |
| 105 | |
| 106 // UTF16 buffer to read characters from a character stream. | 97 // UTF16 buffer to read characters from a character stream. |
| 107 class CharacterStreamUTF16Buffer: public UTF16Buffer { | 98 class CharacterStreamUTF16Buffer: public UTF16Buffer { |
| 108 public: | 99 public: |
| 109 CharacterStreamUTF16Buffer(); | 100 CharacterStreamUTF16Buffer(); |
| 110 virtual ~CharacterStreamUTF16Buffer() {} | 101 virtual ~CharacterStreamUTF16Buffer() {} |
| 111 void Initialize(Handle<String> data, | 102 void Initialize(Handle<String> data, |
| 112 unibrow::CharacterStream* stream, | 103 unibrow::CharacterStream* stream, |
| 113 int start_position, | 104 int start_position, |
| 114 int end_position); | 105 int end_position); |
| 115 virtual void PushBack(uc32 ch); | 106 virtual void PushBack(uc32 ch); |
| (...skipping 128 matching lines...) |
| 244 bool ScanRegExpFlags(); | 235 bool ScanRegExpFlags(); |
| 245 | 236 |
| 246 // Seek forward to the given position. This operation does not | 237 // Seek forward to the given position. This operation does not |
| 247 // work in general, for instance when there are pushed back | 238 // work in general, for instance when there are pushed back |
| 248 // characters, but works for seeking forward until simple delimiter | 239 // characters, but works for seeking forward until simple delimiter |
| 249 // tokens, which is what it is used for. | 240 // tokens, which is what it is used for. |
| 250 void SeekForward(int pos); | 241 void SeekForward(int pos); |
| 251 | 242 |
| 252 bool stack_overflow() { return stack_overflow_; } | 243 bool stack_overflow() { return stack_overflow_; } |
| 253 | 244 |
| 254 static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; } | |
| 255 | |
| 256 // Tells whether the buffer contains an identifier (no escapes). | 245 // Tells whether the buffer contains an identifier (no escapes). |
| 257 // Used for checking if a property name is an identifier. | 246 // Used for checking if a property name is an identifier. |
| 258 static bool IsIdentifier(unibrow::CharacterStream* buffer); | 247 static bool IsIdentifier(unibrow::CharacterStream* buffer); |
| 259 | 248 |
| 260 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; | |
| 261 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; | |
| 262 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; | |
| 263 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; | |
| 264 | |
| 265 static const int kCharacterLookaheadBufferSize = 1; | 249 static const int kCharacterLookaheadBufferSize = 1; |
| 266 static const int kNoEndPosition = 1; | 250 static const int kNoEndPosition = 1; |
| 267 | 251 |
| 268 private: | 252 private: |
| 269 // The current and look-ahead token. | 253 // The current and look-ahead token. |
| 270 struct TokenDesc { | 254 struct TokenDesc { |
| 271 Token::Value token; | 255 Token::Value token; |
| 272 Location location; | 256 Location location; |
| 273 Vector<const char> literal_chars; | 257 Vector<const char> literal_chars; |
| 274 }; | 258 }; |
| 275 | 259 |
| 276 void Init(Handle<String> source, | 260 void Init(Handle<String> source, |
| 277 unibrow::CharacterStream* stream, | 261 unibrow::CharacterStream* stream, |
| 278 int start_position, int end_position, | 262 int start_position, int end_position, |
| 279 ParserLanguage language); | 263 ParserLanguage language); |
| 280 | 264 |
| 281 // Literal buffer support | 265 // Literal buffer support |
| 282 inline void StartLiteral(); | 266 inline void StartLiteral(); |
| 283 inline void AddChar(uc32 ch); | 267 inline void AddLiteralChar(uc32 ch); |
| 284 inline void AddCharAdvance(); | 268 inline void AddLiteralCharAdvance(); |
| 285 inline void TerminateLiteral(); | 269 inline void TerminateLiteral(); |
| 286 // Stops scanning of a literal, e.g., due to an encountered error. | 270 // Stops scanning of a literal, e.g., due to an encountered error. |
| 287 inline void DropLiteral(); | 271 inline void DropLiteral(); |
| 288 | 272 |
| 289 // Low-level scanning support. | 273 // Low-level scanning support. |
| 290 void Advance() { c0_ = source_->Advance(); } | 274 void Advance() { c0_ = source_->Advance(); } |
| 291 void PushBack(uc32 ch) { | 275 void PushBack(uc32 ch) { |
| 292 source_->PushBack(ch); | 276 source_->PushBack(ch); |
| 293 c0_ = ch; | 277 c0_ = ch; |
| 294 } | 278 } |
| (...skipping 89 matching lines...) |
| 384 | 368 |
| 385 // Used to convert the source string into a character stream when a stream | 369 // Used to convert the source string into a character stream when a stream |
| 386 // is not passed to the scanner. | 370 // is not passed to the scanner. |
| 387 SafeStringInputBuffer safe_string_input_buffer_; | 371 SafeStringInputBuffer safe_string_input_buffer_; |
| 388 | 372 |
| 389 // Buffer to hold literal values (identifiers, strings, numbers) | 373 // Buffer to hold literal values (identifiers, strings, numbers) |
| 390 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. | 374 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. |
| 391 UTF8Buffer literal_buffer_; | 375 UTF8Buffer literal_buffer_; |
| 392 | 376 |
| 393 bool stack_overflow_; | 377 bool stack_overflow_; |
| 394 static StaticResource<Utf8Decoder> utf8_decoder_; | |
| 395 | 378 |
| 396 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 379 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| 397 uc32 c0_; | 380 uc32 c0_; |
| 398 }; | 381 }; |
| 399 | 382 |
| 383 |
| 384 // ExternalStringUTF16Buffer |
| 385 template <typename StringType, typename CharType> |
| 386 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer() |
| 387 : raw_data_(NULL) { } |
| 388 |
| 389 |
| 390 template <typename StringType, typename CharType> |
| 391 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize( |
| 392 Handle<StringType> data, |
| 393 int start_position, |
| 394 int end_position) { |
| 395 ASSERT(!data.is_null()); |
| 396 raw_data_ = data->resource()->data(); |
| 397 |
| 398 ASSERT(end_position <= data->length()); |
| 399 if (start_position > 0) { |
| 400 SeekForward(start_position); |
| 401 } |
| 402 end_ = |
| 403 end_position != Scanner::kNoEndPosition ? end_position : data->length(); |
| 404 } |
| 405 |
| 406 |
| 407 template <typename StringType, typename CharType> |
| 408 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() { |
| 409 if (pos_ < end_) { |
| 410 return raw_data_[pos_++]; |
| 411 } else { |
| 412 // note: currently the following increment is necessary to avoid a |
| 413 // test-parser problem! |
| 414 pos_++; |
| 415 return static_cast<uc32>(-1); |
| 416 } |
| 417 } |
| 418 |
| 419 |
| 420 template <typename StringType, typename CharType> |
| 421 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) { |
| 422 pos_--; |
| 423 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); |
| 424 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); |
| 425 } |
| 426 |
| 427 |
| 428 template <typename StringType, typename CharType> |
| 429 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) { |
| 430 pos_ = pos; |
| 431 } |
| 432 |
| 400 } } // namespace v8::internal | 433 } } // namespace v8::internal |
| 401 | 434 |
| 402 #endif // V8_SCANNER_H_ | 435 #endif // V8_SCANNER_H_ |
| OLD | NEW |