| OLD | NEW | 
|---|---|
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. | 
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without | 
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are | 
| 4 // met: | 4 // met: | 
| 5 // | 5 // | 
| 6 //     * Redistributions of source code must retain the above copyright | 6 //     * Redistributions of source code must retain the above copyright | 
| 7 //       notice, this list of conditions and the following disclaimer. | 7 //       notice, this list of conditions and the following disclaimer. | 
| 8 //     * Redistributions in binary form must reproduce the above | 8 //     * Redistributions in binary form must reproduce the above | 
| 9 //       copyright notice, this list of conditions and the following | 9 //       copyright notice, this list of conditions and the following | 
| 10 //       disclaimer in the documentation and/or other materials provided | 10 //       disclaimer in the documentation and/or other materials provided | 
| (...skipping 24 matching lines...) | 
| 35 namespace v8 { | 35 namespace v8 { | 
| 36 namespace internal { | 36 namespace internal { | 
| 37 | 37 | 
| 38 | 38 | 
| 39 class UTF8Buffer { | 39 class UTF8Buffer { | 
| 40  public: | 40  public: | 
| 41   UTF8Buffer(); | 41   UTF8Buffer(); | 
| 42   ~UTF8Buffer(); | 42   ~UTF8Buffer(); | 
| 43 | 43 | 
| 44   inline void AddChar(uc32 c) { | 44   inline void AddChar(uc32 c) { | 
| 45     if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { | 45     if (recording_) { | 
| 46       buffer_.Add(static_cast<char>(c)); | 46       if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { | 
| 47     } else { | 47         buffer_.Add(static_cast<char>(c)); | 
| 48       AddCharSlow(c); | 48       } else { | 
|  | 49         AddCharSlow(c); | 
|  | 50       } | 
| 49     } | 51     } | 
| 50   } | 52   } | 
| 51 | 53 | 
| 52   void StartLiteral() { | 54   void StartLiteral() { | 
| 53     buffer_.StartSequence(); | 55     buffer_.StartSequence(); | 
|  | 56     recording_ = true; | 
| 54   } | 57   } | 
| 55 | 58 | 
| 56   Vector<const char> EndLiteral() { | 59   Vector<const char> EndLiteral() { | 
| 57     buffer_.Add(kEndMarker); | 60     if (recording_) { | 
| 58     Vector<char> sequence = buffer_.EndSequence(); | 61       recording_ = false; | 
| 59     return Vector<const char>(sequence.start(), sequence.length()); | 62       buffer_.Add(kEndMarker); | 
|  | 63       Vector<char> sequence = buffer_.EndSequence(); | 
|  | 64       return Vector<const char>(sequence.start(), sequence.length()); | 
|  | 65     } | 
|  | 66     return Vector<const char>(); | 
| 60   } | 67   } | 
| 61 | 68 | 
| 62   void DropLiteral() { | 69   void DropLiteral() { | 
| 63     buffer_.DropSequence(); | 70     if (recording_) { | 
|  | 71       recording_ = false; | 
|  | 72       buffer_.DropSequence(); | 
|  | 73     } | 
| 64   } | 74   } | 
| 65 | 75 | 
| 66   void Reset() { | 76   void Reset() { | 
| 67     buffer_.Reset(); | 77     buffer_.Reset(); | 
| 68   } | 78   } | 
| 69 | 79 | 
| 70   // The end marker added after a parsed literal. | 80   // The end marker added after a parsed literal. | 
| 71   // Using zero allows the usage of strlen and similar functions on | 81   // Using zero allows the usage of strlen and similar functions on | 
| 72   // identifiers and numbers (but not strings, since they may contain zero | 82   // identifiers and numbers (but not strings, since they may contain zero | 
| 73   // bytes). | 83   // bytes). | 
| 74   // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside | 84   // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside | 
| 75   // a UTF-8 string. This requires changes in all places that use | 85   // a UTF-8 string. This requires changes in all places that use | 
| 76   // str-functions on the literals, but allows a single pointer to represent | 86   // str-functions on the literals, but allows a single pointer to represent | 
| 77   // the literal, even if it contains embedded zeros. | 87   // the literal, even if it contains embedded zeros. | 
| 78   static const char kEndMarker = '\x00'; | 88   static const char kEndMarker = '\x00'; | 
| 79  private: | 89  private: | 
| 80   static const int kInitialCapacity = 256; | 90   static const int kInitialCapacity = 256; | 
| 81   SequenceCollector<char, 4> buffer_; | 91   SequenceCollector<char, 4> buffer_; | 
| 82 | 92   bool recording_; | 
| 83   void AddCharSlow(uc32 c); | 93   void AddCharSlow(uc32 c); | 
| 84 }; | 94 }; | 
| 85 | 95 | 
| 86 | 96 | 
| 87 // Interface through which the scanner reads characters from the input source. |  | 
| 88 class UTF16Buffer { |  | 
| 89  public: |  | 
| 90   UTF16Buffer(); |  | 
| 91   virtual ~UTF16Buffer() {} |  | 
| 92 |  | 
| 93   virtual void PushBack(uc32 ch) = 0; |  | 
| 94   // Returns a value < 0 when the buffer end is reached. |  | 
| 95   virtual uc32 Advance() = 0; |  | 
| 96   virtual void SeekForward(int pos) = 0; |  | 
| 97 |  | 
| 98   int pos() const { return pos_; } |  | 
| 99 |  | 
| 100  protected: |  | 
| 101   int pos_;  // Current position in the buffer. |  | 
| 102   int end_;  // Position where scanning should stop (EOF). |  | 
| 103 }; |  | 
| 104 |  | 
| 105 |  | 
| 106 // UTF16 buffer to read characters from a character stream. | 97 // UTF16 buffer to read characters from a character stream. | 
| 107 class CharacterStreamUTF16Buffer: public UTF16Buffer { | 98 class CharacterStreamUTF16Buffer: public UTF16Buffer { | 
| 108  public: | 99  public: | 
| 109   CharacterStreamUTF16Buffer(); | 100   CharacterStreamUTF16Buffer(); | 
| 110   virtual ~CharacterStreamUTF16Buffer() {} | 101   virtual ~CharacterStreamUTF16Buffer() {} | 
| 111   void Initialize(Handle<String> data, | 102   void Initialize(Handle<String> data, | 
| 112                   unibrow::CharacterStream* stream, | 103                   unibrow::CharacterStream* stream, | 
| 113                   int start_position, | 104                   int start_position, | 
| 114                   int end_position); | 105                   int end_position); | 
| 115   virtual void PushBack(uc32 ch); | 106   virtual void PushBack(uc32 ch); | 
| (...skipping 266 matching lines...) | 
| 382   // Buffer to hold literal values (identifiers, strings, numbers) | 373   // Buffer to hold literal values (identifiers, strings, numbers) | 
| 383   // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. | 374   // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. | 
| 384   UTF8Buffer literal_buffer_; | 375   UTF8Buffer literal_buffer_; | 
| 385 | 376 | 
| 386   bool stack_overflow_; | 377   bool stack_overflow_; | 
| 387 | 378 | 
| 388   // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 379   // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 
| 389   uc32 c0_; | 380   uc32 c0_; | 
| 390 }; | 381 }; | 
| 391 | 382 | 
|  | 383 | 
|  | 384 // ExternalStringUTF16Buffer | 
|  | 385 template <typename StringType, typename CharType> | 
|  | 386 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer() | 
|  | 387     : raw_data_(NULL) { } | 
|  | 388 | 
|  | 389 | 
|  | 390 template <typename StringType, typename CharType> | 
|  | 391 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize( | 
|  | 392      Handle<StringType> data, | 
|  | 393      int start_position, | 
|  | 394      int end_position) { | 
|  | 395   ASSERT(!data.is_null()); | 
|  | 396   raw_data_ = data->resource()->data(); | 
|  | 397 | 
|  | 398   ASSERT(end_position <= data->length()); | 
|  | 399   if (start_position > 0) { | 
|  | 400     SeekForward(start_position); | 
|  | 401   } | 
|  | 402   end_ = | 
|  | 403       end_position != Scanner::kNoEndPosition ? end_position : data->length(); | 
|  | 404 } | 
|  | 405 | 
|  | 406 | 
|  | 407 template <typename StringType, typename CharType> | 
|  | 408 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() { | 
|  | 409   if (pos_ < end_) { | 
|  | 410     return raw_data_[pos_++]; | 
|  | 411   } else { | 
|  | 412     // note: currently the following increment is necessary to avoid a | 
|  | 413     // test-parser problem! | 
|  | 414     pos_++; | 
|  | 415     return static_cast<uc32>(-1); | 
|  | 416   } | 
|  | 417 } | 
|  | 418 | 
|  | 419 | 
|  | 420 template <typename StringType, typename CharType> | 
|  | 421 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) { | 
|  | 422   pos_--; | 
|  | 423   ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); | 
|  | 424   ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); | 
|  | 425 } | 
|  | 426 | 
|  | 427 | 
|  | 428 template <typename StringType, typename CharType> | 
|  | 429 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) { | 
|  | 430   pos_ = pos; | 
|  | 431 } | 
|  | 432 | 
| 392 } }  // namespace v8::internal | 433 } }  // namespace v8::internal | 
| 393 | 434 | 
| 394 #endif  // V8_SCANNER_H_ | 435 #endif  // V8_SCANNER_H_ | 
| OLD | NEW | 
|---|---|