| OLD | NEW | 
|---|
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. | 
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without | 
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are | 
| 4 // met: | 4 // met: | 
| 5 // | 5 // | 
| 6 //     * Redistributions of source code must retain the above copyright | 6 //     * Redistributions of source code must retain the above copyright | 
| 7 //       notice, this list of conditions and the following disclaimer. | 7 //       notice, this list of conditions and the following disclaimer. | 
| 8 //     * Redistributions in binary form must reproduce the above | 8 //     * Redistributions in binary form must reproduce the above | 
| 9 //       copyright notice, this list of conditions and the following | 9 //       copyright notice, this list of conditions and the following | 
| 10 //       disclaimer in the documentation and/or other materials provided | 10 //       disclaimer in the documentation and/or other materials provided | 
| (...skipping 17 matching lines...) Expand all  Loading... | 
| 28 #ifndef V8_SCANNER_H_ | 28 #ifndef V8_SCANNER_H_ | 
| 29 #define V8_SCANNER_H_ | 29 #define V8_SCANNER_H_ | 
| 30 | 30 | 
| 31 #include "token.h" | 31 #include "token.h" | 
| 32 #include "char-predicates-inl.h" | 32 #include "char-predicates-inl.h" | 
| 33 #include "scanner-base.h" | 33 #include "scanner-base.h" | 
| 34 | 34 | 
| 35 namespace v8 { | 35 namespace v8 { | 
| 36 namespace internal { | 36 namespace internal { | 
| 37 | 37 | 
| 38 // A buffered character stream based on a random access character | 38 // UTF16 buffer to read characters from a character stream. | 
| 39 // source (ReadBlock can be called with pos_ pointing to any position, | 39 class CharacterStreamUTF16Buffer: public UTF16Buffer { | 
| 40 // even positions before the current). |  | 
| 41 class BufferedUC16CharacterStream: public UC16CharacterStream { |  | 
| 42  public: | 40  public: | 
| 43   BufferedUC16CharacterStream(); | 41   CharacterStreamUTF16Buffer(); | 
| 44   virtual ~BufferedUC16CharacterStream(); | 42   virtual ~CharacterStreamUTF16Buffer() {} | 
|  | 43   void Initialize(Handle<String> data, | 
|  | 44                   unibrow::CharacterStream* stream, | 
|  | 45                   int start_position, | 
|  | 46                   int end_position); | 
|  | 47   virtual void PushBack(uc32 ch); | 
|  | 48   virtual uc32 Advance(); | 
|  | 49   virtual void SeekForward(int pos); | 
| 45 | 50 | 
| 46   virtual void PushBack(uc16 character); | 51  private: | 
|  | 52   List<uc32> pushback_buffer_; | 
|  | 53   uc32 last_; | 
|  | 54   unibrow::CharacterStream* stream_; | 
| 47 | 55 | 
| 48  protected: | 56   List<uc32>* pushback_buffer() { return &pushback_buffer_; } | 
| 49   static const unsigned kBufferSize = 512; |  | 
| 50   static const unsigned kPushBackStepSize = 16; |  | 
| 51 |  | 
| 52   virtual unsigned SlowSeekForward(unsigned delta); |  | 
| 53   virtual bool ReadBlock(); |  | 
| 54   virtual void SlowPushBack(uc16 character); |  | 
| 55 |  | 
| 56   virtual unsigned BufferSeekForward(unsigned delta) = 0; |  | 
| 57   virtual unsigned FillBuffer(unsigned position, unsigned length) = 0; |  | 
| 58 |  | 
| 59   const uc16* pushback_limit_; |  | 
| 60   uc16 buffer_[kBufferSize]; |  | 
| 61 }; |  | 
| 62 |  | 
| 63 |  | 
| 64 // Generic string stream. |  | 
| 65 class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream { |  | 
| 66  public: |  | 
| 67   GenericStringUC16CharacterStream(Handle<String> data, |  | 
| 68                                    unsigned start_position, |  | 
| 69                                    unsigned end_position); |  | 
| 70   virtual ~GenericStringUC16CharacterStream(); |  | 
| 71 |  | 
| 72  protected: |  | 
| 73   virtual unsigned BufferSeekForward(unsigned delta); |  | 
| 74   virtual unsigned FillBuffer(unsigned position, unsigned length); |  | 
| 75 |  | 
| 76   Handle<String> string_; |  | 
| 77   unsigned start_position_; |  | 
| 78   unsigned length_; |  | 
| 79 }; |  | 
| 80 |  | 
| 81 |  | 
| 82 // UC16 stream based on a literal UTF-8 string. |  | 
| 83 class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream { |  | 
| 84  public: |  | 
| 85   Utf8ToUC16CharacterStream(const byte* data, unsigned length); |  | 
| 86   virtual ~Utf8ToUC16CharacterStream(); |  | 
| 87 |  | 
| 88  protected: |  | 
| 89   virtual unsigned BufferSeekForward(unsigned delta); |  | 
| 90   virtual unsigned FillBuffer(unsigned char_position, unsigned length); |  | 
| 91   void SetRawPosition(unsigned char_position); |  | 
| 92 |  | 
| 93   const byte* raw_data_; |  | 
| 94   unsigned raw_data_length_;  // Measured in bytes, not characters. |  | 
| 95   unsigned raw_data_pos_; |  | 
| 96   // The character position of the character at raw_data[raw_data_pos_]. |  | 
| 97   // Not necessarily the same as pos_. |  | 
| 98   unsigned raw_character_position_; |  | 
| 99 }; | 57 }; | 
| 100 | 58 | 
| 101 | 59 | 
| 102 // UTF16 buffer to read characters from an external string. | 60 // UTF16 buffer to read characters from an external string. | 
| 103 class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream { | 61 template <typename StringType, typename CharType> | 
|  | 62 class ExternalStringUTF16Buffer: public UTF16Buffer { | 
| 104  public: | 63  public: | 
| 105   ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data, | 64   ExternalStringUTF16Buffer(); | 
| 106                                            int start_position, | 65   virtual ~ExternalStringUTF16Buffer() {} | 
| 107                                            int end_position); | 66   void Initialize(Handle<StringType> data, | 
| 108   virtual ~ExternalTwoByteStringUC16CharacterStream(); | 67                   int start_position, | 
|  | 68                   int end_position); | 
|  | 69   virtual void PushBack(uc32 ch); | 
|  | 70   virtual uc32 Advance(); | 
|  | 71   virtual void SeekForward(int pos); | 
| 109 | 72 | 
| 110   virtual void PushBack(uc16 character) { | 73  private: | 
| 111     ASSERT(buffer_cursor_ > raw_data_); | 74   const CharType* raw_data_;  // Pointer to the actual array of characters. | 
| 112     buffer_cursor_--; |  | 
| 113     pos_--; |  | 
| 114   } |  | 
| 115  protected: |  | 
| 116   virtual unsigned SlowSeekForward(unsigned delta) { |  | 
| 117     // Fast case always handles seeking. |  | 
| 118     return 0; |  | 
| 119   } |  | 
| 120   virtual bool ReadBlock() { |  | 
| 121     // Entire string is read at start. |  | 
| 122     return false; |  | 
| 123   } |  | 
| 124   Handle<ExternalTwoByteString> source_; |  | 
| 125   const uc16* raw_data_;  // Pointer to the actual array of characters. |  | 
| 126 }; | 75 }; | 
| 127 | 76 | 
| 128 | 77 | 
|  | 78 // Initializes a UTF16Buffer as input stream, using one of a number | 
|  | 79 // of strategies depending on the available character sources. | 
|  | 80 class StreamInitializer { | 
|  | 81  public: | 
|  | 82   UTF16Buffer* Init(Handle<String> source, | 
|  | 83                     unibrow::CharacterStream* stream, | 
|  | 84                     int start_position, | 
|  | 85                     int end_position); | 
|  | 86  private: | 
|  | 87   // Different UTF16 buffers used to pull characters from. Based on input one of | 
|  | 88   // these will be initialized as the actual data source. | 
|  | 89   CharacterStreamUTF16Buffer char_stream_buffer_; | 
|  | 90   ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t> | 
|  | 91       two_byte_string_buffer_; | 
|  | 92   ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; | 
|  | 93 | 
|  | 94   // Used to convert the source string into a character stream when a stream | 
|  | 95   // is not passed to the scanner. | 
|  | 96   SafeStringInputBuffer safe_string_input_buffer_; | 
|  | 97 }; | 
|  | 98 | 
| 129 // ---------------------------------------------------------------------------- | 99 // ---------------------------------------------------------------------------- | 
| 130 // V8JavaScriptScanner | 100 // V8JavaScriptScanner | 
| 131 // JavaScript scanner getting its input from either a V8 String or a unicode | 101 // JavaScript scanner getting its input from either a V8 String or a unicode | 
| 132 // CharacterStream. | 102 // CharacterStream. | 
| 133 | 103 | 
| 134 class V8JavaScriptScanner : public JavaScriptScanner { | 104 class V8JavaScriptScanner : public JavaScriptScanner { | 
| 135  public: | 105  public: | 
| 136   V8JavaScriptScanner(); | 106   V8JavaScriptScanner() {} | 
| 137   void Initialize(UC16CharacterStream* source, | 107 | 
|  | 108   // Initialize the Scanner to scan source. | 
|  | 109   void Initialize(Handle<String> source, int literal_flags = kAllLiterals); | 
|  | 110   void Initialize(Handle<String> source, | 
|  | 111                   unibrow::CharacterStream* stream, | 
| 138                   int literal_flags = kAllLiterals); | 112                   int literal_flags = kAllLiterals); | 
|  | 113   void Initialize(Handle<String> source, | 
|  | 114                   int start_position, int end_position, | 
|  | 115                   int literal_flags = kAllLiterals); | 
|  | 116 | 
|  | 117  protected: | 
|  | 118   StreamInitializer stream_initializer_; | 
| 139 }; | 119 }; | 
| 140 | 120 | 
| 141 | 121 | 
| 142 class JsonScanner : public Scanner { | 122 class JsonScanner : public Scanner { | 
| 143  public: | 123  public: | 
| 144   JsonScanner(); | 124   JsonScanner(); | 
| 145 | 125 | 
| 146   void Initialize(UC16CharacterStream* source); | 126   // Initialize the Scanner to scan source. | 
|  | 127   void Initialize(Handle<String> source); | 
| 147 | 128 | 
| 148   // Returns the next token. | 129   // Returns the next token. | 
| 149   Token::Value Next(); | 130   Token::Value Next(); | 
| 150 | 131 | 
| 151  protected: | 132  protected: | 
| 152   // Skip past JSON whitespace (only space, tab, newline and carriage-return). | 133   // Skip past JSON whitespace (only space, tab, newline and carriage-return). | 
| 153   bool SkipJsonWhiteSpace(); | 134   bool SkipJsonWhiteSpace(); | 
| 154 | 135 | 
| 155   // Scan a single JSON token. The JSON lexical grammar is specified in the | 136   // Scan a single JSON token. The JSON lexical grammar is specified in the | 
| 156   // ECMAScript 5 standard, section 15.12.1.1. | 137   // ECMAScript 5 standard, section 15.12.1.1. | 
| 157   // Recognizes all of the single-character tokens directly, or calls a function | 138   // Recognizes all of the single-character tokens directly, or calls a function | 
| 158   // to scan a number, string or identifier literal. | 139   // to scan a number, string or identifier literal. | 
| 159   // The only allowed whitespace characters between tokens are tab, | 140   // The only allowed whitespace characters between tokens are tab, | 
| 160   // carriage-return, newline and space. | 141   // carriage-return, newline and space. | 
| 161   void ScanJson(); | 142   void ScanJson(); | 
| 162 | 143 | 
| 163   // A JSON number (production JSONNumber) is a subset of the valid JavaScript | 144   // A JSON number (production JSONNumber) is a subset of the valid JavaScript | 
| 164   // decimal number literals. | 145   // decimal number literals. | 
| 165   // It includes an optional minus sign, must have at least one | 146   // It includes an optional minus sign, must have at least one | 
| 166   // digit before and after a decimal point, may not have prefixed zeros (unless | 147   // digit before and after a decimal point, may not have prefixed zeros (unless | 
| 167   // the integer part is zero), and may include an exponent part (e.g., "e-10"). | 148   // the integer part is zero), and may include an exponent part (e.g., "e-10"). | 
| 168   // Hexadecimal and octal numbers are not allowed. | 149   // Hexadecimal and octal numbers are not allowed. | 
| 169   Token::Value ScanJsonNumber(); | 150   Token::Value ScanJsonNumber(); | 
| 170 | 151 | 
| 171   // A JSON string (production JSONString) is subset of valid JavaScript string | 152   // A JSON string (production JSONString) is subset of valid JavaScript string | 
| 172   // literals. The string must only be double-quoted (not single-quoted), and | 153   // literals. The string must only be double-quoted (not single-quoted), and | 
| 173   // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and | 154   // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and | 
| 174   // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. | 155   // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. | 
| 175   Token::Value ScanJsonString(); | 156   Token::Value ScanJsonString(); | 
| 176 | 157 | 
| 177   // Used to recognize one of the literals "true", "false", or "null". These | 158   // Used to recognize one of the literals "true", "false", or "null". These | 
| 178   // are the only valid JSON identifiers (productions JSONBooleanLiteral, | 159   // are the only valid JSON identifiers (productions JSONBooleanLiteral, | 
| 179   // JSONNullLiteral). | 160   // JSONNullLiteral). | 
| 180   Token::Value ScanJsonIdentifier(const char* text, Token::Value token); | 161   Token::Value ScanJsonIdentifier(const char* text, Token::Value token); | 
|  | 162 | 
|  | 163   StreamInitializer stream_initializer_; | 
| 181 }; | 164 }; | 
| 182 | 165 | 
|  | 166 | 
|  | 167 // ExternalStringUTF16Buffer | 
|  | 168 template <typename StringType, typename CharType> | 
|  | 169 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer() | 
|  | 170     : raw_data_(NULL) { } | 
|  | 171 | 
|  | 172 | 
|  | 173 template <typename StringType, typename CharType> | 
|  | 174 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize( | 
|  | 175      Handle<StringType> data, | 
|  | 176      int start_position, | 
|  | 177      int end_position) { | 
|  | 178   ASSERT(!data.is_null()); | 
|  | 179   raw_data_ = data->resource()->data(); | 
|  | 180 | 
|  | 181   ASSERT(end_position <= data->length()); | 
|  | 182   if (start_position > 0) { | 
|  | 183     SeekForward(start_position); | 
|  | 184   } | 
|  | 185   end_ = | 
|  | 186       end_position != kNoEndPosition ? end_position : data->length(); | 
|  | 187 } | 
|  | 188 | 
|  | 189 | 
|  | 190 template <typename StringType, typename CharType> | 
|  | 191 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() { | 
|  | 192   if (pos_ < end_) { | 
|  | 193     return raw_data_[pos_++]; | 
|  | 194   } else { | 
|  | 195     // note: currently the following increment is necessary to avoid a | 
|  | 196     // test-parser problem! | 
|  | 197     pos_++; | 
|  | 198     return static_cast<uc32>(-1); | 
|  | 199   } | 
|  | 200 } | 
|  | 201 | 
|  | 202 | 
|  | 203 template <typename StringType, typename CharType> | 
|  | 204 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) { | 
|  | 205   pos_--; | 
|  | 206   ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); | 
|  | 207   ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); | 
|  | 208 } | 
|  | 209 | 
|  | 210 | 
|  | 211 template <typename StringType, typename CharType> | 
|  | 212 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) { | 
|  | 213   pos_ = pos; | 
|  | 214 } | 
|  | 215 | 
| 183 } }  // namespace v8::internal | 216 } }  // namespace v8::internal | 
| 184 | 217 | 
| 185 #endif  // V8_SCANNER_H_ | 218 #endif  // V8_SCANNER_H_ | 
| OLD | NEW | 
|---|