| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 22 matching lines...) Expand all Loading... |
| 33 | 33 |
| 34 namespace v8 { | 34 namespace v8 { |
| 35 namespace internal { | 35 namespace internal { |
| 36 | 36 |
| 37 | 37 |
| 38 class UTF8Buffer { | 38 class UTF8Buffer { |
| 39 public: | 39 public: |
| 40 UTF8Buffer(); | 40 UTF8Buffer(); |
| 41 ~UTF8Buffer(); | 41 ~UTF8Buffer(); |
| 42 | 42 |
| 43 void AddChar(uc32 c) { | 43 inline void AddChar(uc32 c) { |
| 44 ASSERT_NOT_NULL(data_); | 44 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { |
| 45 if (cursor_ <= limit_ && | 45 buffer_.Add(static_cast<char>(c)); |
| 46 static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { | |
| 47 *cursor_++ = static_cast<char>(c); | |
| 48 } else { | 46 } else { |
| 49 AddCharSlow(c); | 47 AddCharSlow(c); |
| 50 } | 48 } |
| 51 } | 49 } |
| 52 | 50 |
| 53 void Reset() { | 51 void StartLiteral() { |
| 54 if (data_ == NULL) { | 52 buffer_.StartSequence(); |
| 55 data_ = NewArray<char>(kInitialCapacity); | |
| 56 limit_ = ComputeLimit(data_, kInitialCapacity); | |
| 57 } | |
| 58 cursor_ = data_; | |
| 59 } | 53 } |
| 60 | 54 |
| 61 int pos() const { | 55 Vector<const char> EndLiteral() { |
| 62 ASSERT_NOT_NULL(data_); | 56 buffer_.Add(kEndMarker); |
| 63 return static_cast<int>(cursor_ - data_); | 57 Vector<char> sequence = buffer_.EndSequence(); |
| 58 return Vector<const char>(sequence.start(), sequence.length()); |
| 64 } | 59 } |
| 65 | 60 |
| 66 char* data() const { return data_; } | 61 // The end marker added after a parsed literal. |
| 67 | 62 // Using zero allows the usage of strlen and similar functions on |
| 63 // identifiers and numbers (but not strings, since they may contain zero |
| 64 // bytes). |
| 65 // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside |
| 66 // a UTF-8 string. This requires changes in all places that use |
| 67 // str-functions on the literals, but allows a single pointer to represent |
| 68 // the literal, even if it contains embedded zeros. |
| 69 static const char kEndMarker = '\x00'; |
| 68 private: | 70 private: |
| 69 static const int kInitialCapacity = 256; | 71 static const int kInitialCapacity = 256; |
| 70 char* data_; | 72 SequenceCollector<char> buffer_; |
| 71 char* cursor_; | |
| 72 char* limit_; | |
| 73 | |
| 74 int Capacity() const { | |
| 75 ASSERT_NOT_NULL(data_); | |
| 76 return static_cast<int>(limit_ - data_) + unibrow::Utf8::kMaxEncodedSize; | |
| 77 } | |
| 78 | |
| 79 static char* ComputeLimit(char* data, int capacity) { | |
| 80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize; | |
| 81 } | |
| 82 | 73 |
| 83 void AddCharSlow(uc32 c); | 74 void AddCharSlow(uc32 c); |
| 84 }; | 75 }; |
| 85 | 76 |
| 86 | 77 |
| 87 // Interface through which the scanner reads characters from the input source. | 78 // Interface through which the scanner reads characters from the input source. |
| 88 class UTF16Buffer { | 79 class UTF16Buffer { |
| 89 public: | 80 public: |
| 90 UTF16Buffer(); | 81 UTF16Buffer(); |
| 91 virtual ~UTF16Buffer() {} | 82 virtual ~UTF16Buffer() {} |
| (...skipping 215 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 307 Location location() const { return current_.location; } | 298 Location location() const { return current_.location; } |
| 308 Location peek_location() const { return next_.location; } | 299 Location peek_location() const { return next_.location; } |
| 309 | 300 |
| 310 // Returns the literal string, if any, for the current token (the | 301 // Returns the literal string, if any, for the current token (the |
| 311 // token returned by Next()). The string is 0-terminated and in | 302 // token returned by Next()). The string is 0-terminated and in |
| 312 // UTF-8 format; it may contain 0-characters. Literal strings are | 303 // UTF-8 format; it may contain 0-characters. Literal strings are |
| 313 // collected for identifiers, strings, and numbers. | 304 // collected for identifiers, strings, and numbers. |
| 314 // These functions only give the correct result if the literal | 305 // These functions only give the correct result if the literal |
| 315 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 306 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
| 316 const char* literal_string() const { | 307 const char* literal_string() const { |
| 317 return current_.literal_buffer->data(); | 308 return current_.literal_chars.start(); |
| 318 } | 309 } |
| 310 |
| 319 int literal_length() const { | 311 int literal_length() const { |
| 320 // Excluding terminal '\0' added by TerminateLiteral(). | 312 // Excluding terminal '\x00' added by TerminateLiteral(). |
| 321 return current_.literal_buffer->pos() - 1; | 313 return current_.literal_chars.length() - 1; |
| 314 } |
| 315 |
| 316 Vector<const char> literal() const { |
| 317 return Vector<const char>(literal_string(), literal_length()); |
| 322 } | 318 } |
| 323 | 319 |
| 324 // Returns the literal string for the next token (the token that | 320 // Returns the literal string for the next token (the token that |
| 325 // would be returned if Next() were called). | 321 // would be returned if Next() were called). |
| 326 const char* next_literal_string() const { | 322 const char* next_literal_string() const { |
| 327 return next_.literal_buffer->data(); | 323 return next_.literal_chars.start(); |
| 328 } | 324 } |
| 325 |
| 326 |
| 329 // Returns the length of the literal for the next token (the token that | 327 // Returns the length of the literal for the next token (the token that |
| 330 // would be returned if Next() were called). | 328 // would be returned if Next() were called). |
| 331 int next_literal_length() const { | 329 int next_literal_length() const { |
| 332 return next_.literal_buffer->pos() - 1; | 330 // Excluding terminal '\x00' added by TerminateLiteral(). |
| 331 return next_.literal_chars.length() - 1; |
| 333 } | 332 } |
| 334 | 333 |
| 335 Vector<const char> next_literal() const { | 334 Vector<const char> next_literal() const { |
| 336 return Vector<const char>(next_literal_string(), | 335 return Vector<const char>(next_literal_string(), next_literal_length()); |
| 337 next_literal_length()); | |
| 338 } | 336 } |
| 339 | 337 |
| 340 // Scans the input as a regular expression pattern, previous | 338 // Scans the input as a regular expression pattern, previous |
| 341 // character(s) must be /(=). Returns true if a pattern is scanned. | 339 // character(s) must be /(=). Returns true if a pattern is scanned. |
| 342 bool ScanRegExpPattern(bool seen_equal); | 340 bool ScanRegExpPattern(bool seen_equal); |
| 343 // Returns true if regexp flags are scanned (always since flags can | 341 // Returns true if regexp flags are scanned (always since flags can |
| 344 // be empty). | 342 // be empty). |
| 345 bool ScanRegExpFlags(); | 343 bool ScanRegExpFlags(); |
| 346 | 344 |
| 347 // Seek forward to the given position. This operation does not | 345 // Seek forward to the given position. This operation does not |
| (...skipping 16 matching lines...) Expand all Loading... |
| 364 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; | 362 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; |
| 365 | 363 |
| 366 static const int kCharacterLookaheadBufferSize = 1; | 364 static const int kCharacterLookaheadBufferSize = 1; |
| 367 static const int kNoEndPosition = 1; | 365 static const int kNoEndPosition = 1; |
| 368 | 366 |
| 369 private: | 367 private: |
| 370 // The current and look-ahead token. | 368 // The current and look-ahead token. |
| 371 struct TokenDesc { | 369 struct TokenDesc { |
| 372 Token::Value token; | 370 Token::Value token; |
| 373 Location location; | 371 Location location; |
| 374 UTF8Buffer* literal_buffer; | 372 Vector<const char> literal_chars; |
| 375 }; | 373 }; |
| 376 | 374 |
| 377 void Init(Handle<String> source, | 375 void Init(Handle<String> source, |
| 378 unibrow::CharacterStream* stream, | 376 unibrow::CharacterStream* stream, |
| 379 int start_position, int end_position, | 377 int start_position, int end_position, |
| 380 ParserLanguage language); | 378 ParserLanguage language); |
| 381 | 379 |
| 382 // Literal buffer support | 380 // Literal buffer support |
| 383 void StartLiteral(); | 381 inline void StartLiteral(); |
| 384 void AddChar(uc32 ch); | 382 inline void AddChar(uc32 ch); |
| 385 void AddCharAdvance(); | 383 inline void AddCharAdvance(); |
| 386 void TerminateLiteral(); | 384 inline void TerminateLiteral(); |
| 387 | 385 |
| 388 // Low-level scanning support. | 386 // Low-level scanning support. |
| 389 void Advance() { c0_ = source_->Advance(); } | 387 void Advance() { c0_ = source_->Advance(); } |
| 390 void PushBack(uc32 ch) { | 388 void PushBack(uc32 ch) { |
| 391 source_->PushBack(ch); | 389 source_->PushBack(ch); |
| 392 c0_ = ch; | 390 c0_ = ch; |
| 393 } | 391 } |
| 394 | 392 |
| 395 bool SkipWhiteSpace() { | 393 bool SkipWhiteSpace() { |
| 396 if (is_parsing_json_) { | 394 if (is_parsing_json_) { |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 480 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; | 478 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; |
| 481 | 479 |
| 482 // Source. Will point to one of the buffers declared above. | 480 // Source. Will point to one of the buffers declared above. |
| 483 UTF16Buffer* source_; | 481 UTF16Buffer* source_; |
| 484 | 482 |
| 485 // Used to convert the source string into a character stream when a stream | 483 // Used to convert the source string into a character stream when a stream |
| 486 // is not passed to the scanner. | 484 // is not passed to the scanner. |
| 487 SafeStringInputBuffer safe_string_input_buffer_; | 485 SafeStringInputBuffer safe_string_input_buffer_; |
| 488 | 486 |
| 489 // Buffer to hold literal values (identifiers, strings, numbers) | 487 // Buffer to hold literal values (identifiers, strings, numbers) |
| 490 // using 0-terminated UTF-8 encoding. | 488 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. |
| 491 UTF8Buffer literal_buffer_1_; | 489 UTF8Buffer literal_buffer_; |
| 492 UTF8Buffer literal_buffer_2_; | |
| 493 | 490 |
| 494 bool stack_overflow_; | 491 bool stack_overflow_; |
| 495 static StaticResource<Utf8Decoder> utf8_decoder_; | 492 static StaticResource<Utf8Decoder> utf8_decoder_; |
| 496 | 493 |
| 497 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 494 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| 498 uc32 c0_; | 495 uc32 c0_; |
| 499 }; | 496 }; |
| 500 | 497 |
| 501 } } // namespace v8::internal | 498 } } // namespace v8::internal |
| 502 | 499 |
| 503 #endif // V8_SCANNER_H_ | 500 #endif // V8_SCANNER_H_ |
| OLD | NEW |