OLD | NEW |
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 22 matching lines...) Expand all Loading... |
33 | 33 |
34 namespace v8 { | 34 namespace v8 { |
35 namespace internal { | 35 namespace internal { |
36 | 36 |
37 | 37 |
38 class UTF8Buffer { | 38 class UTF8Buffer { |
39 public: | 39 public: |
40 UTF8Buffer(); | 40 UTF8Buffer(); |
41 ~UTF8Buffer(); | 41 ~UTF8Buffer(); |
42 | 42 |
43 void AddChar(uc32 c) { | 43 inline void AddChar(uc32 c) { |
44 ASSERT_NOT_NULL(data_); | 44 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { |
45 if (cursor_ <= limit_ && | 45 buffer_.Add(static_cast<char>(c)); |
46 static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { | |
47 *cursor_++ = static_cast<char>(c); | |
48 } else { | 46 } else { |
49 AddCharSlow(c); | 47 AddCharSlow(c); |
50 } | 48 } |
51 } | 49 } |
52 | 50 |
53 void Reset() { | 51 void StartLiteral() { |
54 if (data_ == NULL) { | 52 buffer_.StartSequence(); |
55 data_ = NewArray<char>(kInitialCapacity); | |
56 limit_ = ComputeLimit(data_, kInitialCapacity); | |
57 } | |
58 cursor_ = data_; | |
59 } | 53 } |
60 | 54 |
61 int pos() const { | 55 Vector<const char> EndLiteral() { |
62 ASSERT_NOT_NULL(data_); | 56 buffer_.Add(kEndMarker); |
63 return static_cast<int>(cursor_ - data_); | 57 Vector<char> sequence = buffer_.EndSequence(); |
| 58 return Vector<const char>(sequence.start(), sequence.length()); |
64 } | 59 } |
65 | 60 |
66 char* data() const { return data_; } | 61 // The end marker added after a parsed literal. |
67 | 62 // Using zero allows the usage of strlen and similar functions on |
| 63 // identifiers and numbers (but not strings, since they may contain zero |
| 64 // bytes). |
| 65 // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside |
| 66 // a UTF-8 string. This requires changes in all places that use |
| 67 // str-functions on the literals, but allows a single pointer to represent |
| 68 // the literal, even if it contains embedded zeros. |
| 69 static const char kEndMarker = '\x00'; |
68 private: | 70 private: |
69 static const int kInitialCapacity = 256; | 71 static const int kInitialCapacity = 256; |
70 char* data_; | 72 SequenceCollector<char> buffer_; |
71 char* cursor_; | |
72 char* limit_; | |
73 | |
74 int Capacity() const { | |
75 ASSERT_NOT_NULL(data_); | |
76 return static_cast<int>(limit_ - data_) + unibrow::Utf8::kMaxEncodedSize; | |
77 } | |
78 | |
79 static char* ComputeLimit(char* data, int capacity) { | |
80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize; | |
81 } | |
82 | 73 |
83 void AddCharSlow(uc32 c); | 74 void AddCharSlow(uc32 c); |
84 }; | 75 }; |
85 | 76 |
86 | 77 |
87 // Interface through which the scanner reads characters from the input source. | 78 // Interface through which the scanner reads characters from the input source. |
88 class UTF16Buffer { | 79 class UTF16Buffer { |
89 public: | 80 public: |
90 UTF16Buffer(); | 81 UTF16Buffer(); |
91 virtual ~UTF16Buffer() {} | 82 virtual ~UTF16Buffer() {} |
(...skipping 215 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
307 Location location() const { return current_.location; } | 298 Location location() const { return current_.location; } |
308 Location peek_location() const { return next_.location; } | 299 Location peek_location() const { return next_.location; } |
309 | 300 |
310 // Returns the literal string, if any, for the current token (the | 301 // Returns the literal string, if any, for the current token (the |
311 // token returned by Next()). The string is 0-terminated and in | 302 // token returned by Next()). The string is 0-terminated and in |
312 // UTF-8 format; it may contain 0-characters. Literal strings are | 303 // UTF-8 format; it may contain 0-characters. Literal strings are |
313 // collected for identifiers, strings, and numbers. | 304 // collected for identifiers, strings, and numbers. |
314 // These functions only give the correct result if the literal | 305 // These functions only give the correct result if the literal |
315 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 306 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
316 const char* literal_string() const { | 307 const char* literal_string() const { |
317 return current_.literal_buffer->data(); | 308 return current_.literal_chars.start(); |
318 } | 309 } |
| 310 |
319 int literal_length() const { | 311 int literal_length() const { |
320 // Excluding terminal '\0' added by TerminateLiteral(). | 312 // Excluding terminal '\x00' added by TerminateLiteral(). |
321 return current_.literal_buffer->pos() - 1; | 313 return current_.literal_chars.length() - 1; |
| 314 } |
| 315 |
| 316 Vector<const char> literal() const { |
| 317 return Vector<const char>(literal_string(), literal_length()); |
322 } | 318 } |
323 | 319 |
324 // Returns the literal string for the next token (the token that | 320 // Returns the literal string for the next token (the token that |
325 // would be returned if Next() were called). | 321 // would be returned if Next() were called). |
326 const char* next_literal_string() const { | 322 const char* next_literal_string() const { |
327 return next_.literal_buffer->data(); | 323 return next_.literal_chars.start(); |
328 } | 324 } |
| 325 |
| 326 |
329 // Returns the length of the literal for the next token (the token | 327 // Returns the length of the literal for the next token (the token |
330 // that would be returned if Next() were called). | 328 // that would be returned if Next() were called). |
331 int next_literal_length() const { | 329 int next_literal_length() const { |
332 return next_.literal_buffer->pos() - 1; | 330 // Excluding terminal '\x00' added by TerminateLiteral(). |
| 331 return next_.literal_chars.length() - 1; |
333 } | 332 } |
334 | 333 |
335 Vector<const char> next_literal() const { | 334 Vector<const char> next_literal() const { |
336 return Vector<const char>(next_literal_string(), | 335 return Vector<const char>(next_literal_string(), next_literal_length()); |
337 next_literal_length()); | |
338 } | 336 } |
339 | 337 |
340 // Scans the input as a regular expression pattern, previous | 338 // Scans the input as a regular expression pattern, previous |
341 // character(s) must be /(=). Returns true if a pattern is scanned. | 339 // character(s) must be /(=). Returns true if a pattern is scanned. |
342 bool ScanRegExpPattern(bool seen_equal); | 340 bool ScanRegExpPattern(bool seen_equal); |
343 // Returns true if regexp flags are scanned (always since flags can | 341 // Returns true if regexp flags are scanned (always since flags can |
344 // be empty). | 342 // be empty). |
345 bool ScanRegExpFlags(); | 343 bool ScanRegExpFlags(); |
346 | 344 |
347 // Seek forward to the given position. This operation does not | 345 // Seek forward to the given position. This operation does not |
(...skipping 16 matching lines...) Expand all Loading... |
364 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; | 362 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; |
365 | 363 |
366 static const int kCharacterLookaheadBufferSize = 1; | 364 static const int kCharacterLookaheadBufferSize = 1; |
367 static const int kNoEndPosition = 1; | 365 static const int kNoEndPosition = 1; |
368 | 366 |
369 private: | 367 private: |
370 // The current and look-ahead token. | 368 // The current and look-ahead token. |
371 struct TokenDesc { | 369 struct TokenDesc { |
372 Token::Value token; | 370 Token::Value token; |
373 Location location; | 371 Location location; |
374 UTF8Buffer* literal_buffer; | 372 Vector<const char> literal_chars; |
375 }; | 373 }; |
376 | 374 |
377 void Init(Handle<String> source, | 375 void Init(Handle<String> source, |
378 unibrow::CharacterStream* stream, | 376 unibrow::CharacterStream* stream, |
379 int start_position, int end_position, | 377 int start_position, int end_position, |
380 ParserLanguage language); | 378 ParserLanguage language); |
381 | 379 |
382 // Literal buffer support | 380 // Literal buffer support |
383 void StartLiteral(); | 381 inline void StartLiteral(); |
384 void AddChar(uc32 ch); | 382 inline void AddChar(uc32 ch); |
385 void AddCharAdvance(); | 383 inline void AddCharAdvance(); |
386 void TerminateLiteral(); | 384 inline void TerminateLiteral(); |
387 | 385 |
388 // Low-level scanning support. | 386 // Low-level scanning support. |
389 void Advance() { c0_ = source_->Advance(); } | 387 void Advance() { c0_ = source_->Advance(); } |
390 void PushBack(uc32 ch) { | 388 void PushBack(uc32 ch) { |
391 source_->PushBack(ch); | 389 source_->PushBack(ch); |
392 c0_ = ch; | 390 c0_ = ch; |
393 } | 391 } |
394 | 392 |
395 bool SkipWhiteSpace() { | 393 bool SkipWhiteSpace() { |
396 if (is_parsing_json_) { | 394 if (is_parsing_json_) { |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
480 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; | 478 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; |
481 | 479 |
482 // Source. Will point to one of the buffers declared above. | 480 // Source. Will point to one of the buffers declared above. |
483 UTF16Buffer* source_; | 481 UTF16Buffer* source_; |
484 | 482 |
485 // Used to convert the source string into a character stream when a stream | 483 // Used to convert the source string into a character stream when a stream |
486 // is not passed to the scanner. | 484 // is not passed to the scanner. |
487 SafeStringInputBuffer safe_string_input_buffer_; | 485 SafeStringInputBuffer safe_string_input_buffer_; |
488 | 486 |
489 // Buffer to hold literal values (identifiers, strings, numbers) | 487 // Buffer to hold literal values (identifiers, strings, numbers) |
490 // using 0-terminated UTF-8 encoding. | 488 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. |
491 UTF8Buffer literal_buffer_1_; | 489 UTF8Buffer literal_buffer_; |
492 UTF8Buffer literal_buffer_2_; | |
493 | 490 |
494 bool stack_overflow_; | 491 bool stack_overflow_; |
495 static StaticResource<Utf8Decoder> utf8_decoder_; | 492 static StaticResource<Utf8Decoder> utf8_decoder_; |
496 | 493 |
497 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 494 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
498 uc32 c0_; | 495 uc32 c0_; |
499 }; | 496 }; |
500 | 497 |
501 } } // namespace v8::internal | 498 } } // namespace v8::internal |
502 | 499 |
503 #endif // V8_SCANNER_H_ | 500 #endif // V8_SCANNER_H_ |
OLD | NEW |