Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/scanner.h

Issue 3181036: Created collector class and used it to collect identifiers during scanning. (Closed)
Patch Set: Created 10 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/runtime.cc ('k') | src/scanner.cc » ('j') | src/utils.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 22 matching lines...) Expand all
33 33
34 namespace v8 { 34 namespace v8 {
35 namespace internal { 35 namespace internal {
36 36
37 37
38 class UTF8Buffer { 38 class UTF8Buffer {
39 public: 39 public:
40 UTF8Buffer(); 40 UTF8Buffer();
41 ~UTF8Buffer(); 41 ~UTF8Buffer();
42 42
43 void AddChar(uc32 c) { 43 inline void AddChar(uc32 c) {
44 ASSERT_NOT_NULL(data_); 44 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
45 if (cursor_ <= limit_ && 45 buffer_.Add(static_cast<char>(c));
46 static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
47 *cursor_++ = static_cast<char>(c);
48 } else { 46 } else {
49 AddCharSlow(c); 47 AddCharSlow(c);
50 } 48 }
51 } 49 }
52 50
53 void Reset() { 51 void StartLiteral() {
54 if (data_ == NULL) { 52 buffer_.StartSequence();
55 data_ = NewArray<char>(kInitialCapacity);
56 limit_ = ComputeLimit(data_, kInitialCapacity);
57 }
58 cursor_ = data_;
59 } 53 }
60 54
61 int pos() const { 55 Vector<const char> EndLiteral() {
62 ASSERT_NOT_NULL(data_); 56 buffer_.Add(kEndMarker);
63 return static_cast<int>(cursor_ - data_); 57 Vector<char> sequence = buffer_.EndSequence();
58 return Vector<const char>(sequence.start(), sequence.length());
64 } 59 }
65 60
66 char* data() const { return data_; } 61 // The end marker added after a parsed literal.
67 62 // Using zero allows the usage of strlen and similar functions on
63 // identifiers and numbers (but not strings, since they may contain zero
64 // bytes).
65 // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside
66 // a UTF-8 string. This requires changes in all places that use
67 // str-functions on the literals, but allows a single pointer to represent
68 // the literal, even if it contains embedded zeros.
69 static const char kEndMarker = '\x00';
68 private: 70 private:
69 static const int kInitialCapacity = 256; 71 static const int kInitialCapacity = 256;
70 char* data_; 72 SequenceCollector<char> buffer_;
71 char* cursor_;
72 char* limit_;
73
74 int Capacity() const {
75 ASSERT_NOT_NULL(data_);
76 return static_cast<int>(limit_ - data_) + unibrow::Utf8::kMaxEncodedSize;
77 }
78
79 static char* ComputeLimit(char* data, int capacity) {
80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize;
81 }
82 73
83 void AddCharSlow(uc32 c); 74 void AddCharSlow(uc32 c);
84 }; 75 };
85 76
86 77
87 // Interface through which the scanner reads characters from the input source. 78 // Interface through which the scanner reads characters from the input source.
88 class UTF16Buffer { 79 class UTF16Buffer {
89 public: 80 public:
90 UTF16Buffer(); 81 UTF16Buffer();
91 virtual ~UTF16Buffer() {} 82 virtual ~UTF16Buffer() {}
(...skipping 215 matching lines...) Expand 10 before | Expand all | Expand 10 after
307 Location location() const { return current_.location; } 298 Location location() const { return current_.location; }
308 Location peek_location() const { return next_.location; } 299 Location peek_location() const { return next_.location; }
309 300
310 // Returns the literal string, if any, for the current token (the 301 // Returns the literal string, if any, for the current token (the
311 // token returned by Next()). The string is 0-terminated and in 302 // token returned by Next()). The string is 0-terminated and in
312 // UTF-8 format; it may contain 0-characters. Literal strings are 303 // UTF-8 format; it may contain 0-characters. Literal strings are
313 // collected for identifiers, strings, and numbers. 304 // collected for identifiers, strings, and numbers.
314 // These functions only give the correct result if the literal 305 // These functions only give the correct result if the literal
315 // was scanned between calls to StartLiteral() and TerminateLiteral(). 306 // was scanned between calls to StartLiteral() and TerminateLiteral().
316 const char* literal_string() const { 307 const char* literal_string() const {
317 return current_.literal_buffer->data(); 308 return current_.literal_chars.start();
318 } 309 }
310
319 int literal_length() const { 311 int literal_length() const {
320 // Excluding terminal '\0' added by TerminateLiteral(). 312 // Excluding terminal '\x00' added by TerminateLiteral().
321 return current_.literal_buffer->pos() - 1; 313 return current_.literal_chars.length() - 1;
314 }
315
316 Vector<const char> literal() const {
317 return Vector<const char>(literal_string(), literal_length());
322 } 318 }
323 319
324 // Returns the literal string for the next token (the token that 320 // Returns the literal string for the next token (the token that
325 // would be returned if Next() were called). 321 // would be returned if Next() were called).
326 const char* next_literal_string() const { 322 const char* next_literal_string() const {
327 return next_.literal_buffer->data(); 323 return next_.literal_chars.start();
328 } 324 }
325
326
329 // Returns the length of the next token (that would be returned if 327 // Returns the length of the next token (that would be returned if
330 // Next() were called). 328 // Next() were called).
331 int next_literal_length() const { 329 int next_literal_length() const {
332 return next_.literal_buffer->pos() - 1; 330 // Excluding terminal '\x00' added by TerminateLiteral().
331 return next_.literal_chars.length() - 1;
333 } 332 }
334 333
335 Vector<const char> next_literal() const { 334 Vector<const char> next_literal() const {
336 return Vector<const char>(next_literal_string(), 335 return Vector<const char>(next_literal_string(), next_literal_length());
337 next_literal_length());
338 } 336 }
339 337
340 // Scans the input as a regular expression pattern, previous 338 // Scans the input as a regular expression pattern, previous
341 // character(s) must be /(=). Returns true if a pattern is scanned. 339 // character(s) must be /(=). Returns true if a pattern is scanned.
342 bool ScanRegExpPattern(bool seen_equal); 340 bool ScanRegExpPattern(bool seen_equal);
343 // Returns true if regexp flags are scanned (always since flags can 341 // Returns true if regexp flags are scanned (always since flags can
344 // be empty). 342 // be empty).
345 bool ScanRegExpFlags(); 343 bool ScanRegExpFlags();
346 344
347 // Seek forward to the given position. This operation does not 345 // Seek forward to the given position. This operation does not
(...skipping 16 matching lines...) Expand all
364 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; 362 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
365 363
366 static const int kCharacterLookaheadBufferSize = 1; 364 static const int kCharacterLookaheadBufferSize = 1;
367 static const int kNoEndPosition = 1; 365 static const int kNoEndPosition = 1;
368 366
369 private: 367 private:
370 // The current and look-ahead token. 368 // The current and look-ahead token.
371 struct TokenDesc { 369 struct TokenDesc {
372 Token::Value token; 370 Token::Value token;
373 Location location; 371 Location location;
374 UTF8Buffer* literal_buffer; 372 Vector<const char> literal_chars;
375 }; 373 };
376 374
377 void Init(Handle<String> source, 375 void Init(Handle<String> source,
378 unibrow::CharacterStream* stream, 376 unibrow::CharacterStream* stream,
379 int start_position, int end_position, 377 int start_position, int end_position,
380 ParserLanguage language); 378 ParserLanguage language);
381 379
382 // Literal buffer support 380 // Literal buffer support
383 void StartLiteral(); 381 inline void StartLiteral();
384 void AddChar(uc32 ch); 382 inline void AddChar(uc32 ch);
385 void AddCharAdvance(); 383 inline void AddCharAdvance();
386 void TerminateLiteral(); 384 inline void TerminateLiteral();
387 385
388 // Low-level scanning support. 386 // Low-level scanning support.
389 void Advance() { c0_ = source_->Advance(); } 387 void Advance() { c0_ = source_->Advance(); }
390 void PushBack(uc32 ch) { 388 void PushBack(uc32 ch) {
391 source_->PushBack(ch); 389 source_->PushBack(ch);
392 c0_ = ch; 390 c0_ = ch;
393 } 391 }
394 392
395 bool SkipWhiteSpace() { 393 bool SkipWhiteSpace() {
396 if (is_parsing_json_) { 394 if (is_parsing_json_) {
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
480 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; 478 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;
481 479
482 // Source. Will point to one of the buffers declared above. 480 // Source. Will point to one of the buffers declared above.
483 UTF16Buffer* source_; 481 UTF16Buffer* source_;
484 482
485 // Used to convert the source string into a character stream when a stream 483 // Used to convert the source string into a character stream when a stream
486 // is not passed to the scanner. 484 // is not passed to the scanner.
487 SafeStringInputBuffer safe_string_input_buffer_; 485 SafeStringInputBuffer safe_string_input_buffer_;
488 486
489 // Buffer to hold literal values (identifiers, strings, numbers) 487 // Buffer to hold literal values (identifiers, strings, numbers)
490 // using 0-terminated UTF-8 encoding. 488 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally.
491 UTF8Buffer literal_buffer_1_; 489 UTF8Buffer literal_buffer_;
492 UTF8Buffer literal_buffer_2_;
493 490
494 bool stack_overflow_; 491 bool stack_overflow_;
495 static StaticResource<Utf8Decoder> utf8_decoder_; 492 static StaticResource<Utf8Decoder> utf8_decoder_;
496 493
497 // One Unicode character look-ahead; c0_ < 0 at the end of the input. 494 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
498 uc32 c0_; 495 uc32 c0_;
499 }; 496 };
500 497
501 } } // namespace v8::internal 498 } } // namespace v8::internal
502 499
503 #endif // V8_SCANNER_H_ 500 #endif // V8_SCANNER_H_
OLDNEW
« no previous file with comments | « src/runtime.cc ('k') | src/scanner.cc » ('j') | src/utils.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698