Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(22)

Side by Side Diff: src/scanner-base.h

Issue 6529032: Merge 6168:6800 from bleeding_edge to experimental/gc branch. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/gc/
Patch Set: Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
57 // Buffered stream of characters, using an internal UC16 buffer. 57 // Buffered stream of characters, using an internal UC16 buffer.
58 58
59 class UC16CharacterStream { 59 class UC16CharacterStream {
60 public: 60 public:
61 UC16CharacterStream() : pos_(0) { } 61 UC16CharacterStream() : pos_(0) { }
62 virtual ~UC16CharacterStream() { } 62 virtual ~UC16CharacterStream() { }
63 63
64 // Returns and advances past the next UC16 character in the input 64 // Returns and advances past the next UC16 character in the input
65 // stream. If there are no more characters, it returns a negative 65 // stream. If there are no more characters, it returns a negative
66 // value. 66 // value.
67 inline int32_t Advance() { 67 inline uc32 Advance() {
68 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { 68 if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
69 pos_++; 69 pos_++;
70 return *(buffer_cursor_++); 70 return static_cast<uc32>(*(buffer_cursor_++));
71 } 71 }
72 // Note: currently the following increment is necessary to avoid a 72 // Note: currently the following increment is necessary to avoid a
73 // parser problem! The scanner treats the final kEndOfInput as 73 // parser problem! The scanner treats the final kEndOfInput as
74 // a character with a position, and does math relative to that 74 // a character with a position, and does math relative to that
75 // position. 75 // position.
76 pos_++; 76 pos_++;
77 77
78 return kEndOfInput; 78 return kEndOfInput;
79 } 79 }
80 80
81 // Return the current position in the character stream. 81 // Return the current position in the character stream.
82 // Starts at zero. 82 // Starts at zero.
83 inline unsigned pos() const { return pos_; } 83 inline unsigned pos() const { return pos_; }
84 84
85 // Skips forward past the next character_count UC16 characters 85 // Skips forward past the next character_count UC16 characters
86 // in the input, or until the end of input if that comes sooner. 86 // in the input, or until the end of input if that comes sooner.
87 // Returns the number of characters actually skipped. If less 87 // Returns the number of characters actually skipped. If less
88 // than character_count, the end of input was reached. 88 // than character_count, the end of input was reached.
89 inline unsigned SeekForward(unsigned character_count) { 89 inline unsigned SeekForward(unsigned character_count) {
90 unsigned buffered_chars = 90 unsigned buffered_chars =
91 static_cast<unsigned>(buffer_end_ - buffer_cursor_); 91 static_cast<unsigned>(buffer_end_ - buffer_cursor_);
92 if (character_count <= buffered_chars) { 92 if (character_count <= buffered_chars) {
93 buffer_cursor_ += character_count; 93 buffer_cursor_ += character_count;
94 pos_ += character_count; 94 pos_ += character_count;
95 return character_count; 95 return character_count;
96 } 96 }
97 return SlowSeekForward(character_count); 97 return SlowSeekForward(character_count);
98 } 98 }
99 99
100 // Pushes back the most recently read UC16 character, i.e., 100 // Pushes back the most recently read UC16 character (or negative
101 // the value returned by the most recent call to Advance. 101 // value if at end of input), i.e., the value returned by the most recent
102 // call to Advance.
102 // Must not be used right after calling SeekForward. 103 // Must not be used right after calling SeekForward.
103 virtual void PushBack(uc16 character) = 0; 104 virtual void PushBack(int32_t character) = 0;
104 105
105 protected: 106 protected:
106 static const int32_t kEndOfInput = -1; 107 static const uc32 kEndOfInput = -1;
107 108
108 // Ensures that the buffer_cursor_ points to the character at 109 // Ensures that the buffer_cursor_ points to the character at
109 // position pos_ of the input, if possible. If the position 110 // position pos_ of the input, if possible. If the position
110 // is at or after the end of the input, return false. If there 111 // is at or after the end of the input, return false. If there
111 // are more characters available, return true. 112 // are more characters available, return true.
112 virtual bool ReadBlock() = 0; 113 virtual bool ReadBlock() = 0;
113 virtual unsigned SlowSeekForward(unsigned character_count) = 0; 114 virtual unsigned SlowSeekForward(unsigned character_count) = 0;
114 115
115 const uc16* buffer_cursor_; 116 const uc16* buffer_cursor_;
116 const uc16* buffer_end_; 117 const uc16* buffer_end_;
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
239 Vector<byte> backing_store_; 240 Vector<byte> backing_store_;
240 }; 241 };
241 242
242 243
243 // ---------------------------------------------------------------------------- 244 // ----------------------------------------------------------------------------
244 // Scanner base-class. 245 // Scanner base-class.
245 246
246 // Generic functionality used by both JSON and JavaScript scanners. 247 // Generic functionality used by both JSON and JavaScript scanners.
247 class Scanner { 248 class Scanner {
248 public: 249 public:
250 // -1 is outside of the range of any real source code.
251 static const int kNoOctalLocation = -1;
252
249 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; 253 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
250 254
251 class LiteralScope { 255 class LiteralScope {
252 public: 256 public:
253 explicit LiteralScope(Scanner* self); 257 explicit LiteralScope(Scanner* self);
254 ~LiteralScope(); 258 ~LiteralScope();
255 void Complete(); 259 void Complete();
256 260
257 private: 261 private:
258 Scanner* scanner_; 262 Scanner* scanner_;
259 bool complete_; 263 bool complete_;
260 }; 264 };
261 265
262 Scanner(); 266 Scanner();
263 267
264 // Returns the current token again. 268 // Returns the current token again.
265 Token::Value current_token() { return current_.token; } 269 Token::Value current_token() { return current_.token; }
266 270
267 // One token look-ahead (past the token returned by Next()). 271 // One token look-ahead (past the token returned by Next()).
268 Token::Value peek() const { return next_.token; } 272 Token::Value peek() const { return next_.token; }
269 273
270 struct Location { 274 struct Location {
271 Location(int b, int e) : beg_pos(b), end_pos(e) { } 275 Location(int b, int e) : beg_pos(b), end_pos(e) { }
272 Location() : beg_pos(0), end_pos(0) { } 276 Location() : beg_pos(0), end_pos(0) { }
277
278 bool IsValid() const {
279 return beg_pos >= 0 && end_pos >= beg_pos;
280 }
281
273 int beg_pos; 282 int beg_pos;
274 int end_pos; 283 int end_pos;
275 }; 284 };
276 285
286 static Location NoLocation() {
287 return Location(-1, -1);
288 }
289
277 // Returns the location information for the current token 290 // Returns the location information for the current token
278 // (the token returned by Next()). 291 // (the token returned by Next()).
279 Location location() const { return current_.location; } 292 Location location() const { return current_.location; }
280 Location peek_location() const { return next_.location; } 293 Location peek_location() const { return next_.location; }
281 294
295 // Returns the start position of the last seen octal literal.
296 int octal_position() const { return octal_pos_; }
297 void clear_octal_position() { octal_pos_ = -1; }
298
282 // Returns the literal string, if any, for the current token (the 299 // Returns the literal string, if any, for the current token (the
283 // token returned by Next()). The string is 0-terminated and in 300 // token returned by Next()). The string is 0-terminated and in
284 // UTF-8 format; it may contain 0-characters. Literal strings are 301 // UTF-8 format; it may contain 0-characters. Literal strings are
285 // collected for identifiers, strings, and numbers. 302 // collected for identifiers, strings, and numbers.
286 // These functions only give the correct result if the literal 303 // These functions only give the correct result if the literal
287 // was scanned between calls to StartLiteral() and TerminateLiteral(). 304 // was scanned between calls to StartLiteral() and TerminateLiteral().
288 bool is_literal_ascii() { 305 bool is_literal_ascii() {
289 ASSERT_NOT_NULL(current_.literal_chars); 306 ASSERT_NOT_NULL(current_.literal_chars);
290 return current_.literal_chars->is_ascii(); 307 return current_.literal_chars->is_ascii();
291 } 308 }
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
385 Advance(); 402 Advance();
386 if (c0_ == next) { 403 if (c0_ == next) {
387 Advance(); 404 Advance();
388 return then; 405 return then;
389 } else { 406 } else {
390 return else_; 407 return else_;
391 } 408 }
392 } 409 }
393 410
394 uc32 ScanHexEscape(uc32 c, int length); 411 uc32 ScanHexEscape(uc32 c, int length);
412
413 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
395 uc32 ScanOctalEscape(uc32 c, int length); 414 uc32 ScanOctalEscape(uc32 c, int length);
396 415
397 // Return the current source position. 416 // Return the current source position.
398 int source_pos() { 417 int source_pos() {
399 return source_->pos() - kCharacterLookaheadBufferSize; 418 return source_->pos() - kCharacterLookaheadBufferSize;
400 } 419 }
401 420
402 // Buffers collecting literal strings, numbers, etc. 421 // Buffers collecting literal strings, numbers, etc.
403 LiteralBuffer literal_buffer1_; 422 LiteralBuffer literal_buffer1_;
404 LiteralBuffer literal_buffer2_; 423 LiteralBuffer literal_buffer2_;
405 424
406 TokenDesc current_; // desc for current token (as returned by Next()) 425 TokenDesc current_; // desc for current token (as returned by Next())
407 TokenDesc next_; // desc for next token (one token look-ahead) 426 TokenDesc next_; // desc for next token (one token look-ahead)
408 427
409 // Input stream. Must be initialized to a UC16CharacterStream. 428 // Input stream. Must be initialized to a UC16CharacterStream.
410 UC16CharacterStream* source_; 429 UC16CharacterStream* source_;
411 430
431 // Start position of the octal literal last scanned.
432 int octal_pos_;
412 433
413 // One Unicode character look-ahead; c0_ < 0 at the end of the input. 434 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
414 uc32 c0_; 435 uc32 c0_;
415 }; 436 };
416 437
417 // ---------------------------------------------------------------------------- 438 // ----------------------------------------------------------------------------
418 // JavaScriptScanner - base logic for JavaScript scanning. 439 // JavaScriptScanner - base logic for JavaScript scanning.
419 440
420 class JavaScriptScanner : public Scanner { 441 class JavaScriptScanner : public Scanner {
421 public: 442 public:
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
536 UNMATCHABLE, 557 UNMATCHABLE,
537 INITIAL, 558 INITIAL,
538 KEYWORD_PREFIX, 559 KEYWORD_PREFIX,
539 KEYWORD_MATCHED, 560 KEYWORD_MATCHED,
540 C, 561 C,
541 CA, 562 CA,
542 CO, 563 CO,
543 CON, 564 CON,
544 D, 565 D,
545 DE, 566 DE,
567 E,
568 EX,
546 F, 569 F,
547 I, 570 I,
571 IM,
572 IMP,
548 IN, 573 IN,
549 N, 574 N,
575 P,
576 PR,
577 S,
550 T, 578 T,
551 TH, 579 TH,
552 TR, 580 TR,
553 V, 581 V,
554 W 582 W
555 }; 583 };
556 584
557 struct FirstState { 585 struct FirstState {
558 const char* keyword; 586 const char* keyword;
559 State state; 587 State state;
560 Token::Value token; 588 Token::Value token;
561 }; 589 };
562 590
563 // Range of possible first characters of a keyword. 591 // Range of possible first characters of a keyword.
564 static const unsigned int kFirstCharRangeMin = 'b'; 592 static const unsigned int kFirstCharRangeMin = 'b';
565 static const unsigned int kFirstCharRangeMax = 'w'; 593 static const unsigned int kFirstCharRangeMax = 'y';
566 static const unsigned int kFirstCharRangeLength = 594 static const unsigned int kFirstCharRangeLength =
567 kFirstCharRangeMax - kFirstCharRangeMin + 1; 595 kFirstCharRangeMax - kFirstCharRangeMin + 1;
568 // State map for first keyword character range. 596 // State map for first keyword character range.
569 static FirstState first_states_[kFirstCharRangeLength]; 597 static FirstState first_states_[kFirstCharRangeLength];
570 598
571 // If input equals keyword's character at position, continue matching keyword 599 // If input equals keyword's character at position, continue matching keyword
572 // from that position. 600 // from that position.
573 inline bool MatchKeywordStart(unibrow::uchar input, 601 inline bool MatchKeywordStart(unibrow::uchar input,
574 const char* keyword, 602 const char* keyword,
575 int position, 603 int position,
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
616 // keyword with the current prefix). 644 // keyword with the current prefix).
617 const char* keyword_; 645 const char* keyword_;
618 int counter_; 646 int counter_;
619 Token::Value keyword_token_; 647 Token::Value keyword_token_;
620 }; 648 };
621 649
622 650
623 } } // namespace v8::internal 651 } } // namespace v8::internal
624 652
625 #endif // V8_SCANNER_BASE_H_ 653 #endif // V8_SCANNER_BASE_H_
OLDNEW
« no previous file with comments | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698