Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(73)

Side by Side Diff: src/scanner-base.h

Issue 6580038: [Isolates] Merge from bleeding_edge, revisions 5934-6100. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/isolates/
Patch Set: '' Created 9 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
45 // Returns the value (0 .. 15) of a hexadecimal character c. 45 // Returns the value (0 .. 15) of a hexadecimal character c.
46 // If c is not a legal hexadecimal character, returns a value < 0. 46 // If c is not a legal hexadecimal character, returns a value < 0.
47 inline int HexValue(uc32 c) { 47 inline int HexValue(uc32 c) {
48 c -= '0'; 48 c -= '0';
49 if (static_cast<unsigned>(c) <= 9) return c; 49 if (static_cast<unsigned>(c) <= 9) return c;
50 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. 50 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
51 if (static_cast<unsigned>(c) <= 5) return c + 10; 51 if (static_cast<unsigned>(c) <= 5) return c + 10;
52 return -1; 52 return -1;
53 } 53 }
54 54
55 // ----------------------------------------------------------------------------
56 // UTF16Buffer - scanner input source with pushback.
57 55
58 class UTF16Buffer { 56 // ---------------------------------------------------------------------
57 // Buffered stream of characters, using an internal UC16 buffer.
58
59 class UC16CharacterStream {
59 public: 60 public:
60 UTF16Buffer(); 61 UC16CharacterStream() : pos_(0) { }
61 virtual ~UTF16Buffer() {} 62 virtual ~UC16CharacterStream() { }
62 63
63 virtual void PushBack(uc32 ch) = 0; 64 // Returns and advances past the next UC16 character in the input
64 // Returns a value < 0 when the buffer end is reached. 65 // stream. If there are no more characters, it returns a negative
65 virtual uc32 Advance() = 0; 66 // value.
66 virtual void SeekForward(int pos) = 0; 67 inline int32_t Advance() {
68 if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
69 pos_++;
70 return *(buffer_cursor_++);
71 }
72 // Note: currently the following increment is necessary to avoid a
73 // parser problem! The scanner treats the final kEndOfInput as
74 // a character with a position, and does math relative to that
75 // position.
76 pos_++;
67 77
68 int pos() const { return pos_; } 78 return kEndOfInput;
79 }
69 80
70 static const int kNoEndPosition = 1; 81 // Return the current position in the character stream.
82 // Starts at zero.
83 inline unsigned pos() const { return pos_; }
84
85 // Skips forward past the next character_count UC16 characters
86 // in the input, or until the end of input if that comes sooner.
87 // Returns the number of characters actually skipped. If less
88 // than character_count, the end of the input was reached.
89 inline unsigned SeekForward(unsigned character_count) {
90 unsigned buffered_chars =
91 static_cast<unsigned>(buffer_end_ - buffer_cursor_);
92 if (character_count <= buffered_chars) {
93 buffer_cursor_ += character_count;
94 pos_ += character_count;
95 return character_count;
96 }
97 return SlowSeekForward(character_count);
98 }
99
100 // Pushes back the most recently read UC16 character, i.e.,
101 // the value returned by the most recent call to Advance.
102 // Must not be used right after calling SeekForward.
103 virtual void PushBack(uc16 character) = 0;
71 104
72 protected: 105 protected:
73 // Initial value of end_ before the input stream is initialized. 106 static const int32_t kEndOfInput = -1;
74 107
75 int pos_; // Current position in the buffer. 108 // Ensures that the buffer_cursor_ points to the character at
76 int end_; // Position where scanning should stop (EOF). 109 // position pos_ of the input, if possible. If the position
110 // is at or after the end of the input, return false. If there
111 // are more characters available, return true.
112 virtual bool ReadBlock() = 0;
113 virtual unsigned SlowSeekForward(unsigned character_count) = 0;
114
115 const uc16* buffer_cursor_;
116 const uc16* buffer_end_;
117 unsigned pos_;
77 }; 118 };
78 119
79 120
80 class ScannerConstants { 121 class ScannerConstants {
122 // ---------------------------------------------------------------------
123 // Constants used by scanners.
81 public: 124 public:
82 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; 125 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
83 126
84 StaticResource<Utf8Decoder>* utf8_decoder() { 127 StaticResource<Utf8Decoder>* utf8_decoder() {
85 return &utf8_decoder_; 128 return &utf8_decoder_;
86 } 129 }
87 130
88 bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); } 131 bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); }
89 bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); } 132 bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); }
90 bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); } 133 bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); }
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
173 public: 216 public:
174 explicit LiteralScope(Scanner* self); 217 explicit LiteralScope(Scanner* self);
175 ~LiteralScope(); 218 ~LiteralScope();
176 void Complete(); 219 void Complete();
177 220
178 private: 221 private:
179 Scanner* scanner_; 222 Scanner* scanner_;
180 bool complete_; 223 bool complete_;
181 }; 224 };
182 225
183 Scanner(); 226 explicit Scanner(Isolate* isolate);
184 227
185 // Returns the current token again. 228 // Returns the current token again.
186 Token::Value current_token() { return current_.token; } 229 Token::Value current_token() { return current_.token; }
187 230
188 // One token look-ahead (past the token returned by Next()). 231 // One token look-ahead (past the token returned by Next()).
189 Token::Value peek() const { return next_.token; } 232 Token::Value peek() const { return next_.token; }
190 233
191 struct Location { 234 struct Location {
192 Location(int b, int e) : beg_pos(b), end_pos(e) { } 235 Location(int b, int e) : beg_pos(b), end_pos(e) { }
193 Location() : beg_pos(0), end_pos(0) { } 236 Location() : beg_pos(0), end_pos(0) { }
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
279 } 322 }
280 323
281 inline void AddLiteralCharAdvance() { 324 inline void AddLiteralCharAdvance() {
282 AddLiteralChar(c0_); 325 AddLiteralChar(c0_);
283 Advance(); 326 Advance();
284 } 327 }
285 328
286 // Low-level scanning support. 329 // Low-level scanning support.
287 void Advance() { c0_ = source_->Advance(); } 330 void Advance() { c0_ = source_->Advance(); }
288 void PushBack(uc32 ch) { 331 void PushBack(uc32 ch) {
289 source_->PushBack(ch); 332 source_->PushBack(c0_);
290 c0_ = ch; 333 c0_ = ch;
291 } 334 }
292 335
293 inline Token::Value Select(Token::Value tok) { 336 inline Token::Value Select(Token::Value tok) {
294 Advance(); 337 Advance();
295 return tok; 338 return tok;
296 } 339 }
297 340
298 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { 341 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
299 Advance(); 342 Advance();
(...skipping 11 matching lines...) Expand all
311 // Return the current source position. 354 // Return the current source position.
312 int source_pos() { 355 int source_pos() {
313 return source_->pos() - kCharacterLookaheadBufferSize; 356 return source_->pos() - kCharacterLookaheadBufferSize;
314 } 357 }
315 358
316 ScannerConstants* scanner_constants_; 359 ScannerConstants* scanner_constants_;
317 360
318 TokenDesc current_; // desc for current token (as returned by Next()) 361 TokenDesc current_; // desc for current token (as returned by Next())
319 TokenDesc next_; // desc for next token (one token look-ahead) 362 TokenDesc next_; // desc for next token (one token look-ahead)
320 363
321 // Input stream. Must be initialized to a UTF16Buffer. 364 // Input stream. Must be initialized to a UC16CharacterStream.
322 UTF16Buffer* source_; 365 UC16CharacterStream* source_;
323 366
324 // Buffer to hold literal values (identifiers, strings, numbers) 367 // Buffer to hold literal values (identifiers, strings, numbers)
325 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. 368 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally.
326 LiteralCollector literal_buffer_; 369 LiteralCollector literal_buffer_;
327 370
328 // One Unicode character look-ahead; c0_ < 0 at the end of the input. 371 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
329 uc32 c0_; 372 uc32 c0_;
330 }; 373 };
331 374
332 // ---------------------------------------------------------------------------- 375 // ----------------------------------------------------------------------------
(...skipping 30 matching lines...) Expand all
363 void Complete() { 406 void Complete() {
364 scanner_->TerminateLiteral(); 407 scanner_->TerminateLiteral();
365 complete_ = true; 408 complete_ = true;
366 } 409 }
367 410
368 private: 411 private:
369 JavaScriptScanner* scanner_; 412 JavaScriptScanner* scanner_;
370 bool complete_; 413 bool complete_;
371 }; 414 };
372 415
373 explicit JavaScriptScanner(ScannerConstants* scanner_constants); 416 explicit JavaScriptScanner(Isolate* isolate);
374 417
375 // Returns the next token. 418 // Returns the next token.
376 Token::Value Next(); 419 Token::Value Next();
377 420
378 // Returns true if there was a line terminator before the peek'ed token. 421 // Returns true if there was a line terminator before the peek'ed token.
379 bool has_line_terminator_before_next() const { 422 bool has_line_terminator_before_next() const {
380 return has_line_terminator_before_next_; 423 return has_line_terminator_before_next_;
381 } 424 }
382 425
383 // Scans the input as a regular expression pattern, previous 426 // Scans the input as a regular expression pattern, previous
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
418 void ScanEscape(); 461 void ScanEscape();
419 Token::Value ScanString(); 462 Token::Value ScanString();
420 463
421 // Scans a possible HTML comment -- begins with '<!'. 464 // Scans a possible HTML comment -- begins with '<!'.
422 Token::Value ScanHtmlComment(); 465 Token::Value ScanHtmlComment();
423 466
424 // Decodes a unicode escape-sequence which is part of an identifier. 467 // Decodes a unicode escape-sequence which is part of an identifier.
425 // If the escape sequence cannot be decoded the result is kBadChar. 468 // If the escape sequence cannot be decoded the result is kBadChar.
426 uc32 ScanIdentifierUnicodeEscape(); 469 uc32 ScanIdentifierUnicodeEscape();
427 470
428 ScannerConstants* scanner_constants_;
429 int literal_flags_; 471 int literal_flags_;
430 bool has_line_terminator_before_next_; 472 bool has_line_terminator_before_next_;
431 }; 473 };
432 474
433 475
434 // ---------------------------------------------------------------------------- 476 // ----------------------------------------------------------------------------
435 // Keyword matching state machine. 477 // Keyword matching state machine.
436 478
437 class KeywordMatcher { 479 class KeywordMatcher {
438 // Incrementally recognize keywords. 480 // Incrementally recognize keywords.
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
552 // keyword with the current prefix). 594 // keyword with the current prefix).
553 const char* keyword_; 595 const char* keyword_;
554 int counter_; 596 int counter_;
555 Token::Value keyword_token_; 597 Token::Value keyword_token_;
556 }; 598 };
557 599
558 600
559 } } // namespace v8::internal 601 } } // namespace v8::internal
560 602
561 #endif // V8_SCANNER_BASE_H_ 603 #endif // V8_SCANNER_BASE_H_
OLDNEW
« no previous file with comments | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698