OLD | NEW |
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
45 // Returns the value (0 .. 15) of a hexadecimal character c. | 45 // Returns the value (0 .. 15) of a hexadecimal character c. |
46 // If c is not a legal hexadecimal character, returns a value < 0. | 46 // If c is not a legal hexadecimal character, returns a value < 0. |
47 inline int HexValue(uc32 c) { | 47 inline int HexValue(uc32 c) { |
48 c -= '0'; | 48 c -= '0'; |
49 if (static_cast<unsigned>(c) <= 9) return c; | 49 if (static_cast<unsigned>(c) <= 9) return c; |
50 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. | 50 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. |
51 if (static_cast<unsigned>(c) <= 5) return c + 10; | 51 if (static_cast<unsigned>(c) <= 5) return c + 10; |
52 return -1; | 52 return -1; |
53 } | 53 } |
54 | 54 |
| 55 // ---------------------------------------------------------------------------- |
| 56 // UTF16Buffer - scanner input source with pushback. |
55 | 57 |
56 // --------------------------------------------------------------------- | 58 class UTF16Buffer { |
57 // Buffered stream of characters, using an internal UC16 buffer. | 59 public: |
| 60 UTF16Buffer(); |
| 61 virtual ~UTF16Buffer() {} |
58 | 62 |
59 class UC16CharacterStream { | 63 virtual void PushBack(uc32 ch) = 0; |
60 public: | 64 // Returns a value < 0 when the buffer end is reached. |
61 UC16CharacterStream() : pos_(0) { } | 65 virtual uc32 Advance() = 0; |
62 virtual ~UC16CharacterStream() { } | 66 virtual void SeekForward(int pos) = 0; |
63 | 67 |
64 // Returns and advances past the next UC16 character in the input | 68 int pos() const { return pos_; } |
65 // stream. If there are no more characters, it returns a negative | |
66 // value. | |
67 inline int32_t Advance() { | |
68 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { | |
69 pos_++; | |
70 return *(buffer_cursor_++); | |
71 } | |
72 // Note: currently the following increment is necessary to avoid a | |
73 // parser problem! The scanner treats the final kEndOfInput as | |
74 // a character with a position, and does math relative to that | |
75 // position. | |
76 pos_++; | |
77 | 69 |
78 return kEndOfInput; | 70 static const int kNoEndPosition = 1; |
79 } | |
80 | |
81 // Return the current position in the character stream. | |
82 // Starts at zero. | |
83 inline unsigned pos() const { return pos_; } | |
84 | |
85 // Skips forward past the next character_count UC16 characters | |
86 // in the input, or until the end of input if that comes sooner. | |
87 // Returns the number of characters actually skipped. If less | |
88 // than character_count, the end of input was reached. | |
89 inline unsigned SeekForward(unsigned character_count) { | |
90 unsigned buffered_chars = | |
91 static_cast<unsigned>(buffer_end_ - buffer_cursor_); | |
92 if (character_count <= buffered_chars) { | |
93 buffer_cursor_ += character_count; | |
94 pos_ += character_count; | |
95 return character_count; | |
96 } | |
97 return SlowSeekForward(character_count); | |
98 } | |
99 | |
100 // Pushes back the most recently read UC16 character, i.e., | |
101 // the value returned by the most recent call to Advance. | |
102 // Must not be used right after calling SeekForward. | |
103 virtual void PushBack(uc16 character) = 0; | |
104 | 71 |
105 protected: | 72 protected: |
106 static const int32_t kEndOfInput = -1; | 73 // Initial value of end_ before the input stream is initialized. |
107 | 74 |
108 // Ensures that the buffer_cursor_ points to the character at | 75 int pos_; // Current position in the buffer. |
109 // position pos_ of the input, if possible. If the position | 76 int end_; // Position where scanning should stop (EOF). |
110 // is at or after the end of the input, return false. If there | |
111 // are more characters available, return true. | |
112 virtual bool ReadBlock() = 0; | |
113 virtual unsigned SlowSeekForward(unsigned character_count) = 0; | |
114 | |
115 const uc16* buffer_cursor_; | |
116 const uc16* buffer_end_; | |
117 unsigned pos_; | |
118 }; | 77 }; |
119 | 78 |
120 | 79 |
121 // --------------------------------------------------------------------- | |
122 // Constants used by scanners. | |
123 | |
124 class ScannerConstants : AllStatic { | 80 class ScannerConstants : AllStatic { |
125 public: | 81 public: |
126 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; | 82 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; |
127 | 83 |
128 static StaticResource<Utf8Decoder>* utf8_decoder() { | 84 static StaticResource<Utf8Decoder>* utf8_decoder() { |
129 return &utf8_decoder_; | 85 return &utf8_decoder_; |
130 } | 86 } |
131 | 87 |
132 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; | 88 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; |
133 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; | 89 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; |
(...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
314 } | 270 } |
315 | 271 |
316 inline void AddLiteralCharAdvance() { | 272 inline void AddLiteralCharAdvance() { |
317 AddLiteralChar(c0_); | 273 AddLiteralChar(c0_); |
318 Advance(); | 274 Advance(); |
319 } | 275 } |
320 | 276 |
321 // Low-level scanning support. | 277 // Low-level scanning support. |
322 void Advance() { c0_ = source_->Advance(); } | 278 void Advance() { c0_ = source_->Advance(); } |
323 void PushBack(uc32 ch) { | 279 void PushBack(uc32 ch) { |
324 source_->PushBack(c0_); | 280 source_->PushBack(ch); |
325 c0_ = ch; | 281 c0_ = ch; |
326 } | 282 } |
327 | 283 |
328 inline Token::Value Select(Token::Value tok) { | 284 inline Token::Value Select(Token::Value tok) { |
329 Advance(); | 285 Advance(); |
330 return tok; | 286 return tok; |
331 } | 287 } |
332 | 288 |
333 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | 289 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
334 Advance(); | 290 Advance(); |
335 if (c0_ == next) { | 291 if (c0_ == next) { |
336 Advance(); | 292 Advance(); |
337 return then; | 293 return then; |
338 } else { | 294 } else { |
339 return else_; | 295 return else_; |
340 } | 296 } |
341 } | 297 } |
342 | 298 |
343 uc32 ScanHexEscape(uc32 c, int length); | 299 uc32 ScanHexEscape(uc32 c, int length); |
344 uc32 ScanOctalEscape(uc32 c, int length); | 300 uc32 ScanOctalEscape(uc32 c, int length); |
345 | 301 |
346 // Return the current source position. | 302 // Return the current source position. |
347 int source_pos() { | 303 int source_pos() { |
348 return source_->pos() - kCharacterLookaheadBufferSize; | 304 return source_->pos() - kCharacterLookaheadBufferSize; |
349 } | 305 } |
350 | 306 |
351 TokenDesc current_; // desc for current token (as returned by Next()) | 307 TokenDesc current_; // desc for current token (as returned by Next()) |
352 TokenDesc next_; // desc for next token (one token look-ahead) | 308 TokenDesc next_; // desc for next token (one token look-ahead) |
353 | 309 |
354 // Input stream. Must be initialized to a UC16CharacterStream. | 310 // Input stream. Must be initialized to a UTF16Buffer. |
355 UC16CharacterStream* source_; | 311 UTF16Buffer* source_; |
356 | 312 |
357 // Buffer to hold literal values (identifiers, strings, numbers) | 313 // Buffer to hold literal values (identifiers, strings, numbers) |
358 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. | 314 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. |
359 LiteralCollector literal_buffer_; | 315 LiteralCollector literal_buffer_; |
360 | 316 |
361 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 317 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
362 uc32 c0_; | 318 uc32 c0_; |
363 }; | 319 }; |
364 | 320 |
365 // ---------------------------------------------------------------------------- | 321 // ---------------------------------------------------------------------------- |
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
584 // keyword with the current prefix). | 540 // keyword with the current prefix). |
585 const char* keyword_; | 541 const char* keyword_; |
586 int counter_; | 542 int counter_; |
587 Token::Value keyword_token_; | 543 Token::Value keyword_token_; |
588 }; | 544 }; |
589 | 545 |
590 | 546 |
591 } } // namespace v8::internal | 547 } } // namespace v8::internal |
592 | 548 |
593 #endif // V8_SCANNER_BASE_H_ | 549 #endif // V8_SCANNER_BASE_H_ |
OLD | NEW |