OLD | NEW |
---|---|
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
45 // Returns the value (0 .. 15) of a hexadecimal character c. | 45 // Returns the value (0 .. 15) of a hexadecimal character c. |
46 // If c is not a legal hexadecimal character, returns a value < 0. | 46 // If c is not a legal hexadecimal character, returns a value < 0. |
47 inline int HexValue(uc32 c) { | 47 inline int HexValue(uc32 c) { |
48 c -= '0'; | 48 c -= '0'; |
49 if (static_cast<unsigned>(c) <= 9) return c; | 49 if (static_cast<unsigned>(c) <= 9) return c; |
50 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. | 50 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. |
51 if (static_cast<unsigned>(c) <= 5) return c + 10; | 51 if (static_cast<unsigned>(c) <= 5) return c + 10; |
52 return -1; | 52 return -1; |
53 } | 53 } |
54 | 54 |
55 // ---------------------------------------------------------------------------- | |
56 // UTF16Buffer - scanner input source with pushback. | |
57 | 55 |
58 class UTF16Buffer { | 56 // --------------------------------------------------------------------- |
57 // Buffered stream of characters, using an internal UC16 buffer. | |
58 | |
59 class UC16CharacterStream { | |
59 public: | 60 public: |
60 UTF16Buffer(); | 61 UC16CharacterStream() : pos_(0) { } |
61 virtual ~UTF16Buffer() {} | 62 virtual ~UC16CharacterStream() { } |
62 | 63 |
63 virtual void PushBack(uc32 ch) = 0; | 64 // Returns and advances past the next UC16 character in the input |
64 // Returns a value < 0 when the buffer end is reached. | 65 // stream. If there are no more characters, it returns a negative |
65 virtual uc32 Advance() = 0; | 66 // value. |
66 virtual void SeekForward(int pos) = 0; | 67 inline int32_t Advance() { |
68 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { | |
69 pos_++; | |
70 return *(buffer_cursor_++); | |
71 } | |
72 // Note: currently the following increment is necessary to avoid a | |
73 // parser problem! The scanner treats the final kEndOfInput as | |
74 // a character with a position, and does math relative to that | |
75 // position. | |
76 pos_++; | |
67 | 77 |
68 int pos() const { return pos_; } | 78 return kEndOfInput; |
79 } | |
69 | 80 |
70 static const int kNoEndPosition = 1; | 81 // Return the current position in the character stream. |
82 // Starts at zero. | |
83 inline unsigned pos() const { return pos_; } | |
71 | 84 |
85 // Skips forward past the next character_count UC16 characters | |
86 // in the input, or until the end of input if that comes sooner. | |
87 // Returns the number of characters actually skipped. If less | |
88 // than character_count, the end of the input was reached. | |
89 inline unsigned SeekForward(unsigned character_count) { | |
90 unsigned buffered_chars = | |
91 static_cast<unsigned>(buffer_end_ - buffer_cursor_); | |
92 if (character_count <= buffered_chars) { | |
93 buffer_cursor_ += character_count; | |
94 pos_ += character_count; | |
95 return character_count; | |
96 } | |
97 return SlowSeekForward(character_count); | |
98 } | |
99 // Pushes back the most recently read UC16 character, i.e., | |
100 // the value returned by the most recent call to Advance. | |
101 // Must not be used right after calling SeekForward. | |
102 virtual void PushBack(uc16 character) = 0; | |
Erik Corry
2010/12/07 12:27:30
Missing blank line
| |
72 protected: | 103 protected: |
73 // Initial value of end_ before the input stream is initialized. | 104 static const int32_t kEndOfInput = -1; |
74 | 105 |
75 int pos_; // Current position in the buffer. | 106 // Ensures that the buffer_cursor_ points to the character at |
76 int end_; // Position where scanning should stop (EOF). | 107 // position pos_ of the input, if possible. If the position |
108 // is at or after the end of the input, return false. If there | |
109 // are more characters available, return true. | |
110 virtual bool ReadBlock() = 0; | |
111 virtual unsigned SlowSeekForward(unsigned character_count) = 0; | |
112 | |
113 const uc16* buffer_cursor_; | |
114 const uc16* buffer_end_; | |
115 unsigned pos_; | |
77 }; | 116 }; |
78 | 117 |
79 | 118 |
119 // --------------------------------------------------------------------- | |
120 // Constants used by scanners. | |
121 | |
80 class ScannerConstants : AllStatic { | 122 class ScannerConstants : AllStatic { |
81 public: | 123 public: |
82 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; | 124 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; |
83 | 125 |
84 static StaticResource<Utf8Decoder>* utf8_decoder() { | 126 static StaticResource<Utf8Decoder>* utf8_decoder() { |
85 return &utf8_decoder_; | 127 return &utf8_decoder_; |
86 } | 128 } |
87 | 129 |
88 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; | 130 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; |
89 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; | 131 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
164 public: | 206 public: |
165 explicit LiteralScope(Scanner* self); | 207 explicit LiteralScope(Scanner* self); |
166 ~LiteralScope(); | 208 ~LiteralScope(); |
167 void Complete(); | 209 void Complete(); |
168 | 210 |
169 private: | 211 private: |
170 Scanner* scanner_; | 212 Scanner* scanner_; |
171 bool complete_; | 213 bool complete_; |
172 }; | 214 }; |
173 | 215 |
174 Scanner(); | 216 explicit Scanner(); |
Erik Corry
2010/12/07 12:27:30
You only need `explicit` on single-argument constructors.
| |
175 | 217 |
176 // Returns the current token again. | 218 // Returns the current token again. |
177 Token::Value current_token() { return current_.token; } | 219 Token::Value current_token() { return current_.token; } |
178 | 220 |
179 // One token look-ahead (past the token returned by Next()). | 221 // One token look-ahead (past the token returned by Next()). |
180 Token::Value peek() const { return next_.token; } | 222 Token::Value peek() const { return next_.token; } |
181 | 223 |
182 struct Location { | 224 struct Location { |
183 Location(int b, int e) : beg_pos(b), end_pos(e) { } | 225 Location(int b, int e) : beg_pos(b), end_pos(e) { } |
184 Location() : beg_pos(0), end_pos(0) { } | 226 Location() : beg_pos(0), end_pos(0) { } |
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
270 } | 312 } |
271 | 313 |
272 inline void AddLiteralCharAdvance() { | 314 inline void AddLiteralCharAdvance() { |
273 AddLiteralChar(c0_); | 315 AddLiteralChar(c0_); |
274 Advance(); | 316 Advance(); |
275 } | 317 } |
276 | 318 |
277 // Low-level scanning support. | 319 // Low-level scanning support. |
278 void Advance() { c0_ = source_->Advance(); } | 320 void Advance() { c0_ = source_->Advance(); } |
279 void PushBack(uc32 ch) { | 321 void PushBack(uc32 ch) { |
280 source_->PushBack(ch); | 322 source_->PushBack(c0_); |
281 c0_ = ch; | 323 c0_ = ch; |
282 } | 324 } |
283 | 325 |
284 inline Token::Value Select(Token::Value tok) { | 326 inline Token::Value Select(Token::Value tok) { |
285 Advance(); | 327 Advance(); |
286 return tok; | 328 return tok; |
287 } | 329 } |
288 | 330 |
289 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | 331 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
290 Advance(); | 332 Advance(); |
291 if (c0_ == next) { | 333 if (c0_ == next) { |
292 Advance(); | 334 Advance(); |
293 return then; | 335 return then; |
294 } else { | 336 } else { |
295 return else_; | 337 return else_; |
296 } | 338 } |
297 } | 339 } |
298 | 340 |
299 uc32 ScanHexEscape(uc32 c, int length); | 341 uc32 ScanHexEscape(uc32 c, int length); |
300 uc32 ScanOctalEscape(uc32 c, int length); | 342 uc32 ScanOctalEscape(uc32 c, int length); |
301 | 343 |
302 // Return the current source position. | 344 // Return the current source position. |
303 int source_pos() { | 345 int source_pos() { |
304 return source_->pos() - kCharacterLookaheadBufferSize; | 346 return source_->pos() - kCharacterLookaheadBufferSize; |
305 } | 347 } |
306 | 348 |
307 TokenDesc current_; // desc for current token (as returned by Next()) | 349 TokenDesc current_; // desc for current token (as returned by Next()) |
308 TokenDesc next_; // desc for next token (one token look-ahead) | 350 TokenDesc next_; // desc for next token (one token look-ahead) |
309 | 351 |
310 // Input stream. Must be initialized to a UTF16Buffer. | 352 // Input stream. Must be initialized to a UC16CharacterStream. |
311 UTF16Buffer* source_; | 353 UC16CharacterStream* source_; |
312 | 354 |
313 // Buffer to hold literal values (identifiers, strings, numbers) | 355 // Buffer to hold literal values (identifiers, strings, numbers) |
314 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. | 356 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. |
315 LiteralCollector literal_buffer_; | 357 LiteralCollector literal_buffer_; |
316 | 358 |
317 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 359 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
318 uc32 c0_; | 360 uc32 c0_; |
319 }; | 361 }; |
320 | 362 |
321 // ---------------------------------------------------------------------------- | 363 // ---------------------------------------------------------------------------- |
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
540 // keyword with the current prefix). | 582 // keyword with the current prefix). |
541 const char* keyword_; | 583 const char* keyword_; |
542 int counter_; | 584 int counter_; |
543 Token::Value keyword_token_; | 585 Token::Value keyword_token_; |
544 }; | 586 }; |
545 | 587 |
546 | 588 |
547 } } // namespace v8::internal | 589 } } // namespace v8::internal |
548 | 590 |
549 #endif // V8_SCANNER_BASE_H_ | 591 #endif // V8_SCANNER_BASE_H_ |
OLD | NEW |