Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(279)

Side by Side Diff: src/scanner-base.h

Issue 5545006: Optimized scanner to avoid virtual calls for every character read. (Closed)
Patch Set: Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
45 // Returns the value (0 .. 15) of a hexadecimal character c. 45 // Returns the value (0 .. 15) of a hexadecimal character c.
46 // If c is not a legal hexadecimal character, returns a value < 0. 46 // If c is not a legal hexadecimal character, returns a value < 0.
47 inline int HexValue(uc32 c) { 47 inline int HexValue(uc32 c) {
48 c -= '0'; 48 c -= '0';
49 if (static_cast<unsigned>(c) <= 9) return c; 49 if (static_cast<unsigned>(c) <= 9) return c;
50 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. 50 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
51 if (static_cast<unsigned>(c) <= 5) return c + 10; 51 if (static_cast<unsigned>(c) <= 5) return c + 10;
52 return -1; 52 return -1;
53 } 53 }
54 54
55 // ----------------------------------------------------------------------------
56 // UTF16Buffer - scanner input source with pushback.
57 55
58 class UTF16Buffer { 56 // ---------------------------------------------------------------------
57 // Buffered stream of characters, using an internal UC16 buffer.
58
59 class UC16CharacterStream {
59 public: 60 public:
60 UTF16Buffer(); 61 UC16CharacterStream() : pos_(0) { }
61 virtual ~UTF16Buffer() {} 62 virtual ~UC16CharacterStream() { }
62 63
63 virtual void PushBack(uc32 ch) = 0; 64 // Returns and advances past the next UC16 character in the input
64 // Returns a value < 0 when the buffer end is reached. 65 // stream. If there are no more characters, it returns a negative
65 virtual uc32 Advance() = 0; 66 // value.
66 virtual void SeekForward(int pos) = 0; 67 inline int32_t Advance() {
68 if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
69 pos_++;
70 return *(buffer_cursor_++);
71 }
72 // Note: currently the following increment is necessary to avoid a
73 // parser problem! The scanner treats the final kEndOfInput as
74 // a character with a position, and does math relative to that
75 // position.
76 pos_++;
67 77
68 int pos() const { return pos_; } 78 return kEndOfInput;
79 }
69 80
70 static const int kNoEndPosition = 1; 81 // Return the current position in the character stream.
82 // Starts at zero.
83 inline unsigned pos() const { return pos_; }
71 84
85 // Skips forward past the next character_count UC16 characters
86 // in the input, or until the end of input if that comes sooner.
87 // Returns the number of characters actually skipped. If less
88 // than character_count, the end of the input was reached.
89 inline unsigned SeekForward(unsigned character_count) {
90 unsigned buffered_chars =
91 static_cast<unsigned>(buffer_end_ - buffer_cursor_);
92 if (character_count <= buffered_chars) {
93 buffer_cursor_ += character_count;
94 pos_ += character_count;
95 return character_count;
96 }
97 return SlowSeekForward(character_count);
98 }
99 // Pushes back the most recently read UC16 character, i.e.,
100 // the value returned by the most recent call to Advance.
101 // Must not be used right after calling SeekForward.
102 virtual void PushBack(uc16 character) = 0;
Erik Corry 2010/12/07 12:27:30 Missing blank line
72 protected: 103 protected:
73 // Initial value of end_ before the input stream is initialized. 104 static const int32_t kEndOfInput = -1;
74 105
75 int pos_; // Current position in the buffer. 106 // Ensures that the buffer_cursor_ points to the character at
76 int end_; // Position where scanning should stop (EOF). 107 // position pos_ of the input, if possible. If the position
108 // is at or after the end of the input, return false. If there
109 // are more characters available, return true.
110 virtual bool ReadBlock() = 0;
111 virtual unsigned SlowSeekForward(unsigned character_count) = 0;
112
113 const uc16* buffer_cursor_;
114 const uc16* buffer_end_;
115 unsigned pos_;
77 }; 116 };
78 117
79 118
119 // ---------------------------------------------------------------------
120 // Constants used by scanners.
121
80 class ScannerConstants : AllStatic { 122 class ScannerConstants : AllStatic {
81 public: 123 public:
82 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; 124 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
83 125
84 static StaticResource<Utf8Decoder>* utf8_decoder() { 126 static StaticResource<Utf8Decoder>* utf8_decoder() {
85 return &utf8_decoder_; 127 return &utf8_decoder_;
86 } 128 }
87 129
88 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; 130 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
89 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; 131 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
164 public: 206 public:
165 explicit LiteralScope(Scanner* self); 207 explicit LiteralScope(Scanner* self);
166 ~LiteralScope(); 208 ~LiteralScope();
167 void Complete(); 209 void Complete();
168 210
169 private: 211 private:
170 Scanner* scanner_; 212 Scanner* scanner_;
171 bool complete_; 213 bool complete_;
172 }; 214 };
173 215
174 Scanner(); 216 explicit Scanner();
Erik Corry 2010/12/07 12:27:30 You only need this for single-argument constructors.
175 217
176 // Returns the current token again. 218 // Returns the current token again.
177 Token::Value current_token() { return current_.token; } 219 Token::Value current_token() { return current_.token; }
178 220
179 // One token look-ahead (past the token returned by Next()). 221 // One token look-ahead (past the token returned by Next()).
180 Token::Value peek() const { return next_.token; } 222 Token::Value peek() const { return next_.token; }
181 223
182 struct Location { 224 struct Location {
183 Location(int b, int e) : beg_pos(b), end_pos(e) { } 225 Location(int b, int e) : beg_pos(b), end_pos(e) { }
184 Location() : beg_pos(0), end_pos(0) { } 226 Location() : beg_pos(0), end_pos(0) { }
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
270 } 312 }
271 313
272 inline void AddLiteralCharAdvance() { 314 inline void AddLiteralCharAdvance() {
273 AddLiteralChar(c0_); 315 AddLiteralChar(c0_);
274 Advance(); 316 Advance();
275 } 317 }
276 318
277 // Low-level scanning support. 319 // Low-level scanning support.
278 void Advance() { c0_ = source_->Advance(); } 320 void Advance() { c0_ = source_->Advance(); }
279 void PushBack(uc32 ch) { 321 void PushBack(uc32 ch) {
280 source_->PushBack(ch); 322 source_->PushBack(c0_);
281 c0_ = ch; 323 c0_ = ch;
282 } 324 }
283 325
284 inline Token::Value Select(Token::Value tok) { 326 inline Token::Value Select(Token::Value tok) {
285 Advance(); 327 Advance();
286 return tok; 328 return tok;
287 } 329 }
288 330
289 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { 331 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
290 Advance(); 332 Advance();
291 if (c0_ == next) { 333 if (c0_ == next) {
292 Advance(); 334 Advance();
293 return then; 335 return then;
294 } else { 336 } else {
295 return else_; 337 return else_;
296 } 338 }
297 } 339 }
298 340
299 uc32 ScanHexEscape(uc32 c, int length); 341 uc32 ScanHexEscape(uc32 c, int length);
300 uc32 ScanOctalEscape(uc32 c, int length); 342 uc32 ScanOctalEscape(uc32 c, int length);
301 343
302 // Return the current source position. 344 // Return the current source position.
303 int source_pos() { 345 int source_pos() {
304 return source_->pos() - kCharacterLookaheadBufferSize; 346 return source_->pos() - kCharacterLookaheadBufferSize;
305 } 347 }
306 348
307 TokenDesc current_; // desc for current token (as returned by Next()) 349 TokenDesc current_; // desc for current token (as returned by Next())
308 TokenDesc next_; // desc for next token (one token look-ahead) 350 TokenDesc next_; // desc for next token (one token look-ahead)
309 351
310 // Input stream. Must be initialized to an UTF16Buffer. 352 // Input stream. Must be initialized to an UC16CharacterStream.
311 UTF16Buffer* source_; 353 UC16CharacterStream* source_;
312 354
313 // Buffer to hold literal values (identifiers, strings, numbers) 355 // Buffer to hold literal values (identifiers, strings, numbers)
314 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. 356 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally.
315 LiteralCollector literal_buffer_; 357 LiteralCollector literal_buffer_;
316 358
317 // One Unicode character look-ahead; c0_ < 0 at the end of the input. 359 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
318 uc32 c0_; 360 uc32 c0_;
319 }; 361 };
320 362
321 // ---------------------------------------------------------------------------- 363 // ----------------------------------------------------------------------------
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after
540 // keyword with the current prefix). 582 // keyword with the current prefix).
541 const char* keyword_; 583 const char* keyword_;
542 int counter_; 584 int counter_;
543 Token::Value keyword_token_; 585 Token::Value keyword_token_;
544 }; 586 };
545 587
546 588
547 } } // namespace v8::internal 589 } } // namespace v8::internal
548 590
549 #endif // V8_SCANNER_BASE_H_ 591 #endif // V8_SCANNER_BASE_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698