Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: src/scanner.h

Issue 661367: Refactor the scanner interface... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 10 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/parser.cc ('k') | src/scanner.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 } 77 }
78 78
79 static char* ComputeLimit(char* data, int capacity) { 79 static char* ComputeLimit(char* data, int capacity) {
80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize; 80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize;
81 } 81 }
82 82
83 void AddCharSlow(uc32 c); 83 void AddCharSlow(uc32 c);
84 }; 84 };
85 85
86 86
87 // Interface through which the scanner reads characters from the input source.
87 class UTF16Buffer { 88 class UTF16Buffer {
88 public: 89 public:
89 UTF16Buffer(); 90 UTF16Buffer();
90 virtual ~UTF16Buffer() {} 91 virtual ~UTF16Buffer() {}
91 92
92 virtual void PushBack(uc32 ch) = 0; 93 virtual void PushBack(uc32 ch) = 0;
93 // returns a value < 0 when the buffer end is reached 94 // Returns a value < 0 when the buffer end is reached.
94 virtual uc32 Advance() = 0; 95 virtual uc32 Advance() = 0;
95 virtual void SeekForward(int pos) = 0; 96 virtual void SeekForward(int pos) = 0;
96 97
97 int pos() const { return pos_; } 98 int pos() const { return pos_; }
98 int size() const { return size_; }
99 Handle<String> SubString(int start, int end);
100 99
101 protected: 100 protected:
102 Handle<String> data_; 101 int pos_; // Current position in the buffer.
103 int pos_; 102 int end_; // Position where scanning should stop (EOF).
104 int size_;
105 }; 103 };
106 104
107 105
106 // UTF16 buffer to read characters from a character stream.
108 class CharacterStreamUTF16Buffer: public UTF16Buffer { 107 class CharacterStreamUTF16Buffer: public UTF16Buffer {
109 public: 108 public:
110 CharacterStreamUTF16Buffer(); 109 CharacterStreamUTF16Buffer();
111 virtual ~CharacterStreamUTF16Buffer() {} 110 virtual ~CharacterStreamUTF16Buffer() {}
112 void Initialize(Handle<String> data, unibrow::CharacterStream* stream); 111 void Initialize(Handle<String> data,
112 unibrow::CharacterStream* stream,
113 int start_position,
114 int end_position);
113 virtual void PushBack(uc32 ch); 115 virtual void PushBack(uc32 ch);
114 virtual uc32 Advance(); 116 virtual uc32 Advance();
115 virtual void SeekForward(int pos); 117 virtual void SeekForward(int pos);
116 118
117 private: 119 private:
118 List<uc32> pushback_buffer_; 120 List<uc32> pushback_buffer_;
119 uc32 last_; 121 uc32 last_;
120 unibrow::CharacterStream* stream_; 122 unibrow::CharacterStream* stream_;
121 123
122 List<uc32>* pushback_buffer() { return &pushback_buffer_; } 124 List<uc32>* pushback_buffer() { return &pushback_buffer_; }
123 }; 125 };
124 126
125 127
126 class TwoByteStringUTF16Buffer: public UTF16Buffer { 128 // UTF16 buffer to read characters from an external string.
129 template <typename StringType, typename CharType>
130 class ExternalStringUTF16Buffer: public UTF16Buffer {
127 public: 131 public:
128 TwoByteStringUTF16Buffer(); 132 ExternalStringUTF16Buffer();
129 virtual ~TwoByteStringUTF16Buffer() {} 133 virtual ~ExternalStringUTF16Buffer() {}
130 void Initialize(Handle<ExternalTwoByteString> data); 134 void Initialize(Handle<StringType> data,
135 int start_position,
136 int end_position);
131 virtual void PushBack(uc32 ch); 137 virtual void PushBack(uc32 ch);
132 virtual uc32 Advance(); 138 virtual uc32 Advance();
133 virtual void SeekForward(int pos); 139 virtual void SeekForward(int pos);
134 140
135 private: 141 private:
136 const uint16_t* raw_data_; 142 const CharType* raw_data_; // Pointer to the actual array of characters.
137 }; 143 };
138 144
139 145
140 class KeywordMatcher { 146 class KeywordMatcher {
141 // Incrementally recognize keywords. 147 // Incrementally recognize keywords.
142 // 148 //
143 // Recognized keywords: 149 // Recognized keywords:
144 // break case catch const* continue debugger* default delete do else 150 // break case catch const* continue debugger* default delete do else
145 // finally false for function if in instanceof native* new null 151 // finally false for function if in instanceof native* new null
146 // return switch this throw true try typeof var void while with 152 // return switch this throw true try typeof var void while with
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after
256 enum ParserLanguage { JAVASCRIPT, JSON }; 262 enum ParserLanguage { JAVASCRIPT, JSON };
257 263
258 264
259 class Scanner { 265 class Scanner {
260 public: 266 public:
261 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; 267 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
262 268
263 // Construction 269 // Construction
264 explicit Scanner(ParserMode parse_mode); 270 explicit Scanner(ParserMode parse_mode);
265 271
266 // Initialize the Scanner to scan source: 272 // Initialize the Scanner to scan source.
267 void Init(Handle<String> source, 273 void Initialize(Handle<String> source,
268 unibrow::CharacterStream* stream, 274 ParserLanguage language);
269 int position, 275 void Initialize(Handle<String> source,
270 ParserLanguage language); 276 unibrow::CharacterStream* stream,
277 ParserLanguage language);
278 void Initialize(Handle<String> source,
279 int start_position, int end_position,
280 ParserLanguage language);
271 281
272 // Returns the next token. 282 // Returns the next token.
273 Token::Value Next(); 283 Token::Value Next();
274 284
275 // One token look-ahead (past the token returned by Next()). 285 // One token look-ahead (past the token returned by Next()).
276 Token::Value peek() const { return next_.token; } 286 Token::Value peek() const { return next_.token; }
277 287
278 // Returns true if there was a line terminator before the peek'ed token. 288 // Returns true if there was a line terminator before the peek'ed token.
279 bool has_line_terminator_before_next() const { 289 bool has_line_terminator_before_next() const {
280 return has_line_terminator_before_next_; 290 return has_line_terminator_before_next_;
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
328 // Returns true if regexp flags are scanned (always since flags can 338 // Returns true if regexp flags are scanned (always since flags can
329 // be empty). 339 // be empty).
330 bool ScanRegExpFlags(); 340 bool ScanRegExpFlags();
331 341
332 // Seek forward to the given position. This operation does not 342 // Seek forward to the given position. This operation does not
333 // work in general, for instance when there are pushed back 343 // work in general, for instance when there are pushed back
334 // characters, but works for seeking forward until simple delimiter 344 // characters, but works for seeking forward until simple delimiter
335 // tokens, which is what it is used for. 345 // tokens, which is what it is used for.
336 void SeekForward(int pos); 346 void SeekForward(int pos);
337 347
338 Handle<String> SubString(int start_pos, int end_pos);
339 bool stack_overflow() { return stack_overflow_; } 348 bool stack_overflow() { return stack_overflow_; }
340 349
341 static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; } 350 static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; }
342 351
343 // Tells whether the buffer contains an identifier (no escapes). 352 // Tells whether the buffer contains an identifier (no escapes).
344 // Used for checking if a property name is an identifier. 353 // Used for checking if a property name is an identifier.
345 static bool IsIdentifier(unibrow::CharacterStream* buffer); 354 static bool IsIdentifier(unibrow::CharacterStream* buffer);
346 355
347 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; 356 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
348 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; 357 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
349 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; 358 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
350 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; 359 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
351 360
352 static const int kCharacterLookaheadBufferSize = 1; 361 static const int kCharacterLookaheadBufferSize = 1;
362 static const int kNoEndPosition = 1;
353 363
354 private: 364 private:
365 void Init(Handle<String> source,
366 unibrow::CharacterStream* stream,
367 int start_position, int end_position,
368 ParserLanguage language);
369
370
371 // Different UTF16 buffers used to pull characters from. Based on input one of
372 // these will be initialized as the actual data source.
355 CharacterStreamUTF16Buffer char_stream_buffer_; 373 CharacterStreamUTF16Buffer char_stream_buffer_;
356 TwoByteStringUTF16Buffer two_byte_string_buffer_; 374 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>
375 two_byte_string_buffer_;
376 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;
357 377
358 // Source. 378 // Source. Will point to one of the buffers declared above.
359 UTF16Buffer* source_; 379 UTF16Buffer* source_;
360 int position_; 380
381 // Used to convert the source string into a character stream when a stream
382 // is not passed to the scanner.
383 SafeStringInputBuffer safe_string_input_buffer_;
361 384
362 // Buffer to hold literal values (identifiers, strings, numbers) 385 // Buffer to hold literal values (identifiers, strings, numbers)
363 // using 0-terminated UTF-8 encoding. 386 // using 0-terminated UTF-8 encoding.
364 UTF8Buffer literal_buffer_1_; 387 UTF8Buffer literal_buffer_1_;
365 UTF8Buffer literal_buffer_2_; 388 UTF8Buffer literal_buffer_2_;
366 389
367 bool stack_overflow_; 390 bool stack_overflow_;
368 static StaticResource<Utf8Decoder> utf8_decoder_; 391 static StaticResource<Utf8Decoder> utf8_decoder_;
369 392
370 // One Unicode character look-ahead; c0_ < 0 at the end of the input. 393 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
453 uc32 ScanHexEscape(uc32 c, int length); 476 uc32 ScanHexEscape(uc32 c, int length);
454 uc32 ScanOctalEscape(uc32 c, int length); 477 uc32 ScanOctalEscape(uc32 c, int length);
455 void ScanEscape(); 478 void ScanEscape();
456 Token::Value ScanString(); 479 Token::Value ScanString();
457 480
458 // Scans a possible HTML comment -- begins with '<!'. 481 // Scans a possible HTML comment -- begins with '<!'.
459 Token::Value ScanHtmlComment(); 482 Token::Value ScanHtmlComment();
460 483
461 // Return the current source position. 484 // Return the current source position.
462 int source_pos() { 485 int source_pos() {
463 return source_->pos() - kCharacterLookaheadBufferSize + position_; 486 return source_->pos() - kCharacterLookaheadBufferSize;
464 } 487 }
465 488
466 // Decodes a unicode escape-sequence which is part of an identifier. 489 // Decodes a unicode escape-sequence which is part of an identifier.
467 // If the escape sequence cannot be decoded the result is kBadRune. 490 // If the escape sequence cannot be decoded the result is kBadRune.
468 uc32 ScanIdentifierUnicodeEscape(); 491 uc32 ScanIdentifierUnicodeEscape();
469 }; 492 };
470 493
471 } } // namespace v8::internal 494 } } // namespace v8::internal
472 495
473 #endif // V8_SCANNER_H_ 496 #endif // V8_SCANNER_H_
OLD | NEW
« no previous file with comments | « src/parser.cc ('k') | src/scanner.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698