Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(127)

Side by Side Diff: src/scanner.h

Issue 2832018: Made scanner follow coding style. (Closed)
Patch Set: Created 10 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/scanner.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after
147 // Incrementally recognize keywords. 147 // Incrementally recognize keywords.
148 // 148 //
149 // Recognized keywords: 149 // Recognized keywords:
150 // break case catch const* continue debugger* default delete do else 150 // break case catch const* continue debugger* default delete do else
151 // finally false for function if in instanceof native* new null 151 // finally false for function if in instanceof native* new null
152 // return switch this throw true try typeof var void while with 152 // return switch this throw true try typeof var void while with
153 // 153 //
154 // *: Actually "future reserved keywords". These are the only ones we 154 // *: Actually "future reserved keywords". These are the only ones we
155 // recognize; the remaining are allowed as identifiers. 155 // recognize; the remaining are allowed as identifiers.
156 public: 156 public:
157 KeywordMatcher() : state_(INITIAL), token_(Token::IDENTIFIER) {} 157 KeywordMatcher()
158 : state_(INITIAL),
159 token_(Token::IDENTIFIER),
160 keyword_(NULL),
161 counter_(0),
162 keyword_token_(Token::ILLEGAL) {}
158 163
159 Token::Value token() { return token_; } 164 Token::Value token() { return token_; }
160 165
161 inline void AddChar(uc32 input) { 166 inline void AddChar(uc32 input) {
162 if (state_ != UNMATCHABLE) { 167 if (state_ != UNMATCHABLE) {
163 Step(input); 168 Step(input);
164 } 169 }
165 } 170 }
166 171
167 void Fail() { 172 void Fail() {
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
199 }; 204 };
200 205
201 // Range of possible first characters of a keyword. 206 // Range of possible first characters of a keyword.
202 static const unsigned int kFirstCharRangeMin = 'b'; 207 static const unsigned int kFirstCharRangeMin = 'b';
203 static const unsigned int kFirstCharRangeMax = 'w'; 208 static const unsigned int kFirstCharRangeMax = 'w';
204 static const unsigned int kFirstCharRangeLength = 209 static const unsigned int kFirstCharRangeLength =
205 kFirstCharRangeMax - kFirstCharRangeMin + 1; 210 kFirstCharRangeMax - kFirstCharRangeMin + 1;
206 // State map for first keyword character range. 211 // State map for first keyword character range.
207 static FirstState first_states_[kFirstCharRangeLength]; 212 static FirstState first_states_[kFirstCharRangeLength];
208 213
209 // Current state.
210 State state_;
211 // Token for currently added characters.
212 Token::Value token_;
213
214 // Matching a specific keyword string (there is only one possible valid
215 // keyword with the current prefix).
216 const char* keyword_;
217 int counter_;
218 Token::Value keyword_token_;
219
220 // If input equals keyword's character at position, continue matching keyword 214 // If input equals keyword's character at position, continue matching keyword
221 // from that position. 215 // from that position.
222 inline bool MatchKeywordStart(uc32 input, 216 inline bool MatchKeywordStart(uc32 input,
223 const char* keyword, 217 const char* keyword,
224 int position, 218 int position,
225 Token::Value token_if_match) { 219 Token::Value token_if_match) {
226 if (input == keyword[position]) { 220 if (input == keyword[position]) {
227 state_ = KEYWORD_PREFIX; 221 state_ = KEYWORD_PREFIX;
228 this->keyword_ = keyword; 222 this->keyword_ = keyword;
229 this->counter_ = position + 1; 223 this->counter_ = position + 1;
230 this->keyword_token_ = token_if_match; 224 this->keyword_token_ = token_if_match;
231 return true; 225 return true;
232 } 226 }
233 return false; 227 return false;
234 } 228 }
235 229
236 // If input equals match character, transition to new state and return true. 230 // If input equals match character, transition to new state and return true.
237 inline bool MatchState(uc32 input, char match, State new_state) { 231 inline bool MatchState(uc32 input, char match, State new_state) {
238 if (input == match) { 232 if (input == match) {
239 state_ = new_state; 233 state_ = new_state;
240 return true; 234 return true;
241 } 235 }
242 return false; 236 return false;
243 } 237 }
244 238
245 inline bool MatchKeyword(uc32 input, 239 inline bool MatchKeyword(uc32 input,
246 char match, 240 char match,
247 State new_state, 241 State new_state,
248 Token::Value keyword_token) { 242 Token::Value keyword_token) {
249 if (input == match) { // Matched "do". 243 if (input != match) {
250 state_ = new_state; 244 return false;
251 token_ = keyword_token;
252 return true;
253 } 245 }
254 return false; 246 state_ = new_state;
247 token_ = keyword_token;
248 return true;
255 } 249 }
256 250
257 void Step(uc32 input); 251 void Step(uc32 input);
252
253 // Current state.
254 State state_;
255 // Token for currently added characters.
256 Token::Value token_;
257
258 // Matching a specific keyword string (there is only one possible valid
259 // keyword with the current prefix).
260 const char* keyword_;
261 int counter_;
262 Token::Value keyword_token_;
258 }; 263 };
259 264
260 265
261 enum ParserMode { PARSE, PREPARSE }; 266 enum ParserMode { PARSE, PREPARSE };
262 enum ParserLanguage { JAVASCRIPT, JSON }; 267 enum ParserLanguage { JAVASCRIPT, JSON };
263 268
264 269
265 class Scanner { 270 class Scanner {
266 public: 271 public:
267 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; 272 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
355 360
356 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; 361 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
357 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; 362 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
358 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; 363 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
359 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; 364 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
360 365
361 static const int kCharacterLookaheadBufferSize = 1; 366 static const int kCharacterLookaheadBufferSize = 1;
362 static const int kNoEndPosition = 1; 367 static const int kNoEndPosition = 1;
363 368
364 private: 369 private:
365 void Init(Handle<String> source,
366 unibrow::CharacterStream* stream,
367 int start_position, int end_position,
368 ParserLanguage language);
369
370
371 // Different UTF16 buffers used to pull characters from. Based on input one of
372 // these will be initialized as the actual data source.
373 CharacterStreamUTF16Buffer char_stream_buffer_;
374 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>
375 two_byte_string_buffer_;
376 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;
377
378 // Source. Will point to one of the buffers declared above.
379 UTF16Buffer* source_;
380
381 // Used to convert the source string into a character stream when a stream
382 // is not passed to the scanner.
383 SafeStringInputBuffer safe_string_input_buffer_;
384
385 // Buffer to hold literal values (identifiers, strings, numbers)
386 // using 0-terminated UTF-8 encoding.
387 UTF8Buffer literal_buffer_1_;
388 UTF8Buffer literal_buffer_2_;
389
390 bool stack_overflow_;
391 static StaticResource<Utf8Decoder> utf8_decoder_;
392
393 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
394 uc32 c0_;
395
396 // The current and look-ahead token. 370 // The current and look-ahead token.
397 struct TokenDesc { 371 struct TokenDesc {
398 Token::Value token; 372 Token::Value token;
399 Location location; 373 Location location;
400 UTF8Buffer* literal_buffer; 374 UTF8Buffer* literal_buffer;
401 }; 375 };
402 376
403 TokenDesc current_; // desc for current token (as returned by Next()) 377 void Init(Handle<String> source,
404 TokenDesc next_; // desc for next token (one token look-ahead) 378 unibrow::CharacterStream* stream,
405 bool has_line_terminator_before_next_; 379 int start_position, int end_position,
406 bool is_pre_parsing_; 380 ParserLanguage language);
407 bool is_parsing_json_;
408 381
409 // Literal buffer support 382 // Literal buffer support
410 void StartLiteral(); 383 void StartLiteral();
411 void AddChar(uc32 ch); 384 void AddChar(uc32 ch);
412 void AddCharAdvance(); 385 void AddCharAdvance();
413 void TerminateLiteral(); 386 void TerminateLiteral();
414 387
415 // Low-level scanning support. 388 // Low-level scanning support.
416 void Advance() { c0_ = source_->Advance(); } 389 void Advance() { c0_ = source_->Advance(); }
417 void PushBack(uc32 ch) { 390 void PushBack(uc32 ch) {
418 source_->PushBack(ch); 391 source_->PushBack(ch);
419 c0_ = ch; 392 c0_ = ch;
420 } 393 }
421 394
422 bool SkipWhiteSpace() { 395 bool SkipWhiteSpace() {
423 if (is_parsing_json_) { 396 if (is_parsing_json_) {
424 return SkipJsonWhiteSpace(); 397 return SkipJsonWhiteSpace();
425 } else { 398 } else {
426 return SkipJavaScriptWhiteSpace(); 399 return SkipJavaScriptWhiteSpace();
427 } 400 }
428 } 401 }
402
429 bool SkipJavaScriptWhiteSpace(); 403 bool SkipJavaScriptWhiteSpace();
430 bool SkipJsonWhiteSpace(); 404 bool SkipJsonWhiteSpace();
431 Token::Value SkipSingleLineComment(); 405 Token::Value SkipSingleLineComment();
432 Token::Value SkipMultiLineComment(); 406 Token::Value SkipMultiLineComment();
433 407
434 inline Token::Value Select(Token::Value tok); 408 inline Token::Value Select(Token::Value tok);
435 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_); 409 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_);
436 410
437 inline void Scan() { 411 inline void Scan() {
438 if (is_parsing_json_) { 412 if (is_parsing_json_) {
(...skipping 14 matching lines...) Expand all
453 // carriage-return, newline and space. 427 // carriage-return, newline and space.
454 void ScanJson(); 428 void ScanJson();
455 429
456 // A JSON number (production JSONNumber) is a subset of the valid JavaScript 430 // A JSON number (production JSONNumber) is a subset of the valid JavaScript
457 // decimal number literals. 431 // decimal number literals.
458 // It includes an optional minus sign, must have at least one 432 // It includes an optional minus sign, must have at least one
459 // digit before and after a decimal point, may not have prefixed zeros (unless 433 // digit before and after a decimal point, may not have prefixed zeros (unless
460 // the integer part is zero), and may include an exponent part (e.g., "e-10"). 434 // the integer part is zero), and may include an exponent part (e.g., "e-10").
461 // Hexadecimal and octal numbers are not allowed. 435 // Hexadecimal and octal numbers are not allowed.
462 Token::Value ScanJsonNumber(); 436 Token::Value ScanJsonNumber();
437
463 // A JSON string (production JSONString) is a subset of valid JavaScript string 438 // A JSON string (production JSONString) is a subset of valid JavaScript string
464 // literals. The string must only be double-quoted (not single-quoted), and 439 // literals. The string must only be double-quoted (not single-quoted), and
465 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and 440 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
466 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. 441 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
467 Token::Value ScanJsonString(); 442 Token::Value ScanJsonString();
443
468 // Used to recognize one of the literals "true", "false", or "null". These 444 // Used to recognize one of the literals "true", "false", or "null". These
469 // are the only valid JSON identifiers (productions JSONBooleanLiteral, 445 // are the only valid JSON identifiers (productions JSONBooleanLiteral,
470 // JSONNullLiteral). 446 // JSONNullLiteral).
471 Token::Value ScanJsonIdentifier(const char* text, Token::Value token); 447 Token::Value ScanJsonIdentifier(const char* text, Token::Value token);
472 448
473 void ScanDecimalDigits(); 449 void ScanDecimalDigits();
474 Token::Value ScanNumber(bool seen_period); 450 Token::Value ScanNumber(bool seen_period);
475 Token::Value ScanIdentifier(); 451 Token::Value ScanIdentifier();
476 uc32 ScanHexEscape(uc32 c, int length); 452 uc32 ScanHexEscape(uc32 c, int length);
477 uc32 ScanOctalEscape(uc32 c, int length); 453 uc32 ScanOctalEscape(uc32 c, int length);
478 void ScanEscape(); 454 void ScanEscape();
479 Token::Value ScanString(); 455 Token::Value ScanString();
480 456
481 // Scans a possible HTML comment -- begins with '<!'. 457 // Scans a possible HTML comment -- begins with '<!'.
482 Token::Value ScanHtmlComment(); 458 Token::Value ScanHtmlComment();
483 459
484 // Return the current source position. 460 // Return the current source position.
485 int source_pos() { 461 int source_pos() {
486 return source_->pos() - kCharacterLookaheadBufferSize; 462 return source_->pos() - kCharacterLookaheadBufferSize;
487 } 463 }
488 464
489 // Decodes a unicode escape-sequence which is part of an identifier. 465 // Decodes a unicode escape-sequence which is part of an identifier.
490 // If the escape sequence cannot be decoded the result is kBadRune. 466 // If the escape sequence cannot be decoded the result is kBadRune.
491 uc32 ScanIdentifierUnicodeEscape(); 467 uc32 ScanIdentifierUnicodeEscape();
468
469 TokenDesc current_; // desc for current token (as returned by Next())
470 TokenDesc next_; // desc for next token (one token look-ahead)
471 bool has_line_terminator_before_next_;
472 bool is_pre_parsing_;
473 bool is_parsing_json_;
474
475 // Different UTF16 buffers used to pull characters from. Based on input one of
476 // these will be initialized as the actual data source.
477 CharacterStreamUTF16Buffer char_stream_buffer_;
478 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>
479 two_byte_string_buffer_;
480 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;
481
482 // Source. Will point to one of the buffers declared above.
483 UTF16Buffer* source_;
484
485 // Used to convert the source string into a character stream when a stream
486 // is not passed to the scanner.
487 SafeStringInputBuffer safe_string_input_buffer_;
488
489 // Buffer to hold literal values (identifiers, strings, numbers)
490 // using 0-terminated UTF-8 encoding.
491 UTF8Buffer literal_buffer_1_;
492 UTF8Buffer literal_buffer_2_;
493
494 bool stack_overflow_;
495 static StaticResource<Utf8Decoder> utf8_decoder_;
496
497 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
498 uc32 c0_;
492 }; 499 };
493 500
494 } } // namespace v8::internal 501 } } // namespace v8::internal
495 502
496 #endif // V8_SCANNER_H_ 503 #endif // V8_SCANNER_H_
OLDNEW
« no previous file with comments | « no previous file | src/scanner.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698