| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 147 // Incrementally recognize keywords. | 147 // Incrementally recognize keywords. |
| 148 // | 148 // |
| 149 // Recognized keywords: | 149 // Recognized keywords: |
| 150 // break case catch const* continue debugger* default delete do else | 150 // break case catch const* continue debugger* default delete do else |
| 151 // finally false for function if in instanceof native* new null | 151 // finally false for function if in instanceof native* new null |
| 152 // return switch this throw true try typeof var void while with | 152 // return switch this throw true try typeof var void while with |
| 153 // | 153 // |
| 154 // *: Actually "future reserved keywords". These are the only ones we | 154 // *: Actually "future reserved keywords". These are the only ones we |
| 155 // recognize; the remaining ones are allowed as identifiers. | 155 // recognize; the remaining ones are allowed as identifiers. |
| 156 public: | 156 public: |
| 157 KeywordMatcher() : state_(INITIAL), token_(Token::IDENTIFIER) {} | 157 KeywordMatcher() |
| 158 : state_(INITIAL), |
| 159 token_(Token::IDENTIFIER), |
| 160 keyword_(NULL), |
| 161 counter_(0), |
| 162 keyword_token_(Token::ILLEGAL) {} |
| 158 | 163 |
| 159 Token::Value token() { return token_; } | 164 Token::Value token() { return token_; } |
| 160 | 165 |
| 161 inline void AddChar(uc32 input) { | 166 inline void AddChar(uc32 input) { |
| 162 if (state_ != UNMATCHABLE) { | 167 if (state_ != UNMATCHABLE) { |
| 163 Step(input); | 168 Step(input); |
| 164 } | 169 } |
| 165 } | 170 } |
| 166 | 171 |
| 167 void Fail() { | 172 void Fail() { |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 199 }; | 204 }; |
| 200 | 205 |
| 201 // Range of possible first characters of a keyword. | 206 // Range of possible first characters of a keyword. |
| 202 static const unsigned int kFirstCharRangeMin = 'b'; | 207 static const unsigned int kFirstCharRangeMin = 'b'; |
| 203 static const unsigned int kFirstCharRangeMax = 'w'; | 208 static const unsigned int kFirstCharRangeMax = 'w'; |
| 204 static const unsigned int kFirstCharRangeLength = | 209 static const unsigned int kFirstCharRangeLength = |
| 205 kFirstCharRangeMax - kFirstCharRangeMin + 1; | 210 kFirstCharRangeMax - kFirstCharRangeMin + 1; |
| 206 // State map for first keyword character range. | 211 // State map for first keyword character range. |
| 207 static FirstState first_states_[kFirstCharRangeLength]; | 212 static FirstState first_states_[kFirstCharRangeLength]; |
| 208 | 213 |
| 209 // Current state. | |
| 210 State state_; | |
| 211 // Token for currently added characters. | |
| 212 Token::Value token_; | |
| 213 | |
| 214 // Matching a specific keyword string (there is only one possible valid | |
| 215 // keyword with the current prefix). | |
| 216 const char* keyword_; | |
| 217 int counter_; | |
| 218 Token::Value keyword_token_; | |
| 219 | |
| 220 // If input equals keyword's character at position, continue matching keyword | 214 // If input equals keyword's character at position, continue matching keyword |
| 221 // from that position. | 215 // from that position. |
| 222 inline bool MatchKeywordStart(uc32 input, | 216 inline bool MatchKeywordStart(uc32 input, |
| 223 const char* keyword, | 217 const char* keyword, |
| 224 int position, | 218 int position, |
| 225 Token::Value token_if_match) { | 219 Token::Value token_if_match) { |
| 226 if (input == keyword[position]) { | 220 if (input == keyword[position]) { |
| 227 state_ = KEYWORD_PREFIX; | 221 state_ = KEYWORD_PREFIX; |
| 228 this->keyword_ = keyword; | 222 this->keyword_ = keyword; |
| 229 this->counter_ = position + 1; | 223 this->counter_ = position + 1; |
| 230 this->keyword_token_ = token_if_match; | 224 this->keyword_token_ = token_if_match; |
| 231 return true; | 225 return true; |
| 232 } | 226 } |
| 233 return false; | 227 return false; |
| 234 } | 228 } |
| 235 | 229 |
| 236 // If input equals match character, transition to new state and return true. | 230 // If input equals match character, transition to new state and return true. |
| 237 inline bool MatchState(uc32 input, char match, State new_state) { | 231 inline bool MatchState(uc32 input, char match, State new_state) { |
| 238 if (input == match) { | 232 if (input == match) { |
| 239 state_ = new_state; | 233 state_ = new_state; |
| 240 return true; | 234 return true; |
| 241 } | 235 } |
| 242 return false; | 236 return false; |
| 243 } | 237 } |
| 244 | 238 |
| 245 inline bool MatchKeyword(uc32 input, | 239 inline bool MatchKeyword(uc32 input, |
| 246 char match, | 240 char match, |
| 247 State new_state, | 241 State new_state, |
| 248 Token::Value keyword_token) { | 242 Token::Value keyword_token) { |
| 249 if (input == match) { // Matched "do". | 243 if (input != match) { |
| 250 state_ = new_state; | 244 return false; |
| 251 token_ = keyword_token; | |
| 252 return true; | |
| 253 } | 245 } |
| 254 return false; | 246 state_ = new_state; |
| 247 token_ = keyword_token; |
| 248 return true; |
| 255 } | 249 } |
| 256 | 250 |
| 257 void Step(uc32 input); | 251 void Step(uc32 input); |
| 252 |
| 253 // Current state. |
| 254 State state_; |
| 255 // Token for currently added characters. |
| 256 Token::Value token_; |
| 257 |
| 258 // Matching a specific keyword string (there is only one possible valid |
| 259 // keyword with the current prefix). |
| 260 const char* keyword_; |
| 261 int counter_; |
| 262 Token::Value keyword_token_; |
| 258 }; | 263 }; |
| 259 | 264 |
| 260 | 265 |
| 261 enum ParserMode { PARSE, PREPARSE }; | 266 enum ParserMode { PARSE, PREPARSE }; |
| 262 enum ParserLanguage { JAVASCRIPT, JSON }; | 267 enum ParserLanguage { JAVASCRIPT, JSON }; |
| 263 | 268 |
| 264 | 269 |
| 265 class Scanner { | 270 class Scanner { |
| 266 public: | 271 public: |
| 267 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; | 272 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; |
| (...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 355 | 360 |
| 356 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; | 361 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; |
| 357 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; | 362 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; |
| 358 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; | 363 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; |
| 359 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; | 364 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; |
| 360 | 365 |
| 361 static const int kCharacterLookaheadBufferSize = 1; | 366 static const int kCharacterLookaheadBufferSize = 1; |
| 362 static const int kNoEndPosition = 1; | 367 static const int kNoEndPosition = 1; |
| 363 | 368 |
| 364 private: | 369 private: |
| 365 void Init(Handle<String> source, | |
| 366 unibrow::CharacterStream* stream, | |
| 367 int start_position, int end_position, | |
| 368 ParserLanguage language); | |
| 369 | |
| 370 | |
| 371 // Different UTF16 buffers used to pull characters from. Based on input one of | |
| 372 // these will be initialized as the actual data source. | |
| 373 CharacterStreamUTF16Buffer char_stream_buffer_; | |
| 374 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t> | |
| 375 two_byte_string_buffer_; | |
| 376 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; | |
| 377 | |
| 378 // Source. Will point to one of the buffers declared above. | |
| 379 UTF16Buffer* source_; | |
| 380 | |
| 381 // Used to convert the source string into a character stream when a stream | |
| 382 // is not passed to the scanner. | |
| 383 SafeStringInputBuffer safe_string_input_buffer_; | |
| 384 | |
| 385 // Buffer to hold literal values (identifiers, strings, numbers) | |
| 386 // using 0-terminated UTF-8 encoding. | |
| 387 UTF8Buffer literal_buffer_1_; | |
| 388 UTF8Buffer literal_buffer_2_; | |
| 389 | |
| 390 bool stack_overflow_; | |
| 391 static StaticResource<Utf8Decoder> utf8_decoder_; | |
| 392 | |
| 393 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | |
| 394 uc32 c0_; | |
| 395 | |
| 396 // The current and look-ahead token. | 370 // The current and look-ahead token. |
| 397 struct TokenDesc { | 371 struct TokenDesc { |
| 398 Token::Value token; | 372 Token::Value token; |
| 399 Location location; | 373 Location location; |
| 400 UTF8Buffer* literal_buffer; | 374 UTF8Buffer* literal_buffer; |
| 401 }; | 375 }; |
| 402 | 376 |
| 403 TokenDesc current_; // desc for current token (as returned by Next()) | 377 void Init(Handle<String> source, |
| 404 TokenDesc next_; // desc for next token (one token look-ahead) | 378 unibrow::CharacterStream* stream, |
| 405 bool has_line_terminator_before_next_; | 379 int start_position, int end_position, |
| 406 bool is_pre_parsing_; | 380 ParserLanguage language); |
| 407 bool is_parsing_json_; | |
| 408 | 381 |
| 409 // Literal buffer support | 382 // Literal buffer support |
| 410 void StartLiteral(); | 383 void StartLiteral(); |
| 411 void AddChar(uc32 ch); | 384 void AddChar(uc32 ch); |
| 412 void AddCharAdvance(); | 385 void AddCharAdvance(); |
| 413 void TerminateLiteral(); | 386 void TerminateLiteral(); |
| 414 | 387 |
| 415 // Low-level scanning support. | 388 // Low-level scanning support. |
| 416 void Advance() { c0_ = source_->Advance(); } | 389 void Advance() { c0_ = source_->Advance(); } |
| 417 void PushBack(uc32 ch) { | 390 void PushBack(uc32 ch) { |
| 418 source_->PushBack(ch); | 391 source_->PushBack(ch); |
| 419 c0_ = ch; | 392 c0_ = ch; |
| 420 } | 393 } |
| 421 | 394 |
| 422 bool SkipWhiteSpace() { | 395 bool SkipWhiteSpace() { |
| 423 if (is_parsing_json_) { | 396 if (is_parsing_json_) { |
| 424 return SkipJsonWhiteSpace(); | 397 return SkipJsonWhiteSpace(); |
| 425 } else { | 398 } else { |
| 426 return SkipJavaScriptWhiteSpace(); | 399 return SkipJavaScriptWhiteSpace(); |
| 427 } | 400 } |
| 428 } | 401 } |
| 402 |
| 429 bool SkipJavaScriptWhiteSpace(); | 403 bool SkipJavaScriptWhiteSpace(); |
| 430 bool SkipJsonWhiteSpace(); | 404 bool SkipJsonWhiteSpace(); |
| 431 Token::Value SkipSingleLineComment(); | 405 Token::Value SkipSingleLineComment(); |
| 432 Token::Value SkipMultiLineComment(); | 406 Token::Value SkipMultiLineComment(); |
| 433 | 407 |
| 434 inline Token::Value Select(Token::Value tok); | 408 inline Token::Value Select(Token::Value tok); |
| 435 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_); | 409 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_); |
| 436 | 410 |
| 437 inline void Scan() { | 411 inline void Scan() { |
| 438 if (is_parsing_json_) { | 412 if (is_parsing_json_) { |
| (...skipping 14 matching lines...) Expand all Loading... |
| 453 // carriage-return, newline and space. | 427 // carriage-return, newline and space. |
| 454 void ScanJson(); | 428 void ScanJson(); |
| 455 | 429 |
| 456 // A JSON number (production JSONNumber) is a subset of the valid JavaScript | 430 // A JSON number (production JSONNumber) is a subset of the valid JavaScript |
| 457 // decimal number literals. | 431 // decimal number literals. |
| 458 // It includes an optional minus sign, must have at least one | 432 // It includes an optional minus sign, must have at least one |
| 459 // digit before and after a decimal point, may not have prefixed zeros (unless | 433 // digit before and after a decimal point, may not have prefixed zeros (unless |
| 460 // the integer part is zero), and may include an exponent part (e.g., "e-10"). | 434 // the integer part is zero), and may include an exponent part (e.g., "e-10"). |
| 461 // Hexadecimal and octal numbers are not allowed. | 435 // Hexadecimal and octal numbers are not allowed. |
| 462 Token::Value ScanJsonNumber(); | 436 Token::Value ScanJsonNumber(); |
| 437 |
| 463 // A JSON string (production JSONString) is a subset of valid JavaScript string | 438 // A JSON string (production JSONString) is a subset of valid JavaScript string |
| 464 // literals. The string must only be double-quoted (not single-quoted), and | 439 // literals. The string must only be double-quoted (not single-quoted), and |
| 465 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and | 440 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and |
| 466 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. | 441 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. |
| 467 Token::Value ScanJsonString(); | 442 Token::Value ScanJsonString(); |
| 443 |
| 468 // Used to recognize one of the literals "true", "false", or "null". These | 444 // Used to recognize one of the literals "true", "false", or "null". These |
| 469 // are the only valid JSON identifiers (productions JSONBooleanLiteral, | 445 // are the only valid JSON identifiers (productions JSONBooleanLiteral, |
| 470 // JSONNullLiteral). | 446 // JSONNullLiteral). |
| 471 Token::Value ScanJsonIdentifier(const char* text, Token::Value token); | 447 Token::Value ScanJsonIdentifier(const char* text, Token::Value token); |
| 472 | 448 |
| 473 void ScanDecimalDigits(); | 449 void ScanDecimalDigits(); |
| 474 Token::Value ScanNumber(bool seen_period); | 450 Token::Value ScanNumber(bool seen_period); |
| 475 Token::Value ScanIdentifier(); | 451 Token::Value ScanIdentifier(); |
| 476 uc32 ScanHexEscape(uc32 c, int length); | 452 uc32 ScanHexEscape(uc32 c, int length); |
| 477 uc32 ScanOctalEscape(uc32 c, int length); | 453 uc32 ScanOctalEscape(uc32 c, int length); |
| 478 void ScanEscape(); | 454 void ScanEscape(); |
| 479 Token::Value ScanString(); | 455 Token::Value ScanString(); |
| 480 | 456 |
| 481 // Scans a possible HTML comment -- begins with '<!'. | 457 // Scans a possible HTML comment -- begins with '<!'. |
| 482 Token::Value ScanHtmlComment(); | 458 Token::Value ScanHtmlComment(); |
| 483 | 459 |
| 484 // Return the current source position. | 460 // Return the current source position. |
| 485 int source_pos() { | 461 int source_pos() { |
| 486 return source_->pos() - kCharacterLookaheadBufferSize; | 462 return source_->pos() - kCharacterLookaheadBufferSize; |
| 487 } | 463 } |
| 488 | 464 |
| 489 // Decodes a unicode escape-sequence which is part of an identifier. | 465 // Decodes a unicode escape-sequence which is part of an identifier. |
| 490 // If the escape sequence cannot be decoded the result is kBadRune. | 466 // If the escape sequence cannot be decoded the result is kBadRune. |
| 491 uc32 ScanIdentifierUnicodeEscape(); | 467 uc32 ScanIdentifierUnicodeEscape(); |
| 468 |
| 469 TokenDesc current_; // desc for current token (as returned by Next()) |
| 470 TokenDesc next_; // desc for next token (one token look-ahead) |
| 471 bool has_line_terminator_before_next_; |
| 472 bool is_pre_parsing_; |
| 473 bool is_parsing_json_; |
| 474 |
| 475 // Different UTF16 buffers used to pull characters from. Based on input one of |
| 476 // these will be initialized as the actual data source. |
| 477 CharacterStreamUTF16Buffer char_stream_buffer_; |
| 478 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t> |
| 479 two_byte_string_buffer_; |
| 480 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; |
| 481 |
| 482 // Source. Will point to one of the buffers declared above. |
| 483 UTF16Buffer* source_; |
| 484 |
| 485 // Used to convert the source string into a character stream when a stream |
| 486 // is not passed to the scanner. |
| 487 SafeStringInputBuffer safe_string_input_buffer_; |
| 488 |
| 489 // Buffer to hold literal values (identifiers, strings, numbers) |
| 490 // using 0-terminated UTF-8 encoding. |
| 491 UTF8Buffer literal_buffer_1_; |
| 492 UTF8Buffer literal_buffer_2_; |
| 493 |
| 494 bool stack_overflow_; |
| 495 static StaticResource<Utf8Decoder> utf8_decoder_; |
| 496 |
| 497 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| 498 uc32 c0_; |
| 492 }; | 499 }; |
| 493 | 500 |
| 494 } } // namespace v8::internal | 501 } } // namespace v8::internal |
| 495 | 502 |
| 496 #endif // V8_SCANNER_H_ | 503 #endif // V8_SCANNER_H_ |
| OLD | NEW |