Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 309 matching lines...) | |
| 320 | 320 |
| 321 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 321 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| 322 uc32 c0_; | 322 uc32 c0_; |
| 323 }; | 323 }; |
| 324 | 324 |
| 325 // ---------------------------------------------------------------------------- | 325 // ---------------------------------------------------------------------------- |
| 326 // JavaScriptScanner - base logic for JavaScript scanning. | 326 // JavaScriptScanner - base logic for JavaScript scanning. |
| 327 | 327 |
| 328 class JavaScriptScanner : public Scanner { | 328 class JavaScriptScanner : public Scanner { |
| 329 public: | 329 public: |
| 330 | |
| 331 enum LiteralType { | |
| 332 kLiteralNumber = 1, | |
| 333 kLiteralIdentifier = 2, | |
| 334 kLiteralString = 4, | |
| 335 kLiteralRegExp = 8, | |
| 336 kLiteralRegExpFlags = 16 | |
| 337 }; | |
| 338 | |
| 339 // More specialized literal scope. | |
| Review comment — Søren Thygesen Gjesse (2010/11/19 08:17:21): More specialized - in what sense?<br>Lasse Reichstein (2010/11/19 08:51:24): Elaborated. | |
| 340 class LiteralScope { | |
| 341 public: | |
| 342 LiteralScope(JavaScriptScanner* self, LiteralType type) | |
| 343 : scanner_(self), complete_(false) { | |
| 344 if (scanner_->RecordsLiteral(type)) { | |
| 345 scanner_->StartLiteral(); | |
| 346 } | |
| 347 } | |
| 348 ~LiteralScope() { | |
| 349 if (!complete_) scanner_->DropLiteral(); | |
| 350 } | |
| 351 void Complete() { | |
| 352 scanner_->TerminateLiteral(); | |
| 353 complete_ = true; | |
| 354 } | |
| 355 | |
| 356 private: | |
| 357 JavaScriptScanner* scanner_; | |
| 358 bool complete_; | |
| 359 }; | |
| 360 | |
| 330 JavaScriptScanner(); | 361 JavaScriptScanner(); |
| 331 | 362 |
| 332 // Returns the next token. | 363 // Returns the next token. |
| 333 Token::Value Next(); | 364 Token::Value Next(); |
| 334 | 365 |
| 335 // Returns true if there was a line terminator before the peek'ed token. | 366 // Returns true if there was a line terminator before the peek'ed token. |
| 336 bool has_line_terminator_before_next() const { | 367 bool has_line_terminator_before_next() const { |
| 337 return has_line_terminator_before_next_; | 368 return has_line_terminator_before_next_; |
| 338 } | 369 } |
| 339 | 370 |
| 340 // Scans the input as a regular expression pattern, previous | 371 // Scans the input as a regular expression pattern, previous |
| 341 // character(s) must be /(=). Returns true if a pattern is scanned. | 372 // character(s) must be /(=). Returns true if a pattern is scanned. |
| 342 bool ScanRegExpPattern(bool seen_equal); | 373 bool ScanRegExpPattern(bool seen_equal); |
| 343 // Returns true if regexp flags are scanned (always since flags can | 374 // Returns true if regexp flags are scanned (always since flags can |
| 344 // be empty). | 375 // be empty). |
| 345 bool ScanRegExpFlags(); | 376 bool ScanRegExpFlags(); |
| 346 | 377 |
| 347 // Tells whether the buffer contains an identifier (no escapes). | 378 // Tells whether the buffer contains an identifier (no escapes). |
| 348 // Used for checking if a property name is an identifier. | 379 // Used for checking if a property name is an identifier. |
| 349 static bool IsIdentifier(unibrow::CharacterStream* buffer); | 380 static bool IsIdentifier(unibrow::CharacterStream* buffer); |
| 350 | 381 |
| 351 // Seek forward to the given position. This operation does not | 382 // Seek forward to the given position. This operation does not |
| 352 // work in general, for instance when there are pushed back | 383 // work in general, for instance when there are pushed back |
| 353 // characters, but works for seeking forward until simple delimiter | 384 // characters, but works for seeking forward until simple delimiter |
| 354 // tokens, which is what it is used for. | 385 // tokens, which is what it is used for. |
| 355 void SeekForward(int pos); | 386 void SeekForward(int pos); |
| 356 | 387 |
| 388 // Whether this scanner records the given literal type or not. | |
| 389 bool RecordsLiteral(LiteralType type) { | |
| 390 return (literal_flags_ & type) != 0; | |
| 391 } | |
| 392 | |
| 357 protected: | 393 protected: |
| 358 bool SkipWhiteSpace(); | 394 bool SkipWhiteSpace(); |
| 359 Token::Value SkipSingleLineComment(); | 395 Token::Value SkipSingleLineComment(); |
| 360 Token::Value SkipMultiLineComment(); | 396 Token::Value SkipMultiLineComment(); |
| 361 | 397 |
| 362 // Scans a single JavaScript token. | 398 // Scans a single JavaScript token. |
| 363 void Scan(); | 399 void Scan(); |
| 364 | 400 |
| 365 void ScanDecimalDigits(); | 401 void ScanDecimalDigits(); |
| 366 Token::Value ScanNumber(bool seen_period); | 402 Token::Value ScanNumber(bool seen_period); |
| 367 Token::Value ScanIdentifier(); | 403 Token::Value ScanIdentifierOrKeyword(); |
| 404 Token::Value ScanIdentifierSuffix(LiteralScope* literal); | |
| 368 | 405 |
| 369 void ScanEscape(); | 406 void ScanEscape(); |
| 370 Token::Value ScanString(); | 407 Token::Value ScanString(); |
| 371 | 408 |
| 372 // Scans a possible HTML comment -- begins with '<!'. | 409 // Scans a possible HTML comment -- begins with '<!'. |
| 373 Token::Value ScanHtmlComment(); | 410 Token::Value ScanHtmlComment(); |
| 374 | 411 |
| 375 // Decodes a unicode escape-sequence which is part of an identifier. | 412 // Decodes a unicode escape-sequence which is part of an identifier. |
| 376 // If the escape sequence cannot be decoded the result is kBadChar. | 413 // If the escape sequence cannot be decoded the result is kBadChar. |
| 377 uc32 ScanIdentifierUnicodeEscape(); | 414 uc32 ScanIdentifierUnicodeEscape(); |
| 378 | 415 |
| 416 int literal_flags_; | |
| 379 bool has_line_terminator_before_next_; | 417 bool has_line_terminator_before_next_; |
| 380 }; | 418 }; |
| 381 | 419 |
| 382 | 420 |
| 383 // ---------------------------------------------------------------------------- | 421 // ---------------------------------------------------------------------------- |
| 384 // Keyword matching state machine. | 422 // Keyword matching state machine. |
| 385 | 423 |
| 386 class KeywordMatcher { | 424 class KeywordMatcher { |
| 387 // Incrementally recognize keywords. | 425 // Incrementally recognize keywords. |
| 388 // | 426 // |
| 389 // Recognized keywords: | 427 // Recognized keywords: |
| 390 // break case catch const* continue debugger* default delete do else | 428 // break case catch const* continue debugger* default delete do else |
| 391 // finally false for function if in instanceof native* new null | 429 // finally false for function if in instanceof native* new null |
| 392 // return switch this throw true try typeof var void while with | 430 // return switch this throw true try typeof var void while with |
| 393 // | 431 // |
| 394 // *: Actually "future reserved keywords". These are the only ones we | 432 // *: Actually "future reserved keywords". These are the only ones we |
| 395 // recognize, the remaining are allowed as identifiers. | 433 // recognize, the remaining are allowed as identifiers. |
| 396 // In ES5 strict mode, we should disallow all reserved keywords. | 434 // In ES5 strict mode, we should disallow all reserved keywords. |
| 397 public: | 435 public: |
| 398 KeywordMatcher() | 436 KeywordMatcher() |
| 399 : state_(INITIAL), | 437 : state_(INITIAL), |
| 400 token_(Token::IDENTIFIER), | 438 token_(Token::IDENTIFIER), |
| 401 keyword_(NULL), | 439 keyword_(NULL), |
| 402 counter_(0), | 440 counter_(0), |
| 403 keyword_token_(Token::ILLEGAL) {} | 441 keyword_token_(Token::ILLEGAL) {} |
| 404 | 442 |
| 405 Token::Value token() { return token_; } | 443 Token::Value token() { return token_; } |
| 406 | 444 |
| 407 inline void AddChar(unibrow::uchar input) { | 445 inline bool AddChar(unibrow::uchar input) { |
| 408 if (state_ != UNMATCHABLE) { | 446 if (state_ != UNMATCHABLE) { |
| 409 Step(input); | 447 Step(input); |
| 410 } | 448 } |
| 449 return state_ != UNMATCHABLE; | |
| 411 } | 450 } |
| 412 | 451 |
| 413 void Fail() { | 452 void Fail() { |
| 414 token_ = Token::IDENTIFIER; | 453 token_ = Token::IDENTIFIER; |
| 415 state_ = UNMATCHABLE; | 454 state_ = UNMATCHABLE; |
| 416 } | 455 } |
| 417 | 456 |
| 418 private: | 457 private: |
| 419 enum State { | 458 enum State { |
| 420 UNMATCHABLE, | 459 UNMATCHABLE, |
| (...skipping 30 matching lines...) | |
| 451 kFirstCharRangeMax - kFirstCharRangeMin + 1; | 490 kFirstCharRangeMax - kFirstCharRangeMin + 1; |
| 452 // State map for first keyword character range. | 491 // State map for first keyword character range. |
| 453 static FirstState first_states_[kFirstCharRangeLength]; | 492 static FirstState first_states_[kFirstCharRangeLength]; |
| 454 | 493 |
| 455 // If input equals keyword's character at position, continue matching keyword | 494 // If input equals keyword's character at position, continue matching keyword |
| 456 // from that position. | 495 // from that position. |
| 457 inline bool MatchKeywordStart(unibrow::uchar input, | 496 inline bool MatchKeywordStart(unibrow::uchar input, |
| 458 const char* keyword, | 497 const char* keyword, |
| 459 int position, | 498 int position, |
| 460 Token::Value token_if_match) { | 499 Token::Value token_if_match) { |
| 461 if (input == static_cast<unibrow::uchar>(keyword[position])) { | 500 if (input != static_cast<unibrow::uchar>(keyword[position])) { |
| 462 state_ = KEYWORD_PREFIX; | 501 return false; |
| 463 this->keyword_ = keyword; | |
| 464 this->counter_ = position + 1; | |
| 465 this->keyword_token_ = token_if_match; | |
| 466 return true; | |
| 467 } | 502 } |
| 468 return false; | 503 state_ = KEYWORD_PREFIX; |
| 504 this->keyword_ = keyword; | |
| 505 this->counter_ = position + 1; | |
| 506 this->keyword_token_ = token_if_match; | |
| 507 return true; | |
| 469 } | 508 } |
| 470 | 509 |
| 471 // If input equals match character, transition to new state and return true. | 510 // If input equals match character, transition to new state and return true. |
| 472 inline bool MatchState(unibrow::uchar input, char match, State new_state) { | 511 inline bool MatchState(unibrow::uchar input, char match, State new_state) { |
| 473 if (input == static_cast<unibrow::uchar>(match)) { | 512 if (input != static_cast<unibrow::uchar>(match)) { |
| 474 state_ = new_state; | 513 return false; |
| 475 return true; | |
| 476 } | 514 } |
| 477 return false; | 515 state_ = new_state; |
| 516 return true; | |
| 478 } | 517 } |
| 479 | 518 |
| 480 inline bool MatchKeyword(unibrow::uchar input, | 519 inline bool MatchKeyword(unibrow::uchar input, |
| 481 char match, | 520 char match, |
| 482 State new_state, | |
| 483 Token::Value keyword_token) { | 521 Token::Value keyword_token) { |
| 484 if (input != static_cast<unibrow::uchar>(match)) { | 522 if (input != static_cast<unibrow::uchar>(match)) { |
| 485 return false; | 523 return false; |
| 486 } | 524 } |
| 487 state_ = new_state; | 525 state_ = KEYWORD_MATCHED; |
| 488 token_ = keyword_token; | 526 token_ = keyword_token; |
| 489 return true; | 527 return true; |
| 490 } | 528 } |
| 491 | 529 |
| 492 void Step(unibrow::uchar input); | 530 void Step(unibrow::uchar input); |
| 493 | 531 |
| 494 // Current state. | 532 // Current state. |
| 495 State state_; | 533 State state_; |
| 496 // Token for currently added characters. | 534 // Token for currently added characters. |
| 497 Token::Value token_; | 535 Token::Value token_; |
| 498 | 536 |
| 499 // Matching a specific keyword string (there is only one possible valid | 537 // Matching a specific keyword string (there is only one possible valid |
| 500 // keyword with the current prefix). | 538 // keyword with the current prefix). |
| 501 const char* keyword_; | 539 const char* keyword_; |
| 502 int counter_; | 540 int counter_; |
| 503 Token::Value keyword_token_; | 541 Token::Value keyword_token_; |
| 504 }; | 542 }; |
| 505 | 543 |
| 506 | 544 |
| 507 } } // namespace v8::internal | 545 } } // namespace v8::internal |
| 508 | 546 |
| 509 #endif // V8_SCANNER_BASE_H_ | 547 #endif // V8_SCANNER_BASE_H_ |
| OLD | NEW |