OLD | NEW |
---|---|
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 309 matching lines...) | |
320 | 320 |
321 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 321 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
322 uc32 c0_; | 322 uc32 c0_; |
323 }; | 323 }; |
324 | 324 |
325 // ---------------------------------------------------------------------------- | 325 // ---------------------------------------------------------------------------- |
326 // JavaScriptScanner - base logic for JavaScript scanning. | 326 // JavaScriptScanner - base logic for JavaScript scanning. |
327 | 327 |
328 class JavaScriptScanner : public Scanner { | 328 class JavaScriptScanner : public Scanner { |
329 public: | 329 public: |
330 | |
331 enum LiteralType { | |
332 kLiteralNumber = 1, | |
333 kLiteralIdentifier = 2, | |
334 kLiteralString = 4, | |
335 kLiteralRegExp = 8, | |
336 kLiteralRegExpFlags = 16 | |
337 }; | |
338 | |
339 // More specialized literal scope. | |
Søren Thygesen Gjesse
2010/11/19 08:17:21
More specialized - in what sense?
Lasse Reichstein
2010/11/19 08:51:24
Elaborated.
| |
340 class LiteralScope { | |
341 public: | |
342 LiteralScope(JavaScriptScanner* self, LiteralType type) | |
343 : scanner_(self), complete_(false) { | |
344 if (scanner_->RecordsLiteral(type)) { | |
345 scanner_->StartLiteral(); | |
346 } | |
347 } | |
348 ~LiteralScope() { | |
349 if (!complete_) scanner_->DropLiteral(); | |
350 } | |
351 void Complete() { | |
352 scanner_->TerminateLiteral(); | |
353 complete_ = true; | |
354 } | |
355 | |
356 private: | |
357 JavaScriptScanner* scanner_; | |
358 bool complete_; | |
359 }; | |
360 | |
330 JavaScriptScanner(); | 361 JavaScriptScanner(); |
331 | 362 |
332 // Returns the next token. | 363 // Returns the next token. |
333 Token::Value Next(); | 364 Token::Value Next(); |
334 | 365 |
335 // Returns true if there was a line terminator before the peek'ed token. | 366 // Returns true if there was a line terminator before the peek'ed token. |
336 bool has_line_terminator_before_next() const { | 367 bool has_line_terminator_before_next() const { |
337 return has_line_terminator_before_next_; | 368 return has_line_terminator_before_next_; |
338 } | 369 } |
339 | 370 |
340 // Scans the input as a regular expression pattern; the previous | 371 // Scans the input as a regular expression pattern; the previous |
341 // character(s) must be '/' or '/='. Returns true if a pattern is scanned. | 372 // character(s) must be '/' or '/='. Returns true if a pattern is scanned. |
342 bool ScanRegExpPattern(bool seen_equal); | 373 bool ScanRegExpPattern(bool seen_equal); |
343 // Returns true if regexp flags are scanned (always since flags can | 374 // Returns true if regexp flags are scanned (always since flags can |
344 // be empty). | 375 // be empty). |
345 bool ScanRegExpFlags(); | 376 bool ScanRegExpFlags(); |
346 | 377 |
347 // Tells whether the buffer contains an identifier (no escapes). | 378 // Tells whether the buffer contains an identifier (no escapes). |
348 // Used for checking if a property name is an identifier. | 379 // Used for checking if a property name is an identifier. |
349 static bool IsIdentifier(unibrow::CharacterStream* buffer); | 380 static bool IsIdentifier(unibrow::CharacterStream* buffer); |
350 | 381 |
351 // Seek forward to the given position. This operation does not | 382 // Seek forward to the given position. This operation does not |
352 // work in general, for instance when there are pushed back | 383 // work in general, for instance when there are pushed back |
353 // characters, but works for seeking forward until simple delimiter | 384 // characters, but works for seeking forward until simple delimiter |
354 // tokens, which is what it is used for. | 385 // tokens, which is what it is used for. |
355 void SeekForward(int pos); | 386 void SeekForward(int pos); |
356 | 387 |
388 // Whether this scanner records the given literal type or not. | |
389 bool RecordsLiteral(LiteralType type) { | |
390 return (literal_flags_ & type) != 0; | |
391 } | |
392 | |
357 protected: | 393 protected: |
358 bool SkipWhiteSpace(); | 394 bool SkipWhiteSpace(); |
359 Token::Value SkipSingleLineComment(); | 395 Token::Value SkipSingleLineComment(); |
360 Token::Value SkipMultiLineComment(); | 396 Token::Value SkipMultiLineComment(); |
361 | 397 |
362 // Scans a single JavaScript token. | 398 // Scans a single JavaScript token. |
363 void Scan(); | 399 void Scan(); |
364 | 400 |
365 void ScanDecimalDigits(); | 401 void ScanDecimalDigits(); |
366 Token::Value ScanNumber(bool seen_period); | 402 Token::Value ScanNumber(bool seen_period); |
367 Token::Value ScanIdentifier(); | 403 Token::Value ScanIdentifierOrKeyword(); |
404 Token::Value ScanIdentifierSuffix(LiteralScope* literal); | |
368 | 405 |
369 void ScanEscape(); | 406 void ScanEscape(); |
370 Token::Value ScanString(); | 407 Token::Value ScanString(); |
371 | 408 |
372 // Scans a possible HTML comment -- begins with '<!'. | 409 // Scans a possible HTML comment -- begins with '<!'. |
373 Token::Value ScanHtmlComment(); | 410 Token::Value ScanHtmlComment(); |
374 | 411 |
375 // Decodes a unicode escape-sequence which is part of an identifier. | 412 // Decodes a unicode escape-sequence which is part of an identifier. |
376 // If the escape sequence cannot be decoded the result is kBadChar. | 413 // If the escape sequence cannot be decoded the result is kBadChar. |
377 uc32 ScanIdentifierUnicodeEscape(); | 414 uc32 ScanIdentifierUnicodeEscape(); |
378 | 415 |
416 int literal_flags_; | |
379 bool has_line_terminator_before_next_; | 417 bool has_line_terminator_before_next_; |
380 }; | 418 }; |
381 | 419 |
382 | 420 |
383 // ---------------------------------------------------------------------------- | 421 // ---------------------------------------------------------------------------- |
384 // Keyword matching state machine. | 422 // Keyword matching state machine. |
385 | 423 |
386 class KeywordMatcher { | 424 class KeywordMatcher { |
387 // Incrementally recognize keywords. | 425 // Incrementally recognize keywords. |
388 // | 426 // |
389 // Recognized keywords: | 427 // Recognized keywords: |
390 // break case catch const* continue debugger* default delete do else | 428 // break case catch const* continue debugger* default delete do else |
391 // finally false for function if in instanceof native* new null | 429 // finally false for function if in instanceof native* new null |
392 // return switch this throw true try typeof var void while with | 430 // return switch this throw true try typeof var void while with |
393 // | 431 // |
394 // *: Actually "future reserved keywords". These are the only ones we | 432 // *: Actually "future reserved keywords". These are the only ones we |
395 // recognize, the remaining are allowed as identifiers. | 433 // recognize, the remaining are allowed as identifiers. |
396 // In ES5 strict mode, we should disallow all reserved keywords. | 434 // In ES5 strict mode, we should disallow all reserved keywords. |
397 public: | 435 public: |
398 KeywordMatcher() | 436 KeywordMatcher() |
399 : state_(INITIAL), | 437 : state_(INITIAL), |
400 token_(Token::IDENTIFIER), | 438 token_(Token::IDENTIFIER), |
401 keyword_(NULL), | 439 keyword_(NULL), |
402 counter_(0), | 440 counter_(0), |
403 keyword_token_(Token::ILLEGAL) {} | 441 keyword_token_(Token::ILLEGAL) {} |
404 | 442 |
405 Token::Value token() { return token_; } | 443 Token::Value token() { return token_; } |
406 | 444 |
407 inline void AddChar(unibrow::uchar input) { | 445 inline bool AddChar(unibrow::uchar input) { |
408 if (state_ != UNMATCHABLE) { | 446 if (state_ != UNMATCHABLE) { |
409 Step(input); | 447 Step(input); |
410 } | 448 } |
449 return state_ != UNMATCHABLE; | |
411 } | 450 } |
412 | 451 |
413 void Fail() { | 452 void Fail() { |
414 token_ = Token::IDENTIFIER; | 453 token_ = Token::IDENTIFIER; |
415 state_ = UNMATCHABLE; | 454 state_ = UNMATCHABLE; |
416 } | 455 } |
417 | 456 |
418 private: | 457 private: |
419 enum State { | 458 enum State { |
420 UNMATCHABLE, | 459 UNMATCHABLE, |
(...skipping 30 matching lines...) | |
451 kFirstCharRangeMax - kFirstCharRangeMin + 1; | 490 kFirstCharRangeMax - kFirstCharRangeMin + 1; |
452 // State map for first keyword character range. | 491 // State map for first keyword character range. |
453 static FirstState first_states_[kFirstCharRangeLength]; | 492 static FirstState first_states_[kFirstCharRangeLength]; |
454 | 493 |
455 // If input equals keyword's character at position, continue matching keyword | 494 // If input equals keyword's character at position, continue matching keyword |
456 // from that position. | 495 // from that position. |
457 inline bool MatchKeywordStart(unibrow::uchar input, | 496 inline bool MatchKeywordStart(unibrow::uchar input, |
458 const char* keyword, | 497 const char* keyword, |
459 int position, | 498 int position, |
460 Token::Value token_if_match) { | 499 Token::Value token_if_match) { |
461 if (input == static_cast<unibrow::uchar>(keyword[position])) { | 500 if (input != static_cast<unibrow::uchar>(keyword[position])) { |
462 state_ = KEYWORD_PREFIX; | 501 return false; |
463 this->keyword_ = keyword; | |
464 this->counter_ = position + 1; | |
465 this->keyword_token_ = token_if_match; | |
466 return true; | |
467 } | 502 } |
468 return false; | 503 state_ = KEYWORD_PREFIX; |
504 this->keyword_ = keyword; | |
505 this->counter_ = position + 1; | |
506 this->keyword_token_ = token_if_match; | |
507 return true; | |
469 } | 508 } |
470 | 509 |
471 // If input equals match character, transition to new state and return true. | 510 // If input equals match character, transition to new state and return true. |
472 inline bool MatchState(unibrow::uchar input, char match, State new_state) { | 511 inline bool MatchState(unibrow::uchar input, char match, State new_state) { |
473 if (input == static_cast<unibrow::uchar>(match)) { | 512 if (input != static_cast<unibrow::uchar>(match)) { |
474 state_ = new_state; | 513 return false; |
475 return true; | |
476 } | 514 } |
477 return false; | 515 state_ = new_state; |
516 return true; | |
478 } | 517 } |
479 | 518 |
480 inline bool MatchKeyword(unibrow::uchar input, | 519 inline bool MatchKeyword(unibrow::uchar input, |
481 char match, | 520 char match, |
482 State new_state, | |
483 Token::Value keyword_token) { | 521 Token::Value keyword_token) { |
484 if (input != static_cast<unibrow::uchar>(match)) { | 522 if (input != static_cast<unibrow::uchar>(match)) { |
485 return false; | 523 return false; |
486 } | 524 } |
487 state_ = new_state; | 525 state_ = KEYWORD_MATCHED; |
488 token_ = keyword_token; | 526 token_ = keyword_token; |
489 return true; | 527 return true; |
490 } | 528 } |
491 | 529 |
492 void Step(unibrow::uchar input); | 530 void Step(unibrow::uchar input); |
493 | 531 |
494 // Current state. | 532 // Current state. |
495 State state_; | 533 State state_; |
496 // Token for currently added characters. | 534 // Token for currently added characters. |
497 Token::Value token_; | 535 Token::Value token_; |
498 | 536 |
499 // Matching a specific keyword string (there is only one possible valid | 537 // Matching a specific keyword string (there is only one possible valid |
500 // keyword with the current prefix). | 538 // keyword with the current prefix). |
501 const char* keyword_; | 539 const char* keyword_; |
502 int counter_; | 540 int counter_; |
503 Token::Value keyword_token_; | 541 Token::Value keyword_token_; |
504 }; | 542 }; |
505 | 543 |
506 | 544 |
507 } } // namespace v8::internal | 545 } } // namespace v8::internal |
508 | 546 |
509 #endif // V8_SCANNER_BASE_H_ | 547 #endif // V8_SCANNER_BASE_H_ |
OLD | NEW |