OLD | NEW |
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 309 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
320 | 320 |
321 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 321 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
322 uc32 c0_; | 322 uc32 c0_; |
323 }; | 323 }; |
324 | 324 |
325 // ---------------------------------------------------------------------------- | 325 // ---------------------------------------------------------------------------- |
326 // JavaScriptScanner - base logic for JavaScript scanning. | 326 // JavaScriptScanner - base logic for JavaScript scanning. |
327 | 327 |
// Review note: this section is a side-by-side rendering ("OLD | NEW"); rows
// that begin with "|" exist only in the new revision.
// JavaScriptScanner derives from Scanner. The new revision adds the
// LiteralType flags, the nested LiteralScope helper, RecordsLiteral() and the
// literal_flags_ member.
328 class JavaScriptScanner : public Scanner { | 328 class JavaScriptScanner : public Scanner { |
329 public: | 329 public: |
| 330 |
| 331 // Bit vector representing set of types of literals. |
// Each named literal kind is a distinct power of two so kinds can be OR-ed
// together; kAllLiterals (31) equals 1 | 2 | 4 | 8 | 16.
| 332 enum LiteralType { |
| 333 kNoLiterals = 0, |
| 334 kLiteralNumber = 1, |
| 335 kLiteralIdentifier = 2, |
| 336 kLiteralString = 4, |
| 337 kLiteralRegExp = 8, |
| 338 kLiteralRegExpFlags = 16, |
| 339 kAllLiterals = 31 |
| 340 }; |
| 341 |
| 342 // A LiteralScope that disables recording of some types of JavaScript |
| 343 // literals. If the scanner is configured to not record the specific |
| 344 // type of literal, the scope will not call StartLiteral. |
// The constructor calls StartLiteral() only when RecordsLiteral(type) is true.
// The destructor calls DropLiteral() unless Complete() ran first, and
// Complete() calls TerminateLiteral(); neither checks whether StartLiteral()
// was actually called.
// NOTE(review): confirm DropLiteral()/TerminateLiteral() are safe without a
// preceding StartLiteral() -- their definitions are not visible here.
// NOTE(review): the destructor has a side effect and no copy protection is
// visible; confirm a LiteralScope is never copied.
| 345 class LiteralScope { |
| 346 public: |
| 347 LiteralScope(JavaScriptScanner* self, LiteralType type) |
| 348 : scanner_(self), complete_(false) { |
| 349 if (scanner_->RecordsLiteral(type)) { |
| 350 scanner_->StartLiteral(); |
| 351 } |
| 352 } |
| 353 ~LiteralScope() { |
| 354 if (!complete_) scanner_->DropLiteral(); |
| 355 } |
| 356 void Complete() { |
| 357 scanner_->TerminateLiteral(); |
| 358 complete_ = true; |
| 359 } |
| 360 |
| 361 private: |
// scanner_ is the scanner passed to the constructor; complete_ records whether
// Complete() has been called.
| 362 JavaScriptScanner* scanner_; |
| 363 bool complete_; |
| 364 }; |
| 365 |
330 JavaScriptScanner(); | 366 JavaScriptScanner(); |
331 | 367 |
332 // Returns the next token. | 368 // Returns the next token. |
333 Token::Value Next(); | 369 Token::Value Next(); |
334 | 370 |
335 // Returns true if there was a line terminator before the peek'ed token. | 371 // Returns true if there was a line terminator before the peek'ed token. |
336 bool has_line_terminator_before_next() const { | 372 bool has_line_terminator_before_next() const { |
337 return has_line_terminator_before_next_; | 373 return has_line_terminator_before_next_; |
338 } | 374 } |
339 | 375 |
340 // Scans the input as a regular expression pattern, previous | 376 // Scans the input as a regular expression pattern, previous |
341 // character(s) must be /(=). Returns true if a pattern is scanned. | 377 // character(s) must be /(=). Returns true if a pattern is scanned. |
342 bool ScanRegExpPattern(bool seen_equal); | 378 bool ScanRegExpPattern(bool seen_equal); |
343 // Returns true if regexp flags are scanned (always since flags can | 379 // Returns true if regexp flags are scanned (always since flags can |
344 // be empty). | 380 // be empty). |
345 bool ScanRegExpFlags(); | 381 bool ScanRegExpFlags(); |
346 | 382 |
347 // Tells whether the buffer contains an identifier (no escapes). | 383 // Tells whether the buffer contains an identifier (no escapes). |
348 // Used for checking if a property name is an identifier. | 384 // Used for checking if a property name is an identifier. |
349 static bool IsIdentifier(unibrow::CharacterStream* buffer); | 385 static bool IsIdentifier(unibrow::CharacterStream* buffer); |
350 | 386 |
351 // Seek forward to the given position. This operation does not | 387 // Seek forward to the given position. This operation does not |
352 // work in general, for instance when there are pushed back | 388 // work in general, for instance when there are pushed back |
353 // characters, but works for seeking forward until simple delimiter | 389 // characters, but works for seeking forward until simple delimiter |
354 // tokens, which is what it is used for. | 390 // tokens, which is what it is used for. |
355 void SeekForward(int pos); | 391 void SeekForward(int pos); |
356 | 392 |
| 393 // Whether this scanner records the given literal type or not. |
// True when literal_flags_ shares at least one bit with `type`, so passing
// kNoLiterals (0) always yields false.
// NOTE(review): the body only reads literal_flags_; consider declaring this
// member function const.
| 394 bool RecordsLiteral(LiteralType type) { |
| 395 return (literal_flags_ & type) != 0; |
| 396 } |
| 397 |
357 protected: | 398 protected: |
358 bool SkipWhiteSpace(); | 399 bool SkipWhiteSpace(); |
359 Token::Value SkipSingleLineComment(); | 400 Token::Value SkipSingleLineComment(); |
360 Token::Value SkipMultiLineComment(); | 401 Token::Value SkipMultiLineComment(); |
361 | 402 |
362 // Scans a single JavaScript token. | 403 // Scans a single JavaScript token. |
363 void Scan(); | 404 void Scan(); |
364 | 405 |
365 void ScanDecimalDigits(); | 406 void ScanDecimalDigits(); |
366 Token::Value ScanNumber(bool seen_period); | 407 Token::Value ScanNumber(bool seen_period); |
// The old ScanIdentifier() row is paired with ScanIdentifierOrKeyword() in the
// new revision, which also adds ScanIdentifierSuffix() taking a LiteralScope*.
367 Token::Value ScanIdentifier(); | 408 Token::Value ScanIdentifierOrKeyword(); |
| 409 Token::Value ScanIdentifierSuffix(LiteralScope* literal); |
368 | 410 |
369 void ScanEscape(); | 411 void ScanEscape(); |
370 Token::Value ScanString(); | 412 Token::Value ScanString(); |
371 | 413 |
372 // Scans a possible HTML comment -- begins with '<!'. | 414 // Scans a possible HTML comment -- begins with '<!'. |
373 Token::Value ScanHtmlComment(); | 415 Token::Value ScanHtmlComment(); |
374 | 416 |
375 // Decodes a unicode escape-sequence which is part of an identifier. | 417 // Decodes a unicode escape-sequence which is part of an identifier. |
376 // If the escape sequence cannot be decoded the result is kBadChar. | 418 // If the escape sequence cannot be decoded the result is kBadChar. |
377 uc32 ScanIdentifierUnicodeEscape(); | 419 uc32 ScanIdentifierUnicodeEscape(); |
378 | 420 |
// Mask that RecordsLiteral() tests against.
// NOTE(review): its initialization is not visible in this section -- confirm
// the constructor sets it.
| 421 int literal_flags_; |
379 bool has_line_terminator_before_next_; | 422 bool has_line_terminator_before_next_; |
380 }; | 423 }; |
381 | 424 |
382 | 425 |
383 // ---------------------------------------------------------------------------- | 426 // ---------------------------------------------------------------------------- |
384 // Keyword matching state machine. | 427 // Keyword matching state machine. |
385 | 428 |
386 class KeywordMatcher { | 429 class KeywordMatcher { |
387 // Incrementally recognize keywords. | 430 // Incrementally recognize keywords. |
388 // | 431 // |
389 // Recognized keywords: | 432 // Recognized keywords: |
390 // break case catch const* continue debugger* default delete do else | 433 // break case catch const* continue debugger* default delete do else |
391 // finally false for function if in instanceof native* new null | 434 // finally false for function if in instanceof native* new null |
392 // return switch this throw true try typeof var void while with | 435 // return switch this throw true try typeof var void while with |
393 // | 436 // |
394 // *: Actually "future reserved keywords". These are the only ones we | 437 // *: Actually "future reserved keywords". These are the only ones we |
395 // recognize, the remaining are allowed as identifiers. | 438 // recognize, the remaining are allowed as identifiers. |
396 // In ES5 strict mode, we should disallow all reserved keywords. | 439 // In ES5 strict mode, we should disallow all reserved keywords. |
397 public: | 440 public: |
// Default constructor: state_ = INITIAL, token_ = Token::IDENTIFIER,
// keyword_ = NULL, counter_ = 0 and keyword_token_ = Token::ILLEGAL.
// Identical in both revisions.
398 KeywordMatcher() | 441 KeywordMatcher() |
399 : state_(INITIAL), | 442 : state_(INITIAL), |
400 token_(Token::IDENTIFIER), | 443 token_(Token::IDENTIFIER), |
401 keyword_(NULL), | 444 keyword_(NULL), |
402 counter_(0), | 445 counter_(0), |
403 keyword_token_(Token::ILLEGAL) {} | 446 keyword_token_(Token::ILLEGAL) {} |
404 | 447 |
// Returns the current value of token_. Identical in both revisions.
405 Token::Value token() { return token_; } | 448 Token::Value token() { return token_; } |
406 | 449 |
// Feeds one character to the matcher. Step(input) is called only while state_
// is not UNMATCHABLE. The new revision changes the return type from void to
// bool and returns whether state_ is still not UNMATCHABLE afterwards.
407 inline void AddChar(unibrow::uchar input) { | 450 inline bool AddChar(unibrow::uchar input) { |
408 if (state_ != UNMATCHABLE) { | 451 if (state_ != UNMATCHABLE) { |
409 Step(input); | 452 Step(input); |
410 } | 453 } |
| 454 return state_ != UNMATCHABLE; |
411 } | 455 } |
412 | 456 |
// Sets token_ to Token::IDENTIFIER and state_ to UNMATCHABLE. Identical in
// both revisions.
413 void Fail() { | 457 void Fail() { |
414 token_ = Token::IDENTIFIER; | 458 token_ = Token::IDENTIFIER; |
415 state_ = UNMATCHABLE; | 459 state_ = UNMATCHABLE; |
416 } | 460 } |
417 | 461 |
418 private: | 462 private: |
419 enum State { | 463 enum State { |
420 UNMATCHABLE, | 464 UNMATCHABLE, |
(...skipping 30 matching lines...) Expand all Loading... |
451 kFirstCharRangeMax - kFirstCharRangeMin + 1; | 495 kFirstCharRangeMax - kFirstCharRangeMin + 1; |
452 // State map for first keyword character range. | 496 // State map for first keyword character range. |
453 static FirstState first_states_[kFirstCharRangeLength]; | 497 static FirstState first_states_[kFirstCharRangeLength]; |
454 | 498 |
455 // If input equals keyword's character at position, continue matching keyword | 499 // If input equals keyword's character at position, continue matching keyword |
456 // from that position. | 500 // from that position. |
// Compares input with keyword[position]. On a mismatch it returns false and
// changes nothing. On a match it sets state_ to KEYWORD_PREFIX, stores
// keyword, position + 1 and token_if_match in keyword_, counter_ and
// keyword_token_, and returns true.
// The new revision only inverts the test into an early return; the old and
// new statements are interleaved row by row below.
// NOTE(review): position is not range-checked here -- presumably callers pass
// an index inside keyword; confirm.
457 inline bool MatchKeywordStart(unibrow::uchar input, | 501 inline bool MatchKeywordStart(unibrow::uchar input, |
458 const char* keyword, | 502 const char* keyword, |
459 int position, | 503 int position, |
460 Token::Value token_if_match) { | 504 Token::Value token_if_match) { |
461 if (input == static_cast<unibrow::uchar>(keyword[position])) { | 505 if (input != static_cast<unibrow::uchar>(keyword[position])) { |
462 state_ = KEYWORD_PREFIX; | 506 return false; |
463 this->keyword_ = keyword; | |
464 this->counter_ = position + 1; | |
465 this->keyword_token_ = token_if_match; | |
466 return true; | |
467 } | 507 } |
468 return false; | 508 state_ = KEYWORD_PREFIX; |
| 509 this->keyword_ = keyword; |
| 510 this->counter_ = position + 1; |
| 511 this->keyword_token_ = token_if_match; |
| 512 return true; |
469 } | 513 } |
470 | 514 |
471 // If input equals match character, transition to new state and return true. | 515 // If input equals match character, transition to new state and return true. |
// Compares input with match. On a mismatch it returns false and leaves state_
// untouched; on a match it sets state_ to new_state and returns true.
// The new revision only inverts the test into an early return; the old and
// new statements are interleaved row by row below.
472 inline bool MatchState(unibrow::uchar input, char match, State new_state) { | 516 inline bool MatchState(unibrow::uchar input, char match, State new_state) { |
473 if (input == static_cast<unibrow::uchar>(match)) { | 517 if (input != static_cast<unibrow::uchar>(match)) { |
474 state_ = new_state; | 518 return false; |
475 return true; | |
476 } | 519 } |
477 return false; | 520 state_ = new_state; |
| 521 return true; |
478 } | 522 } |
479 | 523 |
480 inline bool MatchKeyword(unibrow::uchar input, | 524 inline bool MatchKeyword(unibrow::uchar input, |
481 char match, | 525 char match, |
482 State new_state, | 526 State new_state, |
483 Token::Value keyword_token) { | 527 Token::Value keyword_token) { |
484 if (input != static_cast<unibrow::uchar>(match)) { | 528 if (input != static_cast<unibrow::uchar>(match)) { |
485 return false; | 529 return false; |
486 } | 530 } |
487 state_ = new_state; | 531 state_ = new_state; |
(...skipping 12 matching lines...) Expand all Loading... |
500 // keyword with the current prefix). | 544 // keyword with the current prefix). |
501 const char* keyword_; | 545 const char* keyword_; |
502 int counter_; | 546 int counter_; |
503 Token::Value keyword_token_; | 547 Token::Value keyword_token_; |
504 }; | 548 }; |
505 | 549 |
506 | 550 |
507 } } // namespace v8::internal | 551 } } // namespace v8::internal |
508 | 552 |
509 #endif // V8_SCANNER_BASE_H_ | 553 #endif // V8_SCANNER_BASE_H_ |
OLD | NEW |