| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 268 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 279 Token::Value peek() const { return next_.token; } | 279 Token::Value peek() const { return next_.token; } |
| 280 | 280 |
| 281 struct Location { | 281 struct Location { |
| 282 Location(int b, int e) : beg_pos(b), end_pos(e) { } | 282 Location(int b, int e) : beg_pos(b), end_pos(e) { } |
| 283 Location() : beg_pos(0), end_pos(0) { } | 283 Location() : beg_pos(0), end_pos(0) { } |
| 284 | 284 |
| 285 bool IsValid() const { | 285 bool IsValid() const { |
| 286 return beg_pos >= 0 && end_pos >= beg_pos; | 286 return beg_pos >= 0 && end_pos >= beg_pos; |
| 287 } | 287 } |
| 288 | 288 |
| 289 static Location invalid() { return Location(-1, -1); } |
| 290 |
| 289 int beg_pos; | 291 int beg_pos; |
| 290 int end_pos; | 292 int end_pos; |
| 291 }; | 293 }; |
| 292 | 294 |
| 293 static Location NoLocation() { | |
| 294 return Location(-1, -1); | |
| 295 } | |
| 296 | |
| 297 // Returns the location information for the current token | 295 // Returns the location information for the current token |
| 298 // (the token returned by Next()). | 296 // (the token returned by Next()). |
| 299 Location location() const { return current_.location; } | 297 Location location() const { return current_.location; } |
| 300 Location peek_location() const { return next_.location; } | 298 Location peek_location() const { return next_.location; } |
| 301 | 299 |
| 302 // Returns the location of the last seen octal literal | |
| 303 int octal_position() const { return octal_pos_; } | |
| 304 void clear_octal_position() { octal_pos_ = -1; } | |
| 305 | |
| 306 // Returns the literal string, if any, for the current token (the | 300 // Returns the literal string, if any, for the current token (the |
| 307 // token returned by Next()). The string is 0-terminated and in | 301 // token returned by Next()). The string is 0-terminated and in |
| 308 // UTF-8 format; they may contain 0-characters. Literal strings are | 302 // UTF-8 format; they may contain 0-characters. Literal strings are |
| 309 // collected for identifiers, strings, and numbers. | 303 // collected for identifiers, strings, and numbers. |
| 310 // These functions only give the correct result if the literal | 304 // These functions only give the correct result if the literal |
| 311 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 305 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
| 312 bool is_literal_ascii() { | 306 bool is_literal_ascii() { |
| 313 ASSERT_NOT_NULL(current_.literal_chars); | 307 ASSERT_NOT_NULL(current_.literal_chars); |
| 314 return current_.literal_chars->is_ascii(); | 308 return current_.literal_chars->is_ascii(); |
| 315 } | 309 } |
| 316 Vector<const char> literal_ascii_string() { | 310 Vector<const char> literal_ascii_string() { |
| 317 ASSERT_NOT_NULL(current_.literal_chars); | 311 ASSERT_NOT_NULL(current_.literal_chars); |
| 318 return current_.literal_chars->ascii_literal(); | 312 return current_.literal_chars->ascii_literal(); |
| 319 } | 313 } |
| 320 Vector<const uc16> literal_uc16_string() { | 314 Vector<const uc16> literal_uc16_string() { |
| 321 ASSERT_NOT_NULL(current_.literal_chars); | 315 ASSERT_NOT_NULL(current_.literal_chars); |
| 322 return current_.literal_chars->uc16_literal(); | 316 return current_.literal_chars->uc16_literal(); |
| 323 } | 317 } |
| 324 int literal_length() const { | 318 int literal_length() const { |
| 325 ASSERT_NOT_NULL(current_.literal_chars); | 319 ASSERT_NOT_NULL(current_.literal_chars); |
| 326 return current_.literal_chars->length(); | 320 return current_.literal_chars->length(); |
| 327 } | 321 } |
| 328 | 322 |
| 323 bool literal_contains_escapes() const { |
| 324 Location location = current_.location; |
| 325 int source_length = (location.end_pos - location.beg_pos); |
| 326 if (current_.token == Token::STRING) { |
| 327 // Subtract delimiters. |
| 328 source_length -= 2; |
| 329 } |
| 330 return current_.literal_chars->length() != source_length; |
| 331 } |
| 332 |
| 329 // Returns the literal string for the next token (the token that | 333 // Returns the literal string for the next token (the token that |
| 330 // would be returned if Next() were called). | 334 // would be returned if Next() were called). |
| 331 bool is_next_literal_ascii() { | 335 bool is_next_literal_ascii() { |
| 332 ASSERT_NOT_NULL(next_.literal_chars); | 336 ASSERT_NOT_NULL(next_.literal_chars); |
| 333 return next_.literal_chars->is_ascii(); | 337 return next_.literal_chars->is_ascii(); |
| 334 } | 338 } |
| 335 Vector<const char> next_literal_ascii_string() { | 339 Vector<const char> next_literal_ascii_string() { |
| 336 ASSERT_NOT_NULL(next_.literal_chars); | 340 ASSERT_NOT_NULL(next_.literal_chars); |
| 337 return next_.literal_chars->ascii_literal(); | 341 return next_.literal_chars->ascii_literal(); |
| 338 } | 342 } |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 410 if (c0_ == next) { | 414 if (c0_ == next) { |
| 411 Advance(); | 415 Advance(); |
| 412 return then; | 416 return then; |
| 413 } else { | 417 } else { |
| 414 return else_; | 418 return else_; |
| 415 } | 419 } |
| 416 } | 420 } |
| 417 | 421 |
| 418 uc32 ScanHexEscape(uc32 c, int length); | 422 uc32 ScanHexEscape(uc32 c, int length); |
| 419 | 423 |
| 420 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | |
| 421 uc32 ScanOctalEscape(uc32 c, int length); | |
| 422 | |
| 423 // Return the current source position. | 424 // Return the current source position. |
| 424 int source_pos() { | 425 int source_pos() { |
| 425 return source_->pos() - kCharacterLookaheadBufferSize; | 426 return source_->pos() - kCharacterLookaheadBufferSize; |
| 426 } | 427 } |
| 427 | 428 |
| 428 UnicodeCache* unicode_cache_; | 429 UnicodeCache* unicode_cache_; |
| 429 | 430 |
| 430 // Buffers collecting literal strings, numbers, etc. | 431 // Buffers collecting literal strings, numbers, etc. |
| 431 LiteralBuffer literal_buffer1_; | 432 LiteralBuffer literal_buffer1_; |
| 432 LiteralBuffer literal_buffer2_; | 433 LiteralBuffer literal_buffer2_; |
| 433 | 434 |
| 434 TokenDesc current_; // desc for current token (as returned by Next()) | 435 TokenDesc current_; // desc for current token (as returned by Next()) |
| 435 TokenDesc next_; // desc for next token (one token look-ahead) | 436 TokenDesc next_; // desc for next token (one token look-ahead) |
| 436 | 437 |
| 437 // Input stream. Must be initialized to an UC16CharacterStream. | 438 // Input stream. Must be initialized to an UC16CharacterStream. |
| 438 UC16CharacterStream* source_; | 439 UC16CharacterStream* source_; |
| 439 | 440 |
| 440 // Start position of the octal literal last scanned. | |
| 441 int octal_pos_; | |
| 442 | |
| 443 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 441 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| 444 uc32 c0_; | 442 uc32 c0_; |
| 445 }; | 443 }; |
| 446 | 444 |
| 447 // ---------------------------------------------------------------------------- | 445 // ---------------------------------------------------------------------------- |
| 448 // JavaScriptScanner - base logic for JavaScript scanning. | 446 // JavaScriptScanner - base logic for JavaScript scanning. |
| 449 | 447 |
| 450 class JavaScriptScanner : public Scanner { | 448 class JavaScriptScanner : public Scanner { |
| 451 public: | 449 public: |
| 452 // A LiteralScope that disables recording of some types of JavaScript | 450 // A LiteralScope that disables recording of some types of JavaScript |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 485 // character(s) must be /(=). Returns true if a pattern is scanned. | 483 // character(s) must be /(=). Returns true if a pattern is scanned. |
| 486 bool ScanRegExpPattern(bool seen_equal); | 484 bool ScanRegExpPattern(bool seen_equal); |
| 487 // Returns true if regexp flags are scanned (always since flags can | 485 // Returns true if regexp flags are scanned (always since flags can |
| 488 // be empty). | 486 // be empty). |
| 489 bool ScanRegExpFlags(); | 487 bool ScanRegExpFlags(); |
| 490 | 488 |
| 491 // Tells whether the buffer contains an identifier (no escapes). | 489 // Tells whether the buffer contains an identifier (no escapes). |
| 492 // Used for checking if a property name is an identifier. | 490 // Used for checking if a property name is an identifier. |
| 493 static bool IsIdentifier(unibrow::CharacterStream* buffer); | 491 static bool IsIdentifier(unibrow::CharacterStream* buffer); |
| 494 | 492 |
| 493 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
| 494 uc32 ScanOctalEscape(uc32 c, int length); |
| 495 |
| 496 // Returns the location of the last seen octal literal |
| 497 Location octal_position() const { return octal_pos_; } |
| 498 void clear_octal_position() { octal_pos_ = Location::invalid(); } |
| 499 |
| 495 // Seek forward to the given position. This operation does not | 500 // Seek forward to the given position. This operation does not |
| 496 // work in general, for instance when there are pushed back | 501 // work in general, for instance when there are pushed back |
| 497 // characters, but works for seeking forward until simple delimiter | 502 // characters, but works for seeking forward until simple delimiter |
| 498 // tokens, which is what it is used for. | 503 // tokens, which is what it is used for. |
| 499 void SeekForward(int pos); | 504 void SeekForward(int pos); |
| 500 | 505 |
| 501 protected: | 506 protected: |
| 502 bool SkipWhiteSpace(); | 507 bool SkipWhiteSpace(); |
| 503 Token::Value SkipSingleLineComment(); | 508 Token::Value SkipSingleLineComment(); |
| 504 Token::Value SkipMultiLineComment(); | 509 Token::Value SkipMultiLineComment(); |
| 505 | 510 |
| 506 // Scans a single JavaScript token. | 511 // Scans a single JavaScript token. |
| 507 void Scan(); | 512 void Scan(); |
| 508 | 513 |
| 509 void ScanDecimalDigits(); | 514 void ScanDecimalDigits(); |
| 510 Token::Value ScanNumber(bool seen_period); | 515 Token::Value ScanNumber(bool seen_period); |
| 511 Token::Value ScanIdentifierOrKeyword(); | 516 Token::Value ScanIdentifierOrKeyword(); |
| 512 Token::Value ScanIdentifierSuffix(LiteralScope* literal); | 517 Token::Value ScanIdentifierSuffix(LiteralScope* literal); |
| 513 | 518 |
| 514 void ScanEscape(); | 519 void ScanEscape(); |
| 515 Token::Value ScanString(); | 520 Token::Value ScanString(); |
| 516 | 521 |
| 517 // Scans a possible HTML comment -- begins with '<!'. | 522 // Scans a possible HTML comment -- begins with '<!'. |
| 518 Token::Value ScanHtmlComment(); | 523 Token::Value ScanHtmlComment(); |
| 519 | 524 |
| 520 // Decodes a unicode escape-sequence which is part of an identifier. | 525 // Decodes a unicode escape-sequence which is part of an identifier. |
| 521 // If the escape sequence cannot be decoded the result is kBadChar. | 526 // If the escape sequence cannot be decoded the result is kBadChar. |
| 522 uc32 ScanIdentifierUnicodeEscape(); | 527 uc32 ScanIdentifierUnicodeEscape(); |
| 523 | 528 |
| 529 // Start position of the octal literal last scanned. |
| 530 Location octal_pos_; |
| 531 |
| 524 bool has_line_terminator_before_next_; | 532 bool has_line_terminator_before_next_; |
| 525 }; | 533 }; |
| 526 | 534 |
| 527 | 535 |
| 528 // ---------------------------------------------------------------------------- | 536 // ---------------------------------------------------------------------------- |
| 529 // Keyword matching state machine. | 537 // Keyword matching state machine. |
| 530 | 538 |
| 531 class KeywordMatcher { | 539 class KeywordMatcher { |
| 532 // Incrementally recognize keywords. | 540 // Incrementally recognize keywords. |
| 533 // | 541 // |
| (...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 653 // keyword with the current prefix). | 661 // keyword with the current prefix). |
| 654 const char* keyword_; | 662 const char* keyword_; |
| 655 int counter_; | 663 int counter_; |
| 656 Token::Value keyword_token_; | 664 Token::Value keyword_token_; |
| 657 }; | 665 }; |
| 658 | 666 |
| 659 | 667 |
| 660 } } // namespace v8::internal | 668 } } // namespace v8::internal |
| 661 | 669 |
| 662 #endif // V8_SCANNER_BASE_H_ | 670 #endif // V8_SCANNER_BASE_H_ |
| OLD | NEW |