OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 268 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
279 Token::Value peek() const { return next_.token; } | 279 Token::Value peek() const { return next_.token; } |
280 | 280 |
281 struct Location { | 281 struct Location { |
282 Location(int b, int e) : beg_pos(b), end_pos(e) { } | 282 Location(int b, int e) : beg_pos(b), end_pos(e) { } |
283 Location() : beg_pos(0), end_pos(0) { } | 283 Location() : beg_pos(0), end_pos(0) { } |
284 | 284 |
285 bool IsValid() const { | 285 bool IsValid() const { |
286 return beg_pos >= 0 && end_pos >= beg_pos; | 286 return beg_pos >= 0 && end_pos >= beg_pos; |
287 } | 287 } |
288 | 288 |
| 289 static Location invalid() { return Location(-1, -1); } |
| 290 |
289 int beg_pos; | 291 int beg_pos; |
290 int end_pos; | 292 int end_pos; |
291 }; | 293 }; |
292 | 294 |
293 static Location NoLocation() { | |
294 return Location(-1, -1); | |
295 } | |
296 | |
297 // Returns the location information for the current token | 295 // Returns the location information for the current token |
298 // (the token returned by Next()). | 296 // (the token returned by Next()). |
299 Location location() const { return current_.location; } | 297 Location location() const { return current_.location; } |
300 Location peek_location() const { return next_.location; } | 298 Location peek_location() const { return next_.location; } |
301 | 299 |
302 // Returns the location of the last seen octal literal | |
303 int octal_position() const { return octal_pos_; } | |
304 void clear_octal_position() { octal_pos_ = -1; } | |
305 | |
306 // Returns the literal string, if any, for the current token (the | 300 // Returns the literal string, if any, for the current token (the |
307 // token returned by Next()). The string is 0-terminated and in | 301 // token returned by Next()). The string is 0-terminated and in |
308 // UTF-8 format; it may contain 0-characters. Literal strings are | 302 // UTF-8 format; it may contain 0-characters. Literal strings are |
309 // collected for identifiers, strings, and numbers. | 303 // collected for identifiers, strings, and numbers. |
310 // These functions only give the correct result if the literal | 304 // These functions only give the correct result if the literal |
311 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 305 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
312 bool is_literal_ascii() { | 306 bool is_literal_ascii() { |
313 ASSERT_NOT_NULL(current_.literal_chars); | 307 ASSERT_NOT_NULL(current_.literal_chars); |
314 return current_.literal_chars->is_ascii(); | 308 return current_.literal_chars->is_ascii(); |
315 } | 309 } |
316 Vector<const char> literal_ascii_string() { | 310 Vector<const char> literal_ascii_string() { |
317 ASSERT_NOT_NULL(current_.literal_chars); | 311 ASSERT_NOT_NULL(current_.literal_chars); |
318 return current_.literal_chars->ascii_literal(); | 312 return current_.literal_chars->ascii_literal(); |
319 } | 313 } |
320 Vector<const uc16> literal_uc16_string() { | 314 Vector<const uc16> literal_uc16_string() { |
321 ASSERT_NOT_NULL(current_.literal_chars); | 315 ASSERT_NOT_NULL(current_.literal_chars); |
322 return current_.literal_chars->uc16_literal(); | 316 return current_.literal_chars->uc16_literal(); |
323 } | 317 } |
324 int literal_length() const { | 318 int literal_length() const { |
325 ASSERT_NOT_NULL(current_.literal_chars); | 319 ASSERT_NOT_NULL(current_.literal_chars); |
326 return current_.literal_chars->length(); | 320 return current_.literal_chars->length(); |
327 } | 321 } |
328 | 322 |
| 323 bool literal_contains_escapes() const { |
| 324 Location location = current_.location; |
| 325 int source_length = (location.end_pos - location.beg_pos); |
| 326 if (current_.token == Token::STRING) { |
| 327 // Subtract delimiters. |
| 328 source_length -= 2; |
| 329 } |
| 330 return current_.literal_chars->length() != source_length; |
| 331 } |
| 332 |
329 // Returns the literal string for the next token (the token that | 333 // Returns the literal string for the next token (the token that |
330 // would be returned if Next() were called). | 334 // would be returned if Next() were called). |
331 bool is_next_literal_ascii() { | 335 bool is_next_literal_ascii() { |
332 ASSERT_NOT_NULL(next_.literal_chars); | 336 ASSERT_NOT_NULL(next_.literal_chars); |
333 return next_.literal_chars->is_ascii(); | 337 return next_.literal_chars->is_ascii(); |
334 } | 338 } |
335 Vector<const char> next_literal_ascii_string() { | 339 Vector<const char> next_literal_ascii_string() { |
336 ASSERT_NOT_NULL(next_.literal_chars); | 340 ASSERT_NOT_NULL(next_.literal_chars); |
337 return next_.literal_chars->ascii_literal(); | 341 return next_.literal_chars->ascii_literal(); |
338 } | 342 } |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
410 if (c0_ == next) { | 414 if (c0_ == next) { |
411 Advance(); | 415 Advance(); |
412 return then; | 416 return then; |
413 } else { | 417 } else { |
414 return else_; | 418 return else_; |
415 } | 419 } |
416 } | 420 } |
417 | 421 |
418 uc32 ScanHexEscape(uc32 c, int length); | 422 uc32 ScanHexEscape(uc32 c, int length); |
419 | 423 |
420 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | |
421 uc32 ScanOctalEscape(uc32 c, int length); | |
422 | |
423 // Return the current source position. | 424 // Return the current source position. |
424 int source_pos() { | 425 int source_pos() { |
425 return source_->pos() - kCharacterLookaheadBufferSize; | 426 return source_->pos() - kCharacterLookaheadBufferSize; |
426 } | 427 } |
427 | 428 |
428 UnicodeCache* unicode_cache_; | 429 UnicodeCache* unicode_cache_; |
429 | 430 |
430 // Buffers collecting literal strings, numbers, etc. | 431 // Buffers collecting literal strings, numbers, etc. |
431 LiteralBuffer literal_buffer1_; | 432 LiteralBuffer literal_buffer1_; |
432 LiteralBuffer literal_buffer2_; | 433 LiteralBuffer literal_buffer2_; |
433 | 434 |
434 TokenDesc current_; // desc for current token (as returned by Next()) | 435 TokenDesc current_; // desc for current token (as returned by Next()) |
435 TokenDesc next_; // desc for next token (one token look-ahead) | 436 TokenDesc next_; // desc for next token (one token look-ahead) |
436 | 437 |
437 // Input stream. Must be initialized to a UC16CharacterStream. | 438 // Input stream. Must be initialized to a UC16CharacterStream. |
438 UC16CharacterStream* source_; | 439 UC16CharacterStream* source_; |
439 | 440 |
440 // Start position of the octal literal last scanned. | |
441 int octal_pos_; | |
442 | |
443 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 441 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
444 uc32 c0_; | 442 uc32 c0_; |
445 }; | 443 }; |
446 | 444 |
447 // ---------------------------------------------------------------------------- | 445 // ---------------------------------------------------------------------------- |
448 // JavaScriptScanner - base logic for JavaScript scanning. | 446 // JavaScriptScanner - base logic for JavaScript scanning. |
449 | 447 |
450 class JavaScriptScanner : public Scanner { | 448 class JavaScriptScanner : public Scanner { |
451 public: | 449 public: |
452 // A LiteralScope that disables recording of some types of JavaScript | 450 // A LiteralScope that disables recording of some types of JavaScript |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
485 // character(s) must be /(=). Returns true if a pattern is scanned. | 483 // character(s) must be /(=). Returns true if a pattern is scanned. |
486 bool ScanRegExpPattern(bool seen_equal); | 484 bool ScanRegExpPattern(bool seen_equal); |
487 // Returns true if regexp flags are scanned (always since flags can | 485 // Returns true if regexp flags are scanned (always since flags can |
488 // be empty). | 486 // be empty). |
489 bool ScanRegExpFlags(); | 487 bool ScanRegExpFlags(); |
490 | 488 |
491 // Tells whether the buffer contains an identifier (no escapes). | 489 // Tells whether the buffer contains an identifier (no escapes). |
492 // Used for checking if a property name is an identifier. | 490 // Used for checking if a property name is an identifier. |
493 static bool IsIdentifier(unibrow::CharacterStream* buffer); | 491 static bool IsIdentifier(unibrow::CharacterStream* buffer); |
494 | 492 |
| 493 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
| 494 uc32 ScanOctalEscape(uc32 c, int length); |
| 495 |
| 496 // Returns the location of the last seen octal literal. |
| 497 Location octal_position() const { return octal_pos_; } |
| 498 void clear_octal_position() { octal_pos_ = Location::invalid(); } |
| 499 |
495 // Seek forward to the given position. This operation does not | 500 // Seek forward to the given position. This operation does not |
496 // work in general, for instance when there are pushed back | 501 // work in general, for instance when there are pushed back |
497 // characters, but works for seeking forward until simple delimiter | 502 // characters, but works for seeking forward until simple delimiter |
498 // tokens, which is what it is used for. | 503 // tokens, which is what it is used for. |
499 void SeekForward(int pos); | 504 void SeekForward(int pos); |
500 | 505 |
501 protected: | 506 protected: |
502 bool SkipWhiteSpace(); | 507 bool SkipWhiteSpace(); |
503 Token::Value SkipSingleLineComment(); | 508 Token::Value SkipSingleLineComment(); |
504 Token::Value SkipMultiLineComment(); | 509 Token::Value SkipMultiLineComment(); |
505 | 510 |
506 // Scans a single JavaScript token. | 511 // Scans a single JavaScript token. |
507 void Scan(); | 512 void Scan(); |
508 | 513 |
509 void ScanDecimalDigits(); | 514 void ScanDecimalDigits(); |
510 Token::Value ScanNumber(bool seen_period); | 515 Token::Value ScanNumber(bool seen_period); |
511 Token::Value ScanIdentifierOrKeyword(); | 516 Token::Value ScanIdentifierOrKeyword(); |
512 Token::Value ScanIdentifierSuffix(LiteralScope* literal); | 517 Token::Value ScanIdentifierSuffix(LiteralScope* literal); |
513 | 518 |
514 void ScanEscape(); | 519 void ScanEscape(); |
515 Token::Value ScanString(); | 520 Token::Value ScanString(); |
516 | 521 |
517 // Scans a possible HTML comment -- begins with '<!'. | 522 // Scans a possible HTML comment -- begins with '<!'. |
518 Token::Value ScanHtmlComment(); | 523 Token::Value ScanHtmlComment(); |
519 | 524 |
520 // Decodes a unicode escape-sequence which is part of an identifier. | 525 // Decodes a unicode escape-sequence which is part of an identifier. |
521 // If the escape sequence cannot be decoded the result is kBadChar. | 526 // If the escape sequence cannot be decoded the result is kBadChar. |
522 uc32 ScanIdentifierUnicodeEscape(); | 527 uc32 ScanIdentifierUnicodeEscape(); |
523 | 528 |
| 529 // Location of the octal literal last scanned. |
| 530 Location octal_pos_; |
| 531 |
524 bool has_line_terminator_before_next_; | 532 bool has_line_terminator_before_next_; |
525 }; | 533 }; |
526 | 534 |
527 | 535 |
528 // ---------------------------------------------------------------------------- | 536 // ---------------------------------------------------------------------------- |
529 // Keyword matching state machine. | 537 // Keyword matching state machine. |
530 | 538 |
531 class KeywordMatcher { | 539 class KeywordMatcher { |
532 // Incrementally recognize keywords. | 540 // Incrementally recognize keywords. |
533 // | 541 // |
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
653 // keyword with the current prefix). | 661 // keyword with the current prefix). |
654 const char* keyword_; | 662 const char* keyword_; |
655 int counter_; | 663 int counter_; |
656 Token::Value keyword_token_; | 664 Token::Value keyword_token_; |
657 }; | 665 }; |
658 | 666 |
659 | 667 |
660 } } // namespace v8::internal | 668 } } // namespace v8::internal |
661 | 669 |
662 #endif // V8_SCANNER_BASE_H_ | 670 #endif // V8_SCANNER_BASE_H_ |
OLD | NEW |