| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 242 matching lines...) | (...skipping 242 matching lines...) |
| 253 | 253 |
| 254 bool is_ascii_; | 254 bool is_ascii_; |
| 255 int position_; | 255 int position_; |
| 256 Vector<byte> backing_store_; | 256 Vector<byte> backing_store_; |
| 257 | 257 |
| 258 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); | 258 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); |
| 259 }; | 259 }; |
| 260 | 260 |
| 261 | 261 |
| 262 // ---------------------------------------------------------------------------- | 262 // ---------------------------------------------------------------------------- |
| 263 // Scanner base-class. | 263 // JavaScript Scanner. |
| 264 | 264 |
| 265 // Generic functionality used by both JSON and JavaScript scanners. | |
| 266 class Scanner { | 265 class Scanner { |
| 267 public: | 266 public: |
| 268 // -1 is outside of the range of any real source code. | 267 // Scoped helper for literal recording. Automatically drops the literal |
| 269 static const int kNoOctalLocation = -1; | 268 // if aborting the scanning before it's complete. |
| 270 | |
| 271 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; | |
| 272 | |
| 273 class LiteralScope { | 269 class LiteralScope { |
| 274 public: | 270 public: |
| 275 explicit LiteralScope(Scanner* self); | 271 explicit LiteralScope(Scanner* self) |
| 276 ~LiteralScope(); | 272 : scanner_(self), complete_(false) { |
| 277 void Complete(); | 273 scanner_->StartLiteral(); |
| 274 } |
| 275 ~LiteralScope() { |
| 276 if (!complete_) scanner_->DropLiteral(); |
| 277 } |
| 278 void Complete() { |
| 279 scanner_->TerminateLiteral(); |
| 280 complete_ = true; |
| 281 } |
| 278 | 282 |
| 279 private: | 283 private: |
| 280 Scanner* scanner_; | 284 Scanner* scanner_; |
| 281 bool complete_; | 285 bool complete_; |
| 282 }; | 286 }; |
| 283 | 287 |
| 284 explicit Scanner(UnicodeCache* scanner_contants); | 288 // Representation of an interval of source positions. |
| 285 | |
| 286 // Returns the current token again. | |
| 287 Token::Value current_token() { return current_.token; } | |
| 288 | |
| 289 // One token look-ahead (past the token returned by Next()). | |
| 290 Token::Value peek() const { return next_.token; } | |
| 291 | |
| 292 struct Location { | 289 struct Location { |
| 293 Location(int b, int e) : beg_pos(b), end_pos(e) { } | 290 Location(int b, int e) : beg_pos(b), end_pos(e) { } |
| 294 Location() : beg_pos(0), end_pos(0) { } | 291 Location() : beg_pos(0), end_pos(0) { } |
| 295 | 292 |
| 296 bool IsValid() const { | 293 bool IsValid() const { |
| 297 return beg_pos >= 0 && end_pos >= beg_pos; | 294 return beg_pos >= 0 && end_pos >= beg_pos; |
| 298 } | 295 } |
| 299 | 296 |
| 300 static Location invalid() { return Location(-1, -1); } | 297 static Location invalid() { return Location(-1, -1); } |
| 301 | 298 |
| 302 int beg_pos; | 299 int beg_pos; |
| 303 int end_pos; | 300 int end_pos; |
| 304 }; | 301 }; |
| 305 | 302 |
| 303 // -1 is outside of the range of any real source code. |
| 304 static const int kNoOctalLocation = -1; |
| 305 |
| 306 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; |
| 307 |
| 308 explicit Scanner(UnicodeCache* scanner_contants); |
| 309 |
| 310 void Initialize(UC16CharacterStream* source); |
| 311 |
| 312 // Returns the next token and advances input. |
| 313 Token::Value Next(); |
| 314 // Returns the current token again. |
| 315 Token::Value current_token() { return current_.token; } |
| 306 // Returns the location information for the current token | 316 // Returns the location information for the current token |
| 307 // (the token returned by Next()). | 317 // (the token last returned by Next()). |
| 308 Location location() const { return current_.location; } | 318 Location location() const { return current_.location; } |
| 309 Location peek_location() const { return next_.location; } | |
| 310 | |
| 311 // Returns the literal string, if any, for the current token (the | 319 // Returns the literal string, if any, for the current token (the |
| 312 // token returned by Next()). The string is 0-terminated and in | 320 // token last returned by Next()). The string is 0-terminated. |
| 313 // UTF-8 format; they may contain 0-characters. Literal strings are | 321 // Literal strings are collected for identifiers, strings, and |
| 314 // collected for identifiers, strings, and numbers. | 322 // numbers. |
| 315 // These functions only give the correct result if the literal | 323 // These functions only give the correct result if the literal |
| 316 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 324 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
| 317 bool is_literal_ascii() { | |
| 318 ASSERT_NOT_NULL(current_.literal_chars); | |
| 319 return current_.literal_chars->is_ascii(); | |
| 320 } | |
| 321 Vector<const char> literal_ascii_string() { | 325 Vector<const char> literal_ascii_string() { |
| 322 ASSERT_NOT_NULL(current_.literal_chars); | 326 ASSERT_NOT_NULL(current_.literal_chars); |
| 323 return current_.literal_chars->ascii_literal(); | 327 return current_.literal_chars->ascii_literal(); |
| 324 } | 328 } |
| 325 Vector<const uc16> literal_uc16_string() { | 329 Vector<const uc16> literal_uc16_string() { |
| 326 ASSERT_NOT_NULL(current_.literal_chars); | 330 ASSERT_NOT_NULL(current_.literal_chars); |
| 327 return current_.literal_chars->uc16_literal(); | 331 return current_.literal_chars->uc16_literal(); |
| 328 } | 332 } |
| 333 bool is_literal_ascii() { |
| 334 ASSERT_NOT_NULL(current_.literal_chars); |
| 335 return current_.literal_chars->is_ascii(); |
| 336 } |
| 329 int literal_length() const { | 337 int literal_length() const { |
| 330 ASSERT_NOT_NULL(current_.literal_chars); | 338 ASSERT_NOT_NULL(current_.literal_chars); |
| 331 return current_.literal_chars->length(); | 339 return current_.literal_chars->length(); |
| 332 } | 340 } |
| 333 | 341 |
| 334 bool literal_contains_escapes() const { | 342 bool literal_contains_escapes() const { |
| 335 Location location = current_.location; | 343 Location location = current_.location; |
| 336 int source_length = (location.end_pos - location.beg_pos); | 344 int source_length = (location.end_pos - location.beg_pos); |
| 337 if (current_.token == Token::STRING) { | 345 if (current_.token == Token::STRING) { |
| 338 // Subtract delimiters. | 346 // Subtract delimiters. |
| 339 source_length -= 2; | 347 source_length -= 2; |
| 340 } | 348 } |
| 341 return current_.literal_chars->length() != source_length; | 349 return current_.literal_chars->length() != source_length; |
| 342 } | 350 } |
| 343 | 351 |
| 352 // Similar functions for the upcoming token. |
| 353 |
| 354 // One token look-ahead (past the token returned by Next()). |
| 355 Token::Value peek() const { return next_.token; } |
| 356 |
| 357 Location peek_location() const { return next_.location; } |
| 358 |
| 344 // Returns the literal string for the next token (the token that | 359 // Returns the literal string for the next token (the token that |
| 345 // would be returned if Next() were called). | 360 // would be returned if Next() were called). |
| 346 bool is_next_literal_ascii() { | |
| 347 ASSERT_NOT_NULL(next_.literal_chars); | |
| 348 return next_.literal_chars->is_ascii(); | |
| 349 } | |
| 350 Vector<const char> next_literal_ascii_string() { | 361 Vector<const char> next_literal_ascii_string() { |
| 351 ASSERT_NOT_NULL(next_.literal_chars); | 362 ASSERT_NOT_NULL(next_.literal_chars); |
| 352 return next_.literal_chars->ascii_literal(); | 363 return next_.literal_chars->ascii_literal(); |
| 353 } | 364 } |
| 354 Vector<const uc16> next_literal_uc16_string() { | 365 Vector<const uc16> next_literal_uc16_string() { |
| 355 ASSERT_NOT_NULL(next_.literal_chars); | 366 ASSERT_NOT_NULL(next_.literal_chars); |
| 356 return next_.literal_chars->uc16_literal(); | 367 return next_.literal_chars->uc16_literal(); |
| 357 } | 368 } |
| 369 bool is_next_literal_ascii() { |
| 370 ASSERT_NOT_NULL(next_.literal_chars); |
| 371 return next_.literal_chars->is_ascii(); |
| 372 } |
| 358 int next_literal_length() const { | 373 int next_literal_length() const { |
| 359 ASSERT_NOT_NULL(next_.literal_chars); | 374 ASSERT_NOT_NULL(next_.literal_chars); |
| 360 return next_.literal_chars->length(); | 375 return next_.literal_chars->length(); |
| 361 } | 376 } |
| 362 | 377 |
| 363 UnicodeCache* unicode_cache() { return unicode_cache_; } | 378 UnicodeCache* unicode_cache() { return unicode_cache_; } |
| 364 | 379 |
| 365 static const int kCharacterLookaheadBufferSize = 1; | 380 static const int kCharacterLookaheadBufferSize = 1; |
| 366 | 381 |
| 367 protected: | 382 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
| 383 uc32 ScanOctalEscape(uc32 c, int length); |
| 384 |
| 385 // Returns the location of the last seen octal literal. |
| 386 Location octal_position() const { return octal_pos_; } |
| 387 void clear_octal_position() { octal_pos_ = Location::invalid(); } |
| 388 |
| 389 // Seek forward to the given position. This operation does not |
| 390 // work in general, for instance when there are pushed back |
| 391 // characters, but works for seeking forward until simple delimiter |
| 392 // tokens, which is what it is used for. |
| 393 void SeekForward(int pos); |
| 394 |
| 395 bool HarmonyScoping() const { |
| 396 return harmony_scoping_; |
| 397 } |
| 398 void SetHarmonyScoping(bool block_scoping) { |
| 399 harmony_scoping_ = block_scoping; |
| 400 } |
| 401 |
| 402 |
| 403 // Returns true if there was a line terminator before the peek'ed token, |
| 404 // possibly inside a multi-line comment. |
| 405 bool HasAnyLineTerminatorBeforeNext() const { |
| 406 return has_line_terminator_before_next_ || |
| 407 has_multiline_comment_before_next_; |
| 408 } |
| 409 |
| 410 // Scans the input as a regular expression pattern, previous |
| 411 // character(s) must be /(=). Returns true if a pattern is scanned. |
| 412 bool ScanRegExpPattern(bool seen_equal); |
| 413 // Returns true if regexp flags are scanned (always since flags can |
| 414 // be empty). |
| 415 bool ScanRegExpFlags(); |
| 416 |
| 417 // Tells whether the buffer contains an identifier (no escapes). |
| 418 // Used for checking if a property name is an identifier. |
| 419 static bool IsIdentifier(unibrow::CharacterStream* buffer); |
| 420 |
| 421 private: |
| 368 // The current and look-ahead token. | 422 // The current and look-ahead token. |
| 369 struct TokenDesc { | 423 struct TokenDesc { |
| 370 Token::Value token; | 424 Token::Value token; |
| 371 Location location; | 425 Location location; |
| 372 LiteralBuffer* literal_chars; | 426 LiteralBuffer* literal_chars; |
| 373 }; | 427 }; |
| 374 | 428 |
| 375 // Call this after setting source_ to the input. | 429 // Call this after setting source_ to the input. |
| 376 void Init() { | 430 void Init() { |
| 377 // Set c0_ (one character ahead) | 431 // Set c0_ (one character ahead) |
| (...skipping 49 matching lines...) | (...skipping 49 matching lines...) |
| 427 if (c0_ == next) { | 481 if (c0_ == next) { |
| 428 Advance(); | 482 Advance(); |
| 429 return then; | 483 return then; |
| 430 } else { | 484 } else { |
| 431 return else_; | 485 return else_; |
| 432 } | 486 } |
| 433 } | 487 } |
| 434 | 488 |
| 435 uc32 ScanHexNumber(int expected_length); | 489 uc32 ScanHexNumber(int expected_length); |
| 436 | 490 |
| 437 // Return the current source position. | |
| 438 int source_pos() { | |
| 439 return source_->pos() - kCharacterLookaheadBufferSize; | |
| 440 } | |
| 441 | |
| 442 UnicodeCache* unicode_cache_; | |
| 443 | |
| 444 // Buffers collecting literal strings, numbers, etc. | |
| 445 LiteralBuffer literal_buffer1_; | |
| 446 LiteralBuffer literal_buffer2_; | |
| 447 | |
| 448 TokenDesc current_; // desc for current token (as returned by Next()) | |
| 449 TokenDesc next_; // desc for next token (one token look-ahead) | |
| 450 | |
| 451 // Input stream. Must be initialized to an UC16CharacterStream. | |
| 452 UC16CharacterStream* source_; | |
| 453 | |
| 454 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | |
| 455 uc32 c0_; | |
| 456 }; | |
| 457 | |
| 458 // ---------------------------------------------------------------------------- | |
| 459 // JavaScriptScanner - base logic for JavaScript scanning. | |
| 460 | |
| 461 class JavaScriptScanner : public Scanner { | |
| 462 public: | |
| 463 // A LiteralScope that disables recording of some types of JavaScript | |
| 464 // literals. If the scanner is configured to not record the specific | |
| 465 // type of literal, the scope will not call StartLiteral. | |
| 466 class LiteralScope { | |
| 467 public: | |
| 468 explicit LiteralScope(JavaScriptScanner* self) | |
| 469 : scanner_(self), complete_(false) { | |
| 470 scanner_->StartLiteral(); | |
| 471 } | |
| 472 ~LiteralScope() { | |
| 473 if (!complete_) scanner_->DropLiteral(); | |
| 474 } | |
| 475 void Complete() { | |
| 476 scanner_->TerminateLiteral(); | |
| 477 complete_ = true; | |
| 478 } | |
| 479 | |
| 480 private: | |
| 481 JavaScriptScanner* scanner_; | |
| 482 bool complete_; | |
| 483 }; | |
| 484 | |
| 485 explicit JavaScriptScanner(UnicodeCache* scanner_contants); | |
| 486 | |
| 487 void Initialize(UC16CharacterStream* source); | |
| 488 | |
| 489 // Returns the next token. | |
| 490 Token::Value Next(); | |
| 491 | |
| 492 // Returns true if there was a line terminator before the peek'ed token, | |
| 493 // possibly inside a multi-line comment. | |
| 494 bool HasAnyLineTerminatorBeforeNext() const { | |
| 495 return has_line_terminator_before_next_ || | |
| 496 has_multiline_comment_before_next_; | |
| 497 } | |
| 498 | |
| 499 // Scans the input as a regular expression pattern, previous | |
| 500 // character(s) must be /(=). Returns true if a pattern is scanned. | |
| 501 bool ScanRegExpPattern(bool seen_equal); | |
| 502 // Returns true if regexp flags are scanned (always since flags can | |
| 503 // be empty). | |
| 504 bool ScanRegExpFlags(); | |
| 505 | |
| 506 // Tells whether the buffer contains an identifier (no escapes). | |
| 507 // Used for checking if a property name is an identifier. | |
| 508 static bool IsIdentifier(unibrow::CharacterStream* buffer); | |
| 509 | |
| 510 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | |
| 511 uc32 ScanOctalEscape(uc32 c, int length); | |
| 512 | |
| 513 // Returns the location of the last seen octal literal | |
| 514 Location octal_position() const { return octal_pos_; } | |
| 515 void clear_octal_position() { octal_pos_ = Location::invalid(); } | |
| 516 | |
| 517 // Seek forward to the given position. This operation does not | |
| 518 // work in general, for instance when there are pushed back | |
| 519 // characters, but works for seeking forward until simple delimiter | |
| 520 // tokens, which is what it is used for. | |
| 521 void SeekForward(int pos); | |
| 522 | |
| 523 bool HarmonyScoping() const { | |
| 524 return harmony_scoping_; | |
| 525 } | |
| 526 void SetHarmonyScoping(bool block_scoping) { | |
| 527 harmony_scoping_ = block_scoping; | |
| 528 } | |
| 529 | |
| 530 | |
| 531 protected: | |
| 532 bool SkipWhiteSpace(); | |
| 533 Token::Value SkipSingleLineComment(); | |
| 534 Token::Value SkipMultiLineComment(); | |
| 535 | |
| 536 // Scans a single JavaScript token. | 491 // Scans a single JavaScript token. |
| 537 void Scan(); | 492 void Scan(); |
| 538 | 493 |
| 494 bool SkipWhiteSpace(); |
| 495 Token::Value SkipSingleLineComment(); |
| 496 Token::Value SkipMultiLineComment(); |
| 497 // Scans a possible HTML comment -- begins with '<!'. |
| 498 Token::Value ScanHtmlComment(); |
| 499 |
| 539 void ScanDecimalDigits(); | 500 void ScanDecimalDigits(); |
| 540 Token::Value ScanNumber(bool seen_period); | 501 Token::Value ScanNumber(bool seen_period); |
| 541 Token::Value ScanIdentifierOrKeyword(); | 502 Token::Value ScanIdentifierOrKeyword(); |
| 542 Token::Value ScanIdentifierSuffix(LiteralScope* literal); | 503 Token::Value ScanIdentifierSuffix(LiteralScope* literal); |
| 543 | 504 |
| 544 void ScanEscape(); | 505 void ScanEscape(); |
| 545 Token::Value ScanString(); | 506 Token::Value ScanString(); |
| 546 | 507 |
| 547 // Scans a possible HTML comment -- begins with '<!'. | |
| 548 Token::Value ScanHtmlComment(); | |
| 549 | |
| 550 // Decodes a unicode escape-sequence which is part of an identifier. | 508 // Decodes a unicode escape-sequence which is part of an identifier. |
| 551 // If the escape sequence cannot be decoded the result is kBadChar. | 509 // If the escape sequence cannot be decoded the result is kBadChar. |
| 552 uc32 ScanIdentifierUnicodeEscape(); | 510 uc32 ScanIdentifierUnicodeEscape(); |
| 553 // Recognizes a unicode escape-sequence and adds its characters, | 511 // Recognizes a unicode escape-sequence and adds its characters, |
| 554 // uninterpreted, to the current literal. Used for parsing RegExp | 512 // uninterpreted, to the current literal. Used for parsing RegExp |
| 555 // flags. | 513 // flags. |
| 556 bool ScanLiteralUnicodeEscape(); | 514 bool ScanLiteralUnicodeEscape(); |
| 557 | 515 |
| 516 // Return the current source position. |
| 517 int source_pos() { |
| 518 return source_->pos() - kCharacterLookaheadBufferSize; |
| 519 } |
| 520 |
| 521 UnicodeCache* unicode_cache_; |
| 522 |
| 523 // Buffers collecting literal strings, numbers, etc. |
| 524 LiteralBuffer literal_buffer1_; |
| 525 LiteralBuffer literal_buffer2_; |
| 526 |
| 527 TokenDesc current_; // desc for current token (as returned by Next()) |
| 528 TokenDesc next_; // desc for next token (one token look-ahead) |
| 529 |
| 530 // Input stream. Must be initialized to an UC16CharacterStream. |
| 531 UC16CharacterStream* source_; |
| 532 |
| 533 |
| 558 // Start position of the octal literal last scanned. | 534 // Start position of the octal literal last scanned. |
| 559 Location octal_pos_; | 535 Location octal_pos_; |
| 560 | 536 |
| 537 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| 538 uc32 c0_; |
| 539 |
| 561 // Whether there is a line terminator whitespace character after | 540 // Whether there is a line terminator whitespace character after |
| 562 // the current token, and before the next. Does not count newlines | 541 // the current token, and before the next. Does not count newlines |
| 563 // inside multiline comments. | 542 // inside multiline comments. |
| 564 bool has_line_terminator_before_next_; | 543 bool has_line_terminator_before_next_; |
| 565 // Whether there is a multi-line comment that contains a | 544 // Whether there is a multi-line comment that contains a |
| 566 // line-terminator after the current token, and before the next. | 545 // line-terminator after the current token, and before the next. |
| 567 bool has_multiline_comment_before_next_; | 546 bool has_multiline_comment_before_next_; |
| 568 // Whether we scan 'let' as a keyword for harmony block scoped | 547 // Whether we scan 'let' as a keyword for harmony block scoped |
| 569 // let bindings. | 548 // let bindings. |
| 570 bool harmony_scoping_; | 549 bool harmony_scoping_; |
| 571 }; | 550 }; |
| 572 | 551 |
| 573 } } // namespace v8::internal | 552 } } // namespace v8::internal |
| 574 | 553 |
| 575 #endif // V8_SCANNER_H_ | 554 #endif // V8_SCANNER_H_ |
| OLD | NEW |