Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(81)

Side by Side Diff: src/scanner-base.h

Issue 5188009: Merge preparser Scanner with main JavaScript scanner. (Closed)
Patch Set: Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 309 matching lines...) Expand 10 before | Expand all | Expand 10 after
320 320
321 // One Unicode character look-ahead; c0_ < 0 at the end of the input. 321 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
322 uc32 c0_; 322 uc32 c0_;
323 }; 323 };
324 324
325 // ---------------------------------------------------------------------------- 325 // ----------------------------------------------------------------------------
326 // JavaScriptScanner - base logic for JavaScript scanning. 326 // JavaScriptScanner - base logic for JavaScript scanning.
327 327
328 class JavaScriptScanner : public Scanner { 328 class JavaScriptScanner : public Scanner {
329 public: 329 public:
330
331 enum LiteralType {
332 kLiteralNumber = 1,
333 kLiteralIdentifier = 2,
334 kLiteralString = 4,
335 kLiteralRegExp = 8,
336 kLiteralRegExpFlags = 16
337 };
338
339 // More specialized literal scope.
Søren Thygesen Gjesse 2010/11/19 08:17:21 More specialized - in what sense?
Lasse Reichstein 2010/11/19 08:51:24 Elaborated.
340 class LiteralScope {
341 public:
342 LiteralScope(JavaScriptScanner* self, LiteralType type)
343 : scanner_(self), complete_(false) {
344 if (scanner_->RecordsLiteral(type)) {
345 scanner_->StartLiteral();
346 }
347 }
348 ~LiteralScope() {
349 if (!complete_) scanner_->DropLiteral();
350 }
351 void Complete() {
352 scanner_->TerminateLiteral();
353 complete_ = true;
354 }
355
356 private:
357 JavaScriptScanner* scanner_;
358 bool complete_;
359 };
360
330 JavaScriptScanner(); 361 JavaScriptScanner();
331 362
332 // Returns the next token. 363 // Returns the next token.
333 Token::Value Next(); 364 Token::Value Next();
334 365
335 // Returns true if there was a line terminator before the peeked token. 366 // Returns true if there was a line terminator before the peeked token.
336 bool has_line_terminator_before_next() const { 367 bool has_line_terminator_before_next() const {
337 return has_line_terminator_before_next_; 368 return has_line_terminator_before_next_;
338 } 369 }
339 370
340 // Scans the input as a regular expression pattern. The previous 371 // Scans the input as a regular expression pattern. The previous
341 // character(s) must be / or /=. Returns true if a pattern is scanned. 372 // character(s) must be / or /=. Returns true if a pattern is scanned.
342 bool ScanRegExpPattern(bool seen_equal); 373 bool ScanRegExpPattern(bool seen_equal);
343 // Returns true if regexp flags are scanned (always, since flags can 374 // Returns true if regexp flags are scanned (always, since flags can
344 // be empty). 375 // be empty).
345 bool ScanRegExpFlags(); 376 bool ScanRegExpFlags();
346 377
347 // Tells whether the buffer contains an identifier (no escapes). 378 // Tells whether the buffer contains an identifier (no escapes).
348 // Used for checking if a property name is an identifier. 379 // Used for checking if a property name is an identifier.
349 static bool IsIdentifier(unibrow::CharacterStream* buffer); 380 static bool IsIdentifier(unibrow::CharacterStream* buffer);
350 381
351 // Seek forward to the given position. This operation does not 382 // Seek forward to the given position. This operation does not
352 // work in general, for instance when there are pushed back 383 // work in general, for instance when there are pushed back
353 // characters, but works for seeking forward until simple delimiter 384 // characters, but works for seeking forward until simple delimiter
354 // tokens, which is what it is used for. 385 // tokens, which is what it is used for.
355 void SeekForward(int pos); 386 void SeekForward(int pos);
356 387
388 // Whether this scanner records the given literal type or not.
389 bool RecordsLiteral(LiteralType type) {
390 return (literal_flags_ & type) != 0;
391 }
392
357 protected: 393 protected:
358 bool SkipWhiteSpace(); 394 bool SkipWhiteSpace();
359 Token::Value SkipSingleLineComment(); 395 Token::Value SkipSingleLineComment();
360 Token::Value SkipMultiLineComment(); 396 Token::Value SkipMultiLineComment();
361 397
362 // Scans a single JavaScript token. 398 // Scans a single JavaScript token.
363 void Scan(); 399 void Scan();
364 400
365 void ScanDecimalDigits(); 401 void ScanDecimalDigits();
366 Token::Value ScanNumber(bool seen_period); 402 Token::Value ScanNumber(bool seen_period);
367 Token::Value ScanIdentifier(); 403 Token::Value ScanIdentifierOrKeyword();
404 Token::Value ScanIdentifierSuffix(LiteralScope* literal);
368 405
369 void ScanEscape(); 406 void ScanEscape();
370 Token::Value ScanString(); 407 Token::Value ScanString();
371 408
372 // Scans a possible HTML comment -- begins with '<!'. 409 // Scans a possible HTML comment -- begins with '<!'.
373 Token::Value ScanHtmlComment(); 410 Token::Value ScanHtmlComment();
374 411
375 // Decodes a unicode escape-sequence which is part of an identifier. 412 // Decodes a unicode escape-sequence which is part of an identifier.
376 // If the escape sequence cannot be decoded the result is kBadChar. 413 // If the escape sequence cannot be decoded the result is kBadChar.
377 uc32 ScanIdentifierUnicodeEscape(); 414 uc32 ScanIdentifierUnicodeEscape();
378 415
416 int literal_flags_;
379 bool has_line_terminator_before_next_; 417 bool has_line_terminator_before_next_;
380 }; 418 };
381 419
382 420
383 // ---------------------------------------------------------------------------- 421 // ----------------------------------------------------------------------------
384 // Keyword matching state machine. 422 // Keyword matching state machine.
385 423
386 class KeywordMatcher { 424 class KeywordMatcher {
387 // Incrementally recognize keywords. 425 // Incrementally recognize keywords.
388 // 426 //
389 // Recognized keywords: 427 // Recognized keywords:
390 // break case catch const* continue debugger* default delete do else 428 // break case catch const* continue debugger* default delete do else
391 // finally false for function if in instanceof native* new null 429 // finally false for function if in instanceof native* new null
392 // return switch this throw true try typeof var void while with 430 // return switch this throw true try typeof var void while with
393 // 431 //
394 // *: Actually "future reserved keywords". These are the only ones we 432 // *: Actually "future reserved keywords". These are the only ones we
395 // recognize; the remaining ones are allowed as identifiers. 433 // recognize; the remaining ones are allowed as identifiers.
396 // In ES5 strict mode, we should disallow all reserved keywords. 434 // In ES5 strict mode, we should disallow all reserved keywords.
397 public: 435 public:
398 KeywordMatcher() 436 KeywordMatcher()
399 : state_(INITIAL), 437 : state_(INITIAL),
400 token_(Token::IDENTIFIER), 438 token_(Token::IDENTIFIER),
401 keyword_(NULL), 439 keyword_(NULL),
402 counter_(0), 440 counter_(0),
403 keyword_token_(Token::ILLEGAL) {} 441 keyword_token_(Token::ILLEGAL) {}
404 442
405 Token::Value token() { return token_; } 443 Token::Value token() { return token_; }
406 444
407 inline void AddChar(unibrow::uchar input) { 445 inline bool AddChar(unibrow::uchar input) {
408 if (state_ != UNMATCHABLE) { 446 if (state_ != UNMATCHABLE) {
409 Step(input); 447 Step(input);
410 } 448 }
449 return state_ != UNMATCHABLE;
411 } 450 }
412 451
413 void Fail() { 452 void Fail() {
414 token_ = Token::IDENTIFIER; 453 token_ = Token::IDENTIFIER;
415 state_ = UNMATCHABLE; 454 state_ = UNMATCHABLE;
416 } 455 }
417 456
418 private: 457 private:
419 enum State { 458 enum State {
420 UNMATCHABLE, 459 UNMATCHABLE,
(...skipping 30 matching lines...) Expand all
451 kFirstCharRangeMax - kFirstCharRangeMin + 1; 490 kFirstCharRangeMax - kFirstCharRangeMin + 1;
452 // State map for first keyword character range. 491 // State map for first keyword character range.
453 static FirstState first_states_[kFirstCharRangeLength]; 492 static FirstState first_states_[kFirstCharRangeLength];
454 493
455 // If input equals keyword's character at position, continue matching keyword 494 // If input equals keyword's character at position, continue matching keyword
456 // from that position. 495 // from that position.
457 inline bool MatchKeywordStart(unibrow::uchar input, 496 inline bool MatchKeywordStart(unibrow::uchar input,
458 const char* keyword, 497 const char* keyword,
459 int position, 498 int position,
460 Token::Value token_if_match) { 499 Token::Value token_if_match) {
461 if (input == static_cast<unibrow::uchar>(keyword[position])) { 500 if (input != static_cast<unibrow::uchar>(keyword[position])) {
462 state_ = KEYWORD_PREFIX; 501 return false;
463 this->keyword_ = keyword;
464 this->counter_ = position + 1;
465 this->keyword_token_ = token_if_match;
466 return true;
467 } 502 }
468 return false; 503 state_ = KEYWORD_PREFIX;
504 this->keyword_ = keyword;
505 this->counter_ = position + 1;
506 this->keyword_token_ = token_if_match;
507 return true;
469 } 508 }
470 509
471 // If input equals match character, transition to new state and return true. 510 // If input equals match character, transition to new state and return true.
472 inline bool MatchState(unibrow::uchar input, char match, State new_state) { 511 inline bool MatchState(unibrow::uchar input, char match, State new_state) {
473 if (input == static_cast<unibrow::uchar>(match)) { 512 if (input != static_cast<unibrow::uchar>(match)) {
474 state_ = new_state; 513 return false;
475 return true;
476 } 514 }
477 return false; 515 state_ = new_state;
516 return true;
478 } 517 }
479 518
480 inline bool MatchKeyword(unibrow::uchar input, 519 inline bool MatchKeyword(unibrow::uchar input,
481 char match, 520 char match,
482 State new_state,
483 Token::Value keyword_token) { 521 Token::Value keyword_token) {
484 if (input != static_cast<unibrow::uchar>(match)) { 522 if (input != static_cast<unibrow::uchar>(match)) {
485 return false; 523 return false;
486 } 524 }
487 state_ = new_state; 525 state_ = KEYWORD_MATCHED;
488 token_ = keyword_token; 526 token_ = keyword_token;
489 return true; 527 return true;
490 } 528 }
491 529
492 void Step(unibrow::uchar input); 530 void Step(unibrow::uchar input);
493 531
494 // Current state. 532 // Current state.
495 State state_; 533 State state_;
496 // Token for currently added characters. 534 // Token for currently added characters.
497 Token::Value token_; 535 Token::Value token_;
498 536
499 // Matching a specific keyword string (there is only one possible valid 537 // Matching a specific keyword string (there is only one possible valid
500 // keyword with the current prefix). 538 // keyword with the current prefix).
501 const char* keyword_; 539 const char* keyword_;
502 int counter_; 540 int counter_;
503 Token::Value keyword_token_; 541 Token::Value keyword_token_;
504 }; 542 };
505 543
506 544
507 } } // namespace v8::internal 545 } } // namespace v8::internal
508 546
509 #endif // V8_SCANNER_BASE_H_ 547 #endif // V8_SCANNER_BASE_H_
OLDNEW
« src/scanner.h ('K') | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698