Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(944)

Side by Side Diff: src/scanner-base.h

Issue 5188009: Merge preparser Scanner with main JavaScript scanner. (Closed)
Patch Set: Address review. Fix thinko in keyword matcher. Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 309 matching lines...) Expand 10 before | Expand all | Expand 10 after
320 320
321 // One Unicode character look-ahead; c0_ < 0 at the end of the input. 321 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
322 uc32 c0_; 322 uc32 c0_;
323 }; 323 };
324 324
325 // ---------------------------------------------------------------------------- 325 // ----------------------------------------------------------------------------
326 // JavaScriptScanner - base logic for JavaScript scanning. 326 // JavaScriptScanner - base logic for JavaScript scanning.
327 327
328 class JavaScriptScanner : public Scanner { 328 class JavaScriptScanner : public Scanner {
329 public: 329 public:
330
331 // Bit vector representing set of types of literals.
332 enum LiteralType {
333 kNoLiterals = 0,
334 kLiteralNumber = 1,
335 kLiteralIdentifier = 2,
336 kLiteralString = 4,
337 kLiteralRegExp = 8,
338 kLiteralRegExpFlags = 16,
339 kAllLiterals = 31
340 };
341
342 // A LiteralScope that disables recording of some types of JavaScript
343 // literals. If the scanner is configured to not record the specific
344 // type of literal, the scope will not call StartLiteral.
345 class LiteralScope {
346 public:
347 LiteralScope(JavaScriptScanner* self, LiteralType type)
348 : scanner_(self), complete_(false) {
349 if (scanner_->RecordsLiteral(type)) {
350 scanner_->StartLiteral();
351 }
352 }
353 ~LiteralScope() {
354 if (!complete_) scanner_->DropLiteral();
355 }
356 void Complete() {
357 scanner_->TerminateLiteral();
358 complete_ = true;
359 }
360
361 private:
362 JavaScriptScanner* scanner_;
363 bool complete_;
364 };
365
330 JavaScriptScanner(); 366 JavaScriptScanner();
331 367
332 // Returns the next token. 368 // Returns the next token.
333 Token::Value Next(); 369 Token::Value Next();
334 370
335 // Returns true if there was a line terminator before the peek'ed token. 371 // Returns true if there was a line terminator before the peek'ed token.
336 bool has_line_terminator_before_next() const { 372 bool has_line_terminator_before_next() const {
337 return has_line_terminator_before_next_; 373 return has_line_terminator_before_next_;
338 } 374 }
339 375
340 // Scans the input as a regular expression pattern, previous 376 // Scans the input as a regular expression pattern, previous
341 // character(s) must be /(=). Returns true if a pattern is scanned. 377 // character(s) must be /(=). Returns true if a pattern is scanned.
342 bool ScanRegExpPattern(bool seen_equal); 378 bool ScanRegExpPattern(bool seen_equal);
343 // Returns true if regexp flags are scanned (always since flags can 379 // Returns true if regexp flags are scanned (always since flags can
344 // be empty). 380 // be empty).
345 bool ScanRegExpFlags(); 381 bool ScanRegExpFlags();
346 382
347 // Tells whether the buffer contains an identifier (no escapes). 383 // Tells whether the buffer contains an identifier (no escapes).
348 // Used for checking if a property name is an identifier. 384 // Used for checking if a property name is an identifier.
349 static bool IsIdentifier(unibrow::CharacterStream* buffer); 385 static bool IsIdentifier(unibrow::CharacterStream* buffer);
350 386
351 // Seek forward to the given position. This operation does not 387 // Seek forward to the given position. This operation does not
352 // work in general, for instance when there are pushed back 388 // work in general, for instance when there are pushed back
353 // characters, but works for seeking forward until simple delimiter 389 // characters, but works for seeking forward until simple delimiter
354 // tokens, which is what it is used for. 390 // tokens, which is what it is used for.
355 void SeekForward(int pos); 391 void SeekForward(int pos);
356 392
393 // Whether this scanner records the given literal type or not.
394 bool RecordsLiteral(LiteralType type) {
395 return (literal_flags_ & type) != 0;
396 }
397
357 protected: 398 protected:
358 bool SkipWhiteSpace(); 399 bool SkipWhiteSpace();
359 Token::Value SkipSingleLineComment(); 400 Token::Value SkipSingleLineComment();
360 Token::Value SkipMultiLineComment(); 401 Token::Value SkipMultiLineComment();
361 402
362 // Scans a single JavaScript token. 403 // Scans a single JavaScript token.
363 void Scan(); 404 void Scan();
364 405
365 void ScanDecimalDigits(); 406 void ScanDecimalDigits();
366 Token::Value ScanNumber(bool seen_period); 407 Token::Value ScanNumber(bool seen_period);
367 Token::Value ScanIdentifier(); 408 Token::Value ScanIdentifierOrKeyword();
409 Token::Value ScanIdentifierSuffix(LiteralScope* literal);
368 410
369 void ScanEscape(); 411 void ScanEscape();
370 Token::Value ScanString(); 412 Token::Value ScanString();
371 413
372 // Scans a possible HTML comment -- begins with '<!'. 414 // Scans a possible HTML comment -- begins with '<!'.
373 Token::Value ScanHtmlComment(); 415 Token::Value ScanHtmlComment();
374 416
375 // Decodes a unicode escape-sequence which is part of an identifier. 417 // Decodes a unicode escape-sequence which is part of an identifier.
376 // If the escape sequence cannot be decoded the result is kBadChar. 418 // If the escape sequence cannot be decoded the result is kBadChar.
377 uc32 ScanIdentifierUnicodeEscape(); 419 uc32 ScanIdentifierUnicodeEscape();
378 420
421 int literal_flags_;
379 bool has_line_terminator_before_next_; 422 bool has_line_terminator_before_next_;
380 }; 423 };
381 424
382 425
383 // ---------------------------------------------------------------------------- 426 // ----------------------------------------------------------------------------
384 // Keyword matching state machine. 427 // Keyword matching state machine.
385 428
386 class KeywordMatcher { 429 class KeywordMatcher {
387 // Incrementally recognize keywords. 430 // Incrementally recognize keywords.
388 // 431 //
389 // Recognized keywords: 432 // Recognized keywords:
390 // break case catch const* continue debugger* default delete do else 433 // break case catch const* continue debugger* default delete do else
391 // finally false for function if in instanceof native* new null 434 // finally false for function if in instanceof native* new null
392 // return switch this throw true try typeof var void while with 435 // return switch this throw true try typeof var void while with
393 // 436 //
394 // *: Actually "future reserved keywords". These are the only ones we 437 // *: Actually "future reserved keywords". These are the only ones we
395 // recognize, the remaining are allowed as identifiers. 438 // recognize, the remaining are allowed as identifiers.
396 // In ES5 strict mode, we should disallow all reserved keywords. 439 // In ES5 strict mode, we should disallow all reserved keywords.
397 public: 440 public:
398 KeywordMatcher() 441 KeywordMatcher()
399 : state_(INITIAL), 442 : state_(INITIAL),
400 token_(Token::IDENTIFIER), 443 token_(Token::IDENTIFIER),
401 keyword_(NULL), 444 keyword_(NULL),
402 counter_(0), 445 counter_(0),
403 keyword_token_(Token::ILLEGAL) {} 446 keyword_token_(Token::ILLEGAL) {}
404 447
405 Token::Value token() { return token_; } 448 Token::Value token() { return token_; }
406 449
407 inline void AddChar(unibrow::uchar input) { 450 inline bool AddChar(unibrow::uchar input) {
408 if (state_ != UNMATCHABLE) { 451 if (state_ != UNMATCHABLE) {
409 Step(input); 452 Step(input);
410 } 453 }
454 return state_ != UNMATCHABLE;
411 } 455 }
412 456
413 void Fail() { 457 void Fail() {
414 token_ = Token::IDENTIFIER; 458 token_ = Token::IDENTIFIER;
415 state_ = UNMATCHABLE; 459 state_ = UNMATCHABLE;
416 } 460 }
417 461
418 private: 462 private:
419 enum State { 463 enum State {
420 UNMATCHABLE, 464 UNMATCHABLE,
(...skipping 30 matching lines...) Expand all
451 kFirstCharRangeMax - kFirstCharRangeMin + 1; 495 kFirstCharRangeMax - kFirstCharRangeMin + 1;
452 // State map for first keyword character range. 496 // State map for first keyword character range.
453 static FirstState first_states_[kFirstCharRangeLength]; 497 static FirstState first_states_[kFirstCharRangeLength];
454 498
455 // If input equals keyword's character at position, continue matching keyword 499 // If input equals keyword's character at position, continue matching keyword
456 // from that position. 500 // from that position.
457 inline bool MatchKeywordStart(unibrow::uchar input, 501 inline bool MatchKeywordStart(unibrow::uchar input,
458 const char* keyword, 502 const char* keyword,
459 int position, 503 int position,
460 Token::Value token_if_match) { 504 Token::Value token_if_match) {
461 if (input == static_cast<unibrow::uchar>(keyword[position])) { 505 if (input != static_cast<unibrow::uchar>(keyword[position])) {
462 state_ = KEYWORD_PREFIX; 506 return false;
463 this->keyword_ = keyword;
464 this->counter_ = position + 1;
465 this->keyword_token_ = token_if_match;
466 return true;
467 } 507 }
468 return false; 508 state_ = KEYWORD_PREFIX;
509 this->keyword_ = keyword;
510 this->counter_ = position + 1;
511 this->keyword_token_ = token_if_match;
512 return true;
469 } 513 }
470 514
471 // If input equals match character, transition to new state and return true. 515 // If input equals match character, transition to new state and return true.
472 inline bool MatchState(unibrow::uchar input, char match, State new_state) { 516 inline bool MatchState(unibrow::uchar input, char match, State new_state) {
473 if (input == static_cast<unibrow::uchar>(match)) { 517 if (input != static_cast<unibrow::uchar>(match)) {
474 state_ = new_state; 518 return false;
475 return true;
476 } 519 }
477 return false; 520 state_ = new_state;
521 return true;
478 } 522 }
479 523
480 inline bool MatchKeyword(unibrow::uchar input, 524 inline bool MatchKeyword(unibrow::uchar input,
481 char match, 525 char match,
482 State new_state, 526 State new_state,
483 Token::Value keyword_token) { 527 Token::Value keyword_token) {
484 if (input != static_cast<unibrow::uchar>(match)) { 528 if (input != static_cast<unibrow::uchar>(match)) {
485 return false; 529 return false;
486 } 530 }
487 state_ = new_state; 531 state_ = new_state;
(...skipping 12 matching lines...) Expand all
500 // keyword with the current prefix). 544 // keyword with the current prefix).
501 const char* keyword_; 545 const char* keyword_;
502 int counter_; 546 int counter_;
503 Token::Value keyword_token_; 547 Token::Value keyword_token_;
504 }; 548 };
505 549
506 550
507 } } // namespace v8::internal 551 } } // namespace v8::internal
508 552
509 #endif // V8_SCANNER_BASE_H_ 553 #endif // V8_SCANNER_BASE_H_
OLD | NEW
« no previous file with comments | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698