| Index: src/scanner.h
|
| diff --git a/src/scanner.h b/src/scanner.h
|
| index a2e64a9d2000125a05167034f41026881bc44988..65a04a39e39bf0214fe6a7ba717693c60f11c9a0 100644
|
| --- a/src/scanner.h
|
| +++ b/src/scanner.h
|
| @@ -260,35 +260,32 @@ class LiteralBuffer {
|
|
|
|
|
| // ----------------------------------------------------------------------------
|
| -// Scanner base-class.
|
| +// JavaScript Scanner.
|
|
|
| -// Generic functionality used by both JSON and JavaScript scanners.
|
| class Scanner {
|
| public:
|
| - // -1 is outside of the range of any real source code.
|
| - static const int kNoOctalLocation = -1;
|
| -
|
| - typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
|
| -
|
| + // Scoped helper for literal recording. Automatically drops the literal
|
| + // if scanning is aborted before the literal is complete.
|
| class LiteralScope {
|
| public:
|
| - explicit LiteralScope(Scanner* self);
|
| - ~LiteralScope();
|
| - void Complete();
|
| + explicit LiteralScope(Scanner* self)
|
| + : scanner_(self), complete_(false) {
|
| + scanner_->StartLiteral();
|
| + }
|
| + ~LiteralScope() {
|
| + if (!complete_) scanner_->DropLiteral();
|
| + }
|
| + void Complete() {
|
| + scanner_->TerminateLiteral();
|
| + complete_ = true;
|
| + }
|
|
|
| private:
|
| Scanner* scanner_;
|
| bool complete_;
|
| };
|
|
|
| - explicit Scanner(UnicodeCache* scanner_contants);
|
| -
|
| - // Returns the current token again.
|
| - Token::Value current_token() { return current_.token; }
|
| -
|
| - // One token look-ahead (past the token returned by Next()).
|
| - Token::Value peek() const { return next_.token; }
|
| -
|
| + // Representation of an interval of source positions.
|
| struct Location {
|
| Location(int b, int e) : beg_pos(b), end_pos(e) { }
|
| Location() : beg_pos(0), end_pos(0) { }
|
| @@ -303,21 +300,28 @@ class Scanner {
|
| int end_pos;
|
| };
|
|
|
| + // -1 is outside of the range of any real source code.
|
| + static const int kNoOctalLocation = -1;
|
| +
|
| + typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
|
| +
|
| + explicit Scanner(UnicodeCache* scanner_contants);
|
| +
|
| + void Initialize(UC16CharacterStream* source);
|
| +
|
| + // Returns the next token and advances input.
|
| + Token::Value Next();
|
| + // Returns the current token again.
|
| + Token::Value current_token() { return current_.token; }
|
| // Returns the location information for the current token
|
| - // (the token returned by Next()).
|
| + // (the token last returned by Next()).
|
| Location location() const { return current_.location; }
|
| - Location peek_location() const { return next_.location; }
|
| -
|
| // Returns the literal string, if any, for the current token (the
|
| - // token returned by Next()). The string is 0-terminated and in
|
| - // UTF-8 format; they may contain 0-characters. Literal strings are
|
| - // collected for identifiers, strings, and numbers.
|
| + // token last returned by Next()). The string is 0-terminated.
|
| + // Literal strings are collected for identifiers, strings, and
|
| + // numbers.
|
| // These functions only give the correct result if the literal
|
| // was scanned between calls to StartLiteral() and TerminateLiteral().
|
| - bool is_literal_ascii() {
|
| - ASSERT_NOT_NULL(current_.literal_chars);
|
| - return current_.literal_chars->is_ascii();
|
| - }
|
| Vector<const char> literal_ascii_string() {
|
| ASSERT_NOT_NULL(current_.literal_chars);
|
| return current_.literal_chars->ascii_literal();
|
| @@ -326,6 +330,10 @@ class Scanner {
|
| ASSERT_NOT_NULL(current_.literal_chars);
|
| return current_.literal_chars->uc16_literal();
|
| }
|
| + bool is_literal_ascii() {
|
| + ASSERT_NOT_NULL(current_.literal_chars);
|
| + return current_.literal_chars->is_ascii();
|
| + }
|
| int literal_length() const {
|
| ASSERT_NOT_NULL(current_.literal_chars);
|
| return current_.literal_chars->length();
|
| @@ -341,12 +349,15 @@ class Scanner {
|
| return current_.literal_chars->length() != source_length;
|
| }
|
|
|
| + // Similar functions for the upcoming token.
|
| +
|
| + // One token look-ahead (past the token returned by Next()).
|
| + Token::Value peek() const { return next_.token; }
|
| +
|
| + Location peek_location() const { return next_.location; }
|
| +
|
| // Returns the literal string for the next token (the token that
|
| // would be returned if Next() were called).
|
| - bool is_next_literal_ascii() {
|
| - ASSERT_NOT_NULL(next_.literal_chars);
|
| - return next_.literal_chars->is_ascii();
|
| - }
|
| Vector<const char> next_literal_ascii_string() {
|
| ASSERT_NOT_NULL(next_.literal_chars);
|
| return next_.literal_chars->ascii_literal();
|
| @@ -355,6 +366,10 @@ class Scanner {
|
| ASSERT_NOT_NULL(next_.literal_chars);
|
| return next_.literal_chars->uc16_literal();
|
| }
|
| + bool is_next_literal_ascii() {
|
| + ASSERT_NOT_NULL(next_.literal_chars);
|
| + return next_.literal_chars->is_ascii();
|
| + }
|
| int next_literal_length() const {
|
| ASSERT_NOT_NULL(next_.literal_chars);
|
| return next_.literal_chars->length();
|
| @@ -364,7 +379,46 @@ class Scanner {
|
|
|
| static const int kCharacterLookaheadBufferSize = 1;
|
|
|
| - protected:
|
| + // Scans an octal escape sequence. Also accepts the "\0" decimal escape.
|
| + uc32 ScanOctalEscape(uc32 c, int length);
|
| +
|
| + // Returns the location of the last seen octal literal.
|
| + Location octal_position() const { return octal_pos_; }
|
| + void clear_octal_position() { octal_pos_ = Location::invalid(); }
|
| +
|
| + // Seek forward to the given position. This operation does not
|
| + // work in general, for instance when there are pushed back
|
| + // characters, but works for seeking forward until simple delimiter
|
| + // tokens, which is what it is used for.
|
| + void SeekForward(int pos);
|
| +
|
| + bool HarmonyScoping() const {
|
| + return harmony_scoping_;
|
| + }
|
| + void SetHarmonyScoping(bool block_scoping) {
|
| + harmony_scoping_ = block_scoping;
|
| + }
|
| +
|
| +
|
| + // Returns true if there was a line terminator before the peek'ed token,
|
| + // possibly inside a multi-line comment.
|
| + bool HasAnyLineTerminatorBeforeNext() const {
|
| + return has_line_terminator_before_next_ ||
|
| + has_multiline_comment_before_next_;
|
| + }
|
| +
|
| + // Scans the input as a regular expression pattern. The previous
|
| + // character(s) must be "/" or "/=". Returns true if a pattern is scanned.
|
| + bool ScanRegExpPattern(bool seen_equal);
|
| + // Returns true if regexp flags are scanned (always since flags can
|
| + // be empty).
|
| + bool ScanRegExpFlags();
|
| +
|
| + // Tells whether the buffer contains an identifier (no escapes).
|
| + // Used for checking if a property name is an identifier.
|
| + static bool IsIdentifier(unibrow::CharacterStream* buffer);
|
| +
|
| + private:
|
| // The current and look-ahead token.
|
| struct TokenDesc {
|
| Token::Value token;
|
| @@ -434,107 +488,14 @@ class Scanner {
|
|
|
| uc32 ScanHexNumber(int expected_length);
|
|
|
| - // Return the current source position.
|
| - int source_pos() {
|
| - return source_->pos() - kCharacterLookaheadBufferSize;
|
| - }
|
| -
|
| - UnicodeCache* unicode_cache_;
|
| -
|
| - // Buffers collecting literal strings, numbers, etc.
|
| - LiteralBuffer literal_buffer1_;
|
| - LiteralBuffer literal_buffer2_;
|
| -
|
| - TokenDesc current_; // desc for current token (as returned by Next())
|
| - TokenDesc next_; // desc for next token (one token look-ahead)
|
| -
|
| - // Input stream. Must be initialized to an UC16CharacterStream.
|
| - UC16CharacterStream* source_;
|
| -
|
| - // One Unicode character look-ahead; c0_ < 0 at the end of the input.
|
| - uc32 c0_;
|
| -};
|
| -
|
| -// ----------------------------------------------------------------------------
|
| -// JavaScriptScanner - base logic for JavaScript scanning.
|
| -
|
| -class JavaScriptScanner : public Scanner {
|
| - public:
|
| - // A LiteralScope that disables recording of some types of JavaScript
|
| - // literals. If the scanner is configured to not record the specific
|
| - // type of literal, the scope will not call StartLiteral.
|
| - class LiteralScope {
|
| - public:
|
| - explicit LiteralScope(JavaScriptScanner* self)
|
| - : scanner_(self), complete_(false) {
|
| - scanner_->StartLiteral();
|
| - }
|
| - ~LiteralScope() {
|
| - if (!complete_) scanner_->DropLiteral();
|
| - }
|
| - void Complete() {
|
| - scanner_->TerminateLiteral();
|
| - complete_ = true;
|
| - }
|
| -
|
| - private:
|
| - JavaScriptScanner* scanner_;
|
| - bool complete_;
|
| - };
|
| -
|
| - explicit JavaScriptScanner(UnicodeCache* scanner_contants);
|
| -
|
| - void Initialize(UC16CharacterStream* source);
|
| -
|
| - // Returns the next token.
|
| - Token::Value Next();
|
| -
|
| - // Returns true if there was a line terminator before the peek'ed token,
|
| - // possibly inside a multi-line comment.
|
| - bool HasAnyLineTerminatorBeforeNext() const {
|
| - return has_line_terminator_before_next_ ||
|
| - has_multiline_comment_before_next_;
|
| - }
|
| -
|
| - // Scans the input as a regular expression pattern, previous
|
| - // character(s) must be /(=). Returns true if a pattern is scanned.
|
| - bool ScanRegExpPattern(bool seen_equal);
|
| - // Returns true if regexp flags are scanned (always since flags can
|
| - // be empty).
|
| - bool ScanRegExpFlags();
|
| -
|
| - // Tells whether the buffer contains an identifier (no escapes).
|
| - // Used for checking if a property name is an identifier.
|
| - static bool IsIdentifier(unibrow::CharacterStream* buffer);
|
| -
|
| - // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
|
| - uc32 ScanOctalEscape(uc32 c, int length);
|
| -
|
| - // Returns the location of the last seen octal literal
|
| - Location octal_position() const { return octal_pos_; }
|
| - void clear_octal_position() { octal_pos_ = Location::invalid(); }
|
| -
|
| - // Seek forward to the given position. This operation does not
|
| - // work in general, for instance when there are pushed back
|
| - // characters, but works for seeking forward until simple delimiter
|
| - // tokens, which is what it is used for.
|
| - void SeekForward(int pos);
|
| -
|
| - bool HarmonyScoping() const {
|
| - return harmony_scoping_;
|
| - }
|
| - void SetHarmonyScoping(bool block_scoping) {
|
| - harmony_scoping_ = block_scoping;
|
| - }
|
| -
|
| + // Scans a single JavaScript token.
|
| + void Scan();
|
|
|
| - protected:
|
| bool SkipWhiteSpace();
|
| Token::Value SkipSingleLineComment();
|
| Token::Value SkipMultiLineComment();
|
| -
|
| - // Scans a single JavaScript token.
|
| - void Scan();
|
| + // Scans a possible HTML comment -- begins with '<!'.
|
| + Token::Value ScanHtmlComment();
|
|
|
| void ScanDecimalDigits();
|
| Token::Value ScanNumber(bool seen_period);
|
| @@ -544,9 +505,6 @@ class JavaScriptScanner : public Scanner {
|
| void ScanEscape();
|
| Token::Value ScanString();
|
|
|
| - // Scans a possible HTML comment -- begins with '<!'.
|
| - Token::Value ScanHtmlComment();
|
| -
|
| // Decodes a unicode escape-sequence which is part of an identifier.
|
| // If the escape sequence cannot be decoded the result is kBadChar.
|
| uc32 ScanIdentifierUnicodeEscape();
|
| @@ -555,9 +513,30 @@ class JavaScriptScanner : public Scanner {
|
| // flags.
|
| bool ScanLiteralUnicodeEscape();
|
|
|
| + // Returns the current source position.
|
| + int source_pos() {
|
| + return source_->pos() - kCharacterLookaheadBufferSize;
|
| + }
|
| +
|
| + UnicodeCache* unicode_cache_;
|
| +
|
| + // Buffers collecting literal strings, numbers, etc.
|
| + LiteralBuffer literal_buffer1_;
|
| + LiteralBuffer literal_buffer2_;
|
| +
|
| + TokenDesc current_; // desc for current token (as returned by Next())
|
| + TokenDesc next_; // desc for next token (one token look-ahead)
|
| +
|
| + // Input stream. Must be initialized to a UC16CharacterStream.
|
| + UC16CharacterStream* source_;
|
| +
|
| +
|
| // Start position of the octal literal last scanned.
|
| Location octal_pos_;
|
|
|
| + // One Unicode character look-ahead; c0_ < 0 at the end of the input.
|
| + uc32 c0_;
|
| +
|
| // Whether there is a line terminator whitespace character after
|
| // the current token, and before the next. Does not count newlines
|
| // inside multiline comments.
|
|
|