| Index: src/scanner-base.h
|
| diff --git a/src/scanner-base.h b/src/scanner-base.h
|
| index 50f30305c4aa3e150bedb3965668c56678d8af1b..4952f1cd650cea9f9447c01a75c95ad7e2f9aa5c 100644
|
| --- a/src/scanner-base.h
|
| +++ b/src/scanner-base.h
|
| @@ -37,11 +37,24 @@
|
| #include "unicode-inl.h"
|
| #include "char-predicates.h"
|
| #include "utils.h"
|
| +#include "list-inl.h"
|
|
|
| namespace v8 {
|
| namespace internal {
|
|
|
| -// Interface through which the scanner reads characters from the input source.
|
| +// Returns the value (0 .. 15) of a hexadecimal character c ('0'..'9', 'a'..'f', 'A'..'F').
|
| +// If c is not a legal hexadecimal character, returns a value < 0.
|
| +inline int HexValue(uc32 c) {
|
| + c -= '0';
|
| + if (static_cast<unsigned>(c) <= 9) return c;  // '0'..'9'; the unsigned compare also rejects c < 0.
|
| + c = (c | 0x20) - ('a' - '0'); // 'A'..'F' (0x11..0x16) and 'a'..'f' (0x31..0x36) both become 0..5.
|
| + if (static_cast<unsigned>(c) <= 5) return c + 10;  // Only six letters are hex digits; 6 would be 'g'/'G'.
|
| + return -1;
|
| +}
|
| +
|
| +// ----------------------------------------------------------------------------
|
| +// UTF16Buffer - scanner input source with pushback.
|
| +
|
| class UTF16Buffer {
|
| public:
|
| UTF16Buffer();
|
| @@ -54,7 +67,11 @@ class UTF16Buffer {
|
|
|
| int pos() const { return pos_; }
|
|
|
| + static const int kNoEndPosition = 1;
|
| +
|
| protected:
|
| + // Initial value of end_ before the input stream is initialized: presumably kNoEndPosition above.
|
| +
|
| int pos_; // Current position in the buffer.
|
| int end_; // Position where scanning should stop (EOF).
|
| };
|
| @@ -79,6 +96,292 @@ class ScannerConstants : AllStatic {
|
| static StaticResource<Utf8Decoder> utf8_decoder_;
|
| };
|
|
|
| +// ----------------------------------------------------------------------------
|
| +// LiteralCollector - Collects the chars of a literal into buffer_.
|
| +// Chars are stored only while recording_ is set (see StartLiteral/EndLiteral/DropLiteral).
|
| +class LiteralCollector {
|
| + public:
|
| + LiteralCollector();
|
| + ~LiteralCollector();
|
| + // Appends c to buffer_ if a literal is being recorded; otherwise does nothing.
|
| + inline void AddChar(uc32 c) {
|
| + if (recording_) {
|
| + if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
|
| + buffer_.Add(static_cast<char>(c));  // c <= kMaxOneByteChar: stored as one char.
|
| + } else {
|
| + AddCharSlow(c);  // Larger values are handled by AddCharSlow (not defined here).
|
| + }
|
| + }
|
| + }
|
| + // Starts a new sequence in buffer_ and turns recording on.
|
| + void StartLiteral() {
|
| + buffer_.StartSequence();
|
| + recording_ = true;
|
| + }
|
| + // Stops recording and returns the collected chars followed by kEndMarker.
|
| + Vector<const char> EndLiteral() {
|
| + if (recording_) {
|
| + recording_ = false;
|
| + buffer_.Add(kEndMarker);
|
| + Vector<char> sequence = buffer_.EndSequence();
|
| + return Vector<const char>(sequence.start(), sequence.length());
|
| + }
|
| + return Vector<const char>();  // Not recording: default-constructed vector.
|
| + }
|
| + // Stops recording and drops the current sequence from buffer_, if recording.
|
| + void DropLiteral() {
|
| + if (recording_) {
|
| + recording_ = false;
|
| + buffer_.DropSequence();
|
| + }
|
| + }
|
| + // Resets buffer_; recording_ is left unchanged.
|
| + void Reset() {
|
| + buffer_.Reset();
|
| + }
|
| +
|
| + // The end marker added after a parsed literal.
|
| + // Using zero allows the usage of strlen and similar functions on
|
| + // identifiers and numbers (but not strings, since they may contain zero
|
| + // bytes).
|
| + static const char kEndMarker = '\x00';
|
| + private:
|
| + static const int kInitialCapacity = 256;  // Not referenced within this class definition.
|
| + SequenceCollector<char, 4> buffer_;
|
| + bool recording_;  // True between StartLiteral() and EndLiteral()/DropLiteral().
|
| + void AddCharSlow(uc32 c);  // Called by AddChar() for c > kMaxOneByteChar.
|
| +};
|
| +
|
| +// ----------------------------------------------------------------------------
|
| +// Scanner base-class.
|
| +
|
| +// Generic functionality used by both JSON and JavaScript scanners.
|
| +class Scanner {
|
| + public:
|
| + typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
|
| + // Literal-scanning scope bound to a Scanner; its members are defined elsewhere.
|
| + class LiteralScope {
|
| + public:
|
| + explicit LiteralScope(Scanner* self);
|
| + ~LiteralScope();
|
| + void Complete();
|
| +
|
| + private:
|
| + Scanner* scanner_;
|
| + bool complete_;
|
| + };
|
| +
|
| + Scanner();
|
| +
|
| + // Returns the current token (current_.token) again, without scanning.
|
| + Token::Value current_token() { return current_.token; }
|
| +
|
| + // One token look-ahead (past the token returned by Next()).
|
| + Token::Value peek() const { return next_.token; }
|
| + // Begin and end source positions of a token; both default to 0.
|
| + struct Location {
|
| + Location(int b, int e) : beg_pos(b), end_pos(e) { }
|
| + Location() : beg_pos(0), end_pos(0) { }
|
| + int beg_pos;
|
| + int end_pos;
|
| + };
|
| +
|
| + // Returns the location information for the current token
|
| + // (the token returned by Next()).
|
| + Location location() const { return current_.location; }
|
| + Location peek_location() const { return next_.location; }
|
| +
|
| + // Returns the literal string, if any, for the current token (the
|
| + // token returned by Next()). The string is 0-terminated and in
|
| + // UTF-8 format; it may contain 0-characters. Literal strings are
|
| + // collected for identifiers, strings, and numbers.
|
| + // These functions only give the correct result if the literal
|
| + // was scanned between calls to StartLiteral() and TerminateLiteral().
|
| + const char* literal_string() const {
|
| + return current_.literal_chars.start();
|
| + }
|
| +
|
| + int literal_length() const {
|
| + // Excluding terminal '\x00' added by TerminateLiteral(); yields -1 if length() is 0.
|
| + return current_.literal_chars.length() - 1;
|
| + }
|
| + // Returns the current literal as a vector, without the terminating '\x00'.
|
| + Vector<const char> literal() const {
|
| + return Vector<const char>(literal_string(), literal_length());
|
| + }
|
| +
|
| + // Returns the literal string for the next token (the token that
|
| + // would be returned if Next() were called).
|
| + const char* next_literal_string() const {
|
| + return next_.literal_chars.start();
|
| + }
|
| +
|
| +
|
| + // Returns the literal length of the next token (that would be returned if
|
| + // Next() were called).
|
| + int next_literal_length() const {
|
| + // Excluding terminal '\x00' added by TerminateLiteral(); yields -1 if length() is 0.
|
| + return next_.literal_chars.length() - 1;
|
| + }
|
| + // Like literal(), but for the next token.
|
| + Vector<const char> next_literal() const {
|
| + return Vector<const char>(next_literal_string(), next_literal_length());
|
| + }
|
| +
|
| + bool stack_overflow() { return stack_overflow_; }
|
| + // Number of look-ahead characters held by the scanner (c0_).
|
| + static const int kCharacterLookaheadBufferSize = 1;
|
| +
|
| + protected:
|
| + // The current and look-ahead token.
|
| + struct TokenDesc {
|
| + Token::Value token;
|
| + Location location;
|
| + Vector<const char> literal_chars;
|
| + };
|
| +
|
| + // Call this after setting source_ to the input.
|
| + void Init() {
|
| + // Set c0_ (one character ahead)
|
| + ASSERT(kCharacterLookaheadBufferSize == 1);
|
| + Advance();
|
| + // Initialize current_ to not refer to a literal.
|
| + current_.literal_chars = Vector<const char>();
|
| + // Reset literal buffer.
|
| + literal_buffer_.Reset();
|
| + }
|
| +
|
| + // Literal buffer support
|
| + inline void StartLiteral() {
|
| + literal_buffer_.StartLiteral();
|
| + }
|
| + // Appends c to the literal buffer.
|
| + inline void AddLiteralChar(uc32 c) {
|
| + literal_buffer_.AddChar(c);
|
| + }
|
| +
|
| + // Complete scanning of a literal and store its chars in next_.literal_chars.
|
| + inline void TerminateLiteral() {
|
| + next_.literal_chars = literal_buffer_.EndLiteral();
|
| + }
|
| +
|
| + // Stops scanning of a literal and drops the collected characters,
|
| + // e.g., due to an encountered error.
|
| + inline void DropLiteral() {
|
| + literal_buffer_.DropLiteral();
|
| + }
|
| + // Appends the look-ahead character c0_ to the literal, then advances.
|
| + inline void AddLiteralCharAdvance() {
|
| + AddLiteralChar(c0_);
|
| + Advance();
|
| + }
|
| +
|
| + // Low-level scanning support.
|
| + void Advance() { c0_ = source_->Advance(); }  // Reads the next character into c0_.
|
| + void PushBack(uc32 ch) {  // Pushes ch back to source_; ch becomes c0_.
|
| + source_->PushBack(ch);
|
| + c0_ = ch;
|
| + }
|
| + // Advances past the current character and returns tok.
|
| + inline Token::Value Select(Token::Value tok) {
|
| + Advance();
|
| + return tok;
|
| + }
|
| + // Advances once; if the new c0_ equals next, advances again and returns then, else returns else_.
|
| + inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
|
| + Advance();
|
| + if (c0_ == next) {
|
| + Advance();
|
| + return then;
|
| + } else {
|
| + return else_;
|
| + }
|
| + }
|
| +
|
| + uc32 ScanHexEscape(uc32 c, int length);
|
| + uc32 ScanOctalEscape(uc32 c, int length);
|
| +
|
| + // Return the current source position (source_->pos() minus the look-ahead size).
|
| + int source_pos() {
|
| + return source_->pos() - kCharacterLookaheadBufferSize;
|
| + }
|
| +
|
| + TokenDesc current_; // desc for current token (as returned by Next())
|
| + TokenDesc next_; // desc for next token (one token look-ahead)
|
| +
|
| + // Input stream. Must be initialized to an UTF16Buffer.
|
| + UTF16Buffer* source_;
|
| +
|
| + // Buffer to hold literal values (identifiers, strings, numbers)
|
| + // using '\x00'-terminated UTF-8 encoding. Handles allocation internally.
|
| + LiteralCollector literal_buffer_;
|
| +
|
| + bool stack_overflow_;  // Value reported by stack_overflow().
|
| +
|
| + // One Unicode character look-ahead; c0_ < 0 at the end of the input.
|
| + uc32 c0_;
|
| +};
|
| +
|
| +// ----------------------------------------------------------------------------
|
| +// JavaScriptScanner - base logic for JavaScript scanning.
|
| +
|
| +class JavaScriptScanner : public Scanner {
|
| + public:
|
| + JavaScriptScanner();
|
| +
|
| + // Returns the next token.
|
| + Token::Value Next();
|
| +
|
| + // Returns true if there was a line terminator before the peek'ed token.
|
| + bool has_line_terminator_before_next() const {
|
| + return has_line_terminator_before_next_;
|
| + }
|
| +
|
| + // Scans the input as a regular expression pattern; the previous
|
| + // character(s) must be / or /= (see seen_equal). Returns true if a pattern is scanned.
|
| + bool ScanRegExpPattern(bool seen_equal);
|
| + // Returns true if regexp flags are scanned (always since flags can
|
| + // be empty).
|
| + bool ScanRegExpFlags();
|
| +
|
| + // Tells whether the buffer contains an identifier (no escapes).
|
| + // Used for checking if a property name is an identifier.
|
| + static bool IsIdentifier(unibrow::CharacterStream* buffer);
|
| +
|
| + // Seek forward to the given position. This operation does not
|
| + // work in general, for instance when there are pushed back
|
| + // characters, but works for seeking forward until simple delimiter
|
| + // tokens, which is what it is used for.
|
| + void SeekForward(int pos);
|
| +
|
| + protected:
|
| + bool SkipWhiteSpace();
|
| + Token::Value SkipSingleLineComment();
|
| + Token::Value SkipMultiLineComment();
|
| +
|
| + // Scans a single JavaScript token.
|
| + void Scan();
|
| +
|
| + void ScanDecimalDigits();
|
| + Token::Value ScanNumber(bool seen_period);
|
| + Token::Value ScanIdentifier();
|
| +
|
| + void ScanEscape();
|
| + Token::Value ScanString();
|
| +
|
| + // Scans a possible HTML comment -- begins with '<!'.
|
| + Token::Value ScanHtmlComment();
|
| +
|
| + // Decodes a unicode escape-sequence which is part of an identifier.
|
| + // If the escape sequence cannot be decoded the result is kBadChar.
|
| + uc32 ScanIdentifierUnicodeEscape();
|
| + // Value returned by has_line_terminator_before_next().
|
| + bool has_line_terminator_before_next_;
|
| +};
|
| +
|
| +
|
| +// ----------------------------------------------------------------------------
|
| +// Keyword matching state machine.
|
|
|
| class KeywordMatcher {
|
| // Incrementally recognize keywords.
|
|
|