src/lexer/experimental-scanner.h - Issue 88653003: Add literal handling to experimental scanner.

Unified Diff: src/lexer/experimental-scanner.h

Issue 88653003: Add literal handling to experimental scanner. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Landing Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/lexer/experimental-scanner.h

diff --git a/src/lexer/experimental-scanner.h b/src/lexer/experimental-scanner.h

index ef65ee5cc1e199559dacadaedd8de47df9ea2e48..20f0adbe2c200741cb79a2b90e9e1353fc8629c0 100644

--- a/src/lexer/experimental-scanner.h

+++ b/src/lexer/experimental-scanner.h

@@ -36,6 +36,7 @@

#include "token.h"

#include "utils.h"

#include "v8stdint.h"

+#include "char-predicates-inl.h"

namespace v8 {

namespace internal {

@@ -64,6 +65,9 @@ class ScannerBase {

: isolate_(isolate),

unicode_cache_(isolate->unicode_cache()),

has_line_terminator_before_next_(true),

+ current_literal_(&literals_[0]),

+ next_literal_(&literals_[1]),

+ octal_pos_(Location::invalid()),

harmony_numeric_literals_(false),

harmony_modules_(false),

harmony_scoping_(false) {

@@ -89,6 +93,7 @@ class ScannerBase {

Token::Value Next() {

has_line_terminator_before_next_ = false;

current_ = next_;

+ std::swap(current_literal_, next_literal_);

Scan(); // Virtual! Will fill in next_.

return current_.token;

}

@@ -138,48 +143,83 @@ class ScannerBase {

// multiline comments? Atm doesn't look like we need to.

}

- // FIXME: implement these

Vector<const char> literal_ascii_string() {

- return Vector<const char>(); // FIXME

+ if (!current_literal_->Valid(current_.beg_pos)) {

+ FillLiteral(current_, current_literal_);

+ }

+ return current_literal_->ascii_string;

}

Vector<const uc16> literal_utf16_string() {

- return Vector<const uc16>(); // FIXME

+ if (!current_literal_->Valid(current_.beg_pos)) {

+ FillLiteral(current_, current_literal_);

+ }

+ return current_literal_->utf16_string;

+ }

+ int literal_length() {

+ if (!current_literal_->Valid(current_.beg_pos)) {

+ FillLiteral(current_, current_literal_);

+ }

+ return current_literal_->length;

}

bool is_literal_ascii() {

- return true; // FIXME

+ if (!current_literal_->Valid(current_.beg_pos)) {

+ FillLiteral(current_, current_literal_);

+ }

+ return current_literal_->is_ascii;

}

bool is_literal_contextual_keyword(Vector<const char> keyword) {

- return false; // FIXME

- }

- int literal_length() const {

- return 0; // FIXME

+ if (!is_literal_ascii()) return false;

+ Vector<const char> literal = literal_ascii_string();

+ return literal.length() == keyword.length() &&

+ (memcmp(literal.start(), keyword.start(), literal.length()) == 0);

}

bool literal_contains_escapes() const {

- return false; // FIXME

+ return current_.has_escapes;

}

Vector<const char> next_literal_ascii_string() {

- return Vector<const char>(); // FIXME

+ if (!next_literal_->Valid(next_.beg_pos)) {

+ FillLiteral(next_, next_literal_);

+ }

+ return next_literal_->ascii_string;

}

Vector<const uc16> next_literal_utf16_string() {

- return Vector<const uc16>(); // FIXME

+ if (!next_literal_->Valid(next_.beg_pos)) {

+ FillLiteral(next_, next_literal_);

+ }

+ return next_literal_->utf16_string;

+ }

+ int next_literal_length() {

+ if (!next_literal_->Valid(next_.beg_pos)) {

+ FillLiteral(next_, next_literal_);

+ }

+ return next_literal_->length;

}

bool is_next_literal_ascii() {

- return true; // FIXME

+ if (!next_literal_->Valid(next_.beg_pos)) {

+ FillLiteral(next_, next_literal_);

+ }

+ return next_literal_->is_ascii;

}

bool is_next_contextual_keyword(Vector<const char> keyword) {

- return false; // FIXME

- }

- int next_literal_length() const {

- return 0; // FIXME

+ if (!is_next_literal_ascii()) return false;

+ Vector<const char> literal = next_literal_ascii_string();

+ return literal.length() == keyword.length() &&

+ (memcmp(literal.start(), keyword.start(), literal.length()) == 0);

}

- uc32 ScanOctalEscape(uc32 c, int length) { return 0; } // FIXME

- Location octal_position() const {

- return Location(0, 0); // FIXME

- }

- void clear_octal_position() { } // FIXME

+ // Returns the location of the last seen octal literal.

+ Location octal_position() const { return octal_pos_; }

+ void clear_octal_position() { octal_pos_ = Location::invalid(); }

// Seek forward to the given position. This operation works for simple cases

// such as seeking forward until simple delimiter tokens, which is what it is

@@ -187,6 +227,7 @@ class ScannerBase {

// the "next" token. The "current" token will be invalid. FIXME: for utf-8,

// we need to decide if pos is counted in characters or in bytes.

virtual void SeekForward(int pos) = 0;

+ virtual void SetEnd(int pos) = 0;

// Scans the input as a regular expression pattern, previous character(s) must

// be /(=). Returns true if a pattern is scanned. FIXME: this won't work for

@@ -204,10 +245,21 @@ class ScannerBase {

bool has_escapes;

};

+  // Cached literal data for one token. The literal accessors of ScannerBase
+  // compare |beg_pos| against the token's start position via Valid() and call
+  // FillLiteral() to (re)compute the cache when they differ.
+  struct LiteralDesc {
+    int beg_pos;   // Start position of the token this cache belongs to.
+    bool is_ascii;
+    int length;
+    Vector<const char> ascii_string;
+    Vector<const uc16> utf16_string;
+    LiteralBuffer buffer;
+    // Start out matching no token, so the first accessor call always goes
+    // through FillLiteral() instead of reading indeterminate members.
+    // NOTE(review): assumes token positions are never negative - confirm.
+    LiteralDesc() : beg_pos(-1), is_ascii(false), length(0) { }
+    // Returns true if the cached data was computed for the token that starts
+    // at |pos|.
+    bool Valid(int pos) const { return beg_pos == pos; }
+  };

virtual void Scan() = 0;

virtual void SetBufferBasedOnHandle() = 0;

static void UpdateBuffersAfterGC(v8::Isolate*, GCType, GCCallbackFlags);

+ virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0;

Isolate* isolate_;

UnicodeCache* unicode_cache_;

@@ -217,6 +269,12 @@ class ScannerBase {

TokenDesc current_; // desc for current token (as returned by Next())

TokenDesc next_; // desc for next token (one token look-ahead)

+ LiteralDesc* current_literal_;

+ LiteralDesc* next_literal_;

+ LiteralDesc literals_[2];

+ Location octal_pos_;

bool harmony_numeric_literals_;

bool harmony_modules_;

bool harmony_scoping_;

@@ -246,8 +304,10 @@ class ExperimentalScanner : public ScannerBase {

virtual ~ExperimentalScanner() { }

+ protected:

virtual void Scan();

virtual void SeekForward(int pos);

+ virtual void SetEnd(int pos);

virtual bool ScanRegExpPattern(bool seen_equal);

virtual bool ScanRegExpFlags();

@@ -270,6 +330,8 @@ class ExperimentalScanner : public ScannerBase {

const Char* GetNewBufferBasedOnHandle() const;

+ virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal);

private:

bool ValidIdentifierPart() {

return unicode_cache_->IsIdentifierPart(ScanHexNumber(4));

@@ -282,6 +344,19 @@ class ExperimentalScanner : public ScannerBase {

uc32 ScanHexNumber(int length);

bool ScanLiteralUnicodeEscape();

+ const Char* ScanHexNumber(const Char* start,

+ const Char* end,

+ uc32* result);

+ const Char* ScanOctalEscape(const Char* start,

+ const Char* end,

+ uc32* result);

+ const Char* ScanIdentifierUnicodeEscape(const Char* start,

+ const Char* end,

+ uc32* result);

+ const Char* ScanEscape(const Char* start,

+ const Char* end,

+ LiteralBuffer* literal);

Handle<String> source_handle_;

const Char* buffer_;

const Char* buffer_end_;

@@ -302,6 +377,12 @@ void ExperimentalScanner<Char>::SeekForward(int pos) {

template<typename Char>

+void ExperimentalScanner<Char>::SetEnd(int pos) {

+ buffer_end_ = buffer_ + pos;

+template<typename Char>

bool ExperimentalScanner<Char>::ScanRegExpPattern(bool seen_equal) {

// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags

bool in_character_class = false;

@@ -360,6 +441,7 @@ bool ExperimentalScanner<Char>::ScanRegExpFlags() {

return true;

}

template<typename Char>

uc32 ExperimentalScanner<Char>::ScanHexNumber(int length) {

// We have seen \uXXXX, let's see what it is.

@@ -374,6 +456,51 @@ uc32 ExperimentalScanner<Char>::ScanHexNumber(int length) {

return x;

}

+template<typename Char>

+const Char* ExperimentalScanner<Char>::ScanHexNumber(

+ const Char* cursor, const Char* end, uc32* result) {

+ uc32 x = 0;

+ for ( ; cursor < end; ++cursor) {

+ int d = HexValue(*cursor);

+ if (d < 0) {

+ *result = -1;

+ return NULL;

+ }

+ x = x * 16 + d;

+ }

+ *result = x;

+ return cursor;

+// Octal escapes of the forms '\0xx' and '\xxx' are not a part of

+// ECMA-262. Other JS VMs support them.

+template<typename Char>

+const Char* ExperimentalScanner<Char>::ScanOctalEscape(

+ const Char* start, const Char* end, uc32* result) {

+ uc32 x = *result - '0';

+ const Char* cursor;

+ for (cursor = start; cursor < end; cursor++) {

+ int d = *cursor - '0';

+ if (d < 0 || d > 7) break;

+ int nx = x * 8 + d;

+ if (nx >= 256) break;

+ x = nx;

+ }

+ // Anything except '\0' is an octal escape sequence, illegal in strict mode.

+ // Remember the position of octal escape sequences so that an error

+ // can be reported later (in strict mode).

+ // We don't report the error immediately, because the octal escape can

+ // occur before the "use strict" directive.

+ if (*result != '0' || cursor > start) {

+ octal_pos_ = Location(start - 1 - buffer_, cursor - 1 - buffer_);

+ }

+ *result = x;

+ return cursor;

template<typename Char>

bool ExperimentalScanner<Char>::ScanLiteralUnicodeEscape() {

ASSERT(cursor_ < buffer_end_);

@@ -395,6 +522,78 @@ bool ExperimentalScanner<Char>::ScanLiteralUnicodeEscape() {

}

+template<typename Char>

+const Char* ExperimentalScanner<Char>::ScanIdentifierUnicodeEscape(

+ const Char* cursor, const Char* end, uc32* result) {

+ ASSERT(*cursor == '\\');

+ if (++cursor >= end) return NULL;

+ if (*cursor != 'u') return NULL;

+ ++cursor;

+ if (cursor + 4 > end) return NULL;

+ cursor = ScanHexNumber(cursor, cursor + 4, result);

+ return cursor;

+// Decodes a single escape sequence. |cursor| must point at the backslash and
+// |end| one past the last character that may be consumed. The decoded
+// character is appended to |literal|; an escaped line terminator (line
+// continuation) appends nothing. Returns the position just past the escape
+// sequence, or NULL if the sequence is truncated or a \u / \x escape contains
+// a non-hex digit.
+template<typename Char>
+const Char* ExperimentalScanner<Char>::ScanEscape(
+    const Char* cursor, const Char* end, LiteralBuffer* literal) {
+  ASSERT(*cursor == '\\');
+  if (++cursor >= end) return NULL;
+  uc32 c = *cursor;
+  ++cursor;  // Cannot move past |end|: cursor < end was checked above.
+  // Skip escaped newlines.
+  if (unicode_cache_->IsLineTerminator(c)) {
+    // Only look at the following character if there is one.
+    if (cursor < end) {
+      uc32 peek = *cursor;
+      // Allow CR+LF newlines in multiline string literals.
+      if (IsCarriageReturn(c) && IsLineFeed(peek)) cursor++;
+      // Allow LF+CR newlines in multiline string literals.
+      if (IsLineFeed(c) && IsCarriageReturn(peek)) cursor++;
+    }
+    return cursor;
+  }
+  switch (c) {
+    case '\'':  // fall through
+    case '"' :  // fall through
+    case '\\': break;
+    case 'b' : c = '\b'; break;
+    case 'f' : c = '\f'; break;
+    case 'n' : c = '\n'; break;
+    case 'r' : c = '\r'; break;
+    case 't' : c = '\t'; break;
+    case 'u' : {
+      // Exactly four hex digits must be available in [cursor, end).
+      if (end - cursor < 4) return NULL;
+      cursor = ScanHexNumber(cursor, cursor + 4, &c);
+      if (cursor == NULL) return NULL;
+      break;
+    }
+    case 'v' : c = '\v'; break;
+    case 'x' : {
+      // Exactly two hex digits must be available in [cursor, end).
+      if (end - cursor < 2) return NULL;
+      cursor = ScanHexNumber(cursor, cursor + 2, &c);
+      if (cursor == NULL) return NULL;
+      break;
+    }
+    case '0' :  // fall through
+    case '1' :  // fall through
+    case '2' :  // fall through
+    case '3' :  // fall through
+    case '4' :  // fall through
+    case '5' :  // fall through
+    case '6' :  // fall through
+    case '7' :
+      // An octal escape consumes at most two digits after the first one.
+      if (end - cursor > 2) end = cursor + 2;
+      cursor = ScanOctalEscape(cursor, end, &c);
+      break;
+  }
+  // According to ECMA-262, section 7.8.4, characters not covered by the
+  // above cases should be illegal, but they are commonly handled as
+  // non-escaped characters by JS VMs.
+  literal->AddChar(c);
+  return cursor;
+}

} }

#endif // V8_LEXER_EXPERIMENTAL_SCANNER_H

« no previous file with comments | « no previous file | src/lexer/experimental-scanner.cc » ('j') | no next file with comments »