src/scanner.h - Issue 196133017: Experimental parser: merge r19949

Unified Diff: src/scanner.h

Issue 196133017: Experimental parser: merge r19949 (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/scanner.h

diff --git a/src/scanner.h b/src/scanner.h

index d351f99350e81233b529943e8530e720326ff578..bfed2367149f010eae07eccda23cd0ed2df26815 100644

--- a/src/scanner.h

+++ b/src/scanner.h

@@ -44,6 +44,9 @@ namespace v8 {

namespace internal {

+class ParserRecorder;

// Returns the value (0 .. 15) of a hexadecimal character c.

// If c is not a legal hexadecimal character, returns a value < 0.

inline int HexValue(uc32 c) {

@@ -120,8 +123,8 @@ class Utf16CharacterStream {

virtual bool ReadBlock() = 0;

virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;

- const uc16* buffer_cursor_;

- const uc16* buffer_end_;

+ const uint16_t* buffer_cursor_;

+ const uint16_t* buffer_end_;

unsigned pos_;

};

@@ -197,32 +200,32 @@ class DuplicateFinder {

backing_store_(16),

map_(&Match) { }

- int AddAsciiSymbol(Vector<const char> key, int value);

- int AddUtf16Symbol(Vector<const uint16_t> key, int value);

+ int AddOneByteSymbol(Vector<const uint8_t> key, int value);

+ int AddTwoByteSymbol(Vector<const uint16_t> key, int value);

// Add a number literal by converting it (if necessary)

// to the string that ToString(ToNumber(literal)) would generate,

// and then adding that string with AddOneByteSymbol.

// This string is the actual value used as key in an object literal,

// and the one that must be different from the other keys.

- int AddNumber(Vector<const char> key, int value);

+ int AddNumber(Vector<const uint8_t> key, int value);

private:

- int AddSymbol(Vector<const byte> key, bool is_ascii, int value);

+ int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);

// Backs up the key and its length in the backing store.

// The backup is stored with a base 127 encoding of the

- // length (plus a bit saying whether the string is ASCII),

+ // length (plus a bit saying whether the string is one byte),

// followed by the bytes of the key.

- byte* BackupKey(Vector<const byte> key, bool is_ascii);

+ uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);

// Compare two encoded keys (both pointing into the backing store)

// for having the same base-127 encoded lengths and one-byte-ness,

// and then having the same 'length' bytes following.

static bool Match(void* first, void* second);

// Creates a hash from a sequence of bytes.

- static uint32_t Hash(Vector<const byte> key, bool is_ascii);

+ static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);

// Checks whether a string containing a JS number is its canonical

// form.

- static bool IsNumberCanonical(Vector<const char> key);

+ static bool IsNumberCanonical(Vector<const uint8_t> key);

// Size of buffer. Sufficient for using it to call DoubleToCString

// from conversions.h.

@@ -242,7 +245,7 @@ class DuplicateFinder {

class LiteralBuffer {

public:

- LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { }

+ LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }

~LiteralBuffer() {

if (backing_store_.length() > 0) {

@@ -252,48 +255,48 @@ class LiteralBuffer {

INLINE(void AddChar(uint32_t code_unit)) {

if (position_ >= backing_store_.length()) ExpandBuffer();

- if (is_ascii_) {

+ if (is_one_byte_) {

if (code_unit <= unibrow::Latin1::kMaxChar) {

backing_store_[position_] = static_cast<byte>(code_unit);

position_ += kOneByteSize;

return;

}

- ConvertToUtf16();

+ ConvertToTwoByte();

}

ASSERT(code_unit < 0x10000u);

- *reinterpret_cast<uc16*>(&backing_store_[position_]) = code_unit;

+ *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;

position_ += kUC16Size;

}

- bool is_ascii() { return is_ascii_; }

+ bool is_one_byte() { return is_one_byte_; }

bool is_contextual_keyword(Vector<const char> keyword) {

- return is_ascii() && keyword.length() == position_ &&

+ return is_one_byte() && keyword.length() == position_ &&

(memcmp(keyword.start(), backing_store_.start(), position_) == 0);

}

- Vector<const uc16> utf16_literal() {

- ASSERT(!is_ascii_);

+ Vector<const uint16_t> two_byte_literal() {

+ ASSERT(!is_one_byte_);

ASSERT((position_ & 0x1) == 0);

- return Vector<const uc16>(

- reinterpret_cast<const uc16*>(backing_store_.start()),

+ return Vector<const uint16_t>(

+ reinterpret_cast<const uint16_t*>(backing_store_.start()),

position_ >> 1);

}

- Vector<const char> ascii_literal() {

- ASSERT(is_ascii_);

- return Vector<const char>(

- reinterpret_cast<const char*>(backing_store_.start()),

+ Vector<const uint8_t> one_byte_literal() {

+ ASSERT(is_one_byte_);

+ return Vector<const uint8_t>(

+ reinterpret_cast<const uint8_t*>(backing_store_.start()),

position_);

}

int length() {

- return is_ascii_ ? position_ : (position_ >> 1);

+ return is_one_byte_ ? position_ : (position_ >> 1);

}

void Reset() {

position_ = 0;

- is_ascii_ = true;

+ is_one_byte_ = true;

}

private:

@@ -314,8 +317,8 @@ class LiteralBuffer {

backing_store_ = new_store;

}

- void ConvertToUtf16() {

- ASSERT(is_ascii_);

+ void ConvertToTwoByte() {

+ ASSERT(is_one_byte_);

Vector<byte> new_store;

int new_content_size = position_ * kUC16Size;

if (new_content_size >= backing_store_.length()) {

@@ -326,7 +329,7 @@ class LiteralBuffer {

new_store = backing_store_;

}

uint8_t* src = backing_store_.start();

- uc16* dst = reinterpret_cast<uc16*>(new_store.start());

+ uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());

for (int i = position_ - 1; i >= 0; i--) {

dst[i] = src[i];

}

@@ -335,10 +338,10 @@ class LiteralBuffer {

backing_store_ = new_store;

}

position_ = new_content_size;

- is_ascii_ = false;

+ is_one_byte_ = false;

}

- bool is_ascii_;

+ bool is_one_byte_;

int position_;

Vector<byte> backing_store_;

@@ -404,32 +407,13 @@ class Scanner {

// Returns the location information for the current token

// (the token last returned by Next()).

Location location() const { return current_.location; }

- // Returns the literal string, if any, for the current token (the

- // token last returned by Next()). The string is 0-terminated.

- // Literal strings are collected for identifiers, strings, and

- // numbers.

- // These functions only give the correct result if the literal

- // was scanned between calls to StartLiteral() and TerminateLiteral().

- Vector<const char> literal_ascii_string() {

- ASSERT_NOT_NULL(current_.literal_chars);

- return current_.literal_chars->ascii_literal();

- }

- Vector<const uc16> literal_utf16_string() {

- ASSERT_NOT_NULL(current_.literal_chars);

- return current_.literal_chars->utf16_literal();

- }

- bool is_literal_ascii() {

- ASSERT_NOT_NULL(current_.literal_chars);

- return current_.literal_chars->is_ascii();

- }

- bool is_literal_contextual_keyword(Vector<const char> keyword) {

- ASSERT_NOT_NULL(current_.literal_chars);

- return current_.literal_chars->is_contextual_keyword(keyword);

- }

- int literal_length() const {

- ASSERT_NOT_NULL(current_.literal_chars);

- return current_.literal_chars->length();

- }

+ // Similar functions for the upcoming token.

+ // One token look-ahead (past the token returned by Next()).

+ Token::Value peek() const { return next_.token; }

+ Location peek_location() const { return next_.location; }

bool literal_contains_escapes() const {

Location location = current_.location;

@@ -440,43 +424,47 @@ class Scanner {

}

return current_.literal_chars->length() != source_length;

}

- // Similar functions for the upcoming token.

- // One token look-ahead (past the token returned by Next()).

- Token::Value peek() const { return next_.token; }

- Location peek_location() const { return next_.location; }

- // Returns the literal string for the next token (the token that

- // would be returned if Next() were called).

- Vector<const char> next_literal_ascii_string() {

- ASSERT_NOT_NULL(next_.literal_chars);

- return next_.literal_chars->ascii_literal();

- }

- Vector<const uc16> next_literal_utf16_string() {

- ASSERT_NOT_NULL(next_.literal_chars);

- return next_.literal_chars->utf16_literal();

- }

- bool is_next_literal_ascii() {

- ASSERT_NOT_NULL(next_.literal_chars);

- return next_.literal_chars->is_ascii();

+ bool is_literal_contextual_keyword(Vector<const char> keyword) {

+ ASSERT_NOT_NULL(current_.literal_chars);

+ return current_.literal_chars->is_contextual_keyword(keyword);

}

bool is_next_contextual_keyword(Vector<const char> keyword) {

ASSERT_NOT_NULL(next_.literal_chars);

return next_.literal_chars->is_contextual_keyword(keyword);

}

- int next_literal_length() const {

- ASSERT_NOT_NULL(next_.literal_chars);

- return next_.literal_chars->length();

+ Handle<String> AllocateNextLiteralString(Isolate* isolate,

+ PretenureFlag tenured);

+ Handle<String> AllocateInternalizedString(Isolate* isolate);

+ double DoubleValue();

+ bool UnescapedLiteralMatches(const char* data, int length) {

+ if (is_literal_one_byte() &&

+ literal_length() == length &&

+ !literal_contains_escapes()) {

+ const char* token =

+ reinterpret_cast<const char*>(literal_one_byte_string().start());

+ return !strncmp(token, data, length);

+ }

+ return false;

+ }

+ void IsGetOrSet(bool* is_get, bool* is_set) {

+ if (is_literal_one_byte() &&

+ literal_length() == 3 &&

+ !literal_contains_escapes()) {

+ const char* token =

+ reinterpret_cast<const char*>(literal_one_byte_string().start());

+ *is_get = strncmp(token, "get", 3) == 0;

+ *is_set = !*is_get && strncmp(token, "set", 3) == 0;

+ }

}

- UnicodeCache* unicode_cache() { return unicode_cache_; }

+ int FindNumber(DuplicateFinder* finder, int value);

+ int FindSymbol(DuplicateFinder* finder, int value);

- static const int kCharacterLookaheadBufferSize = 1;

+ void LogSymbol(ParserRecorder* log, int position);

- // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.

- uc32 ScanOctalEscape(uc32 c, int length);

+ UnicodeCache* unicode_cache() { return unicode_cache_; }

// Returns the location of the last seen octal literal.

Location octal_position() const { return octal_pos_; }

@@ -529,6 +517,11 @@ class Scanner {

LiteralBuffer* literal_chars;

};

+ static const int kCharacterLookaheadBufferSize = 1;

+ // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.

+ uc32 ScanOctalEscape(uc32 c, int length);

// Call this after setting source_ to the input.

void Init() {

// Set c0_ (one character ahead)

@@ -589,6 +582,47 @@ class Scanner {

}

+ // Returns the literal string, if any, for the current token (the

+ // token last returned by Next()). The string is 0-terminated.

+ // Literal strings are collected for identifiers, strings, and

+ // numbers.

+ // These functions only give the correct result if the literal

+ // was scanned between calls to StartLiteral() and TerminateLiteral().

+ Vector<const uint8_t> literal_one_byte_string() {

+ ASSERT_NOT_NULL(current_.literal_chars);

+ return current_.literal_chars->one_byte_literal();

+ }

+ Vector<const uint16_t> literal_two_byte_string() {

+ ASSERT_NOT_NULL(current_.literal_chars);

+ return current_.literal_chars->two_byte_literal();

+ }

+ bool is_literal_one_byte() {

+ ASSERT_NOT_NULL(current_.literal_chars);

+ return current_.literal_chars->is_one_byte();

+ }

+ int literal_length() const {

+ ASSERT_NOT_NULL(current_.literal_chars);

+ return current_.literal_chars->length();

+ }

+ // Returns the literal string for the next token (the token that

+ // would be returned if Next() were called).

+ Vector<const uint8_t> next_literal_one_byte_string() {

+ ASSERT_NOT_NULL(next_.literal_chars);

+ return next_.literal_chars->one_byte_literal();

+ }

+ Vector<const uint16_t> next_literal_two_byte_string() {

+ ASSERT_NOT_NULL(next_.literal_chars);

+ return next_.literal_chars->two_byte_literal();

+ }

+ bool is_next_literal_one_byte() {

+ ASSERT_NOT_NULL(next_.literal_chars);

+ return next_.literal_chars->is_one_byte();

+ }

+ int next_literal_length() const {

+ ASSERT_NOT_NULL(next_.literal_chars);

+ return next_.literal_chars->length();

+ }

uc32 ScanHexNumber(int expected_length);

// Scans a single JavaScript token.

« no previous file with comments | « src/safepoint-table.h ('k') | src/scanner.cc » ('j') | no next file with comments »