src/scanner.h - Issue 3181036: Created collector class and used it to collect identifiers during scanning.

Unified Diff: src/scanner.h

Issue 3181036: Created collector class and used it to collect identifiers during scanning. (Closed)

Patch Set: Created 10 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/scanner.h

diff --git a/src/scanner.h b/src/scanner.h

index 2dce5a18e0d9904c0d7a788a70b751e592f68c12..1c256f15b88147e0203fe34742f2a889809c682f 100644

--- a/src/scanner.h

+++ b/src/scanner.h

@@ -40,45 +40,36 @@ class UTF8Buffer {

UTF8Buffer();

~UTF8Buffer();

- void AddChar(uc32 c) {

- ASSERT_NOT_NULL(data_);

- if (cursor_ <= limit_ &&

- static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {

- *cursor_++ = static_cast<char>(c);

+ inline void AddChar(uc32 c) {

+ if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {

+ buffer_.Add(static_cast<char>(c));

} else {

AddCharSlow(c);

}

- void Reset() {

- if (data_ == NULL) {

- data_ = NewArray<char>(kInitialCapacity);

- limit_ = ComputeLimit(data_, kInitialCapacity);

- }

- cursor_ = data_;

+ void StartLiteral() {

+ buffer_.StartSequence();

}

- int pos() const {

- ASSERT_NOT_NULL(data_);

- return static_cast<int>(cursor_ - data_);

+ Vector<const char> EndLiteral() {

+ buffer_.Add(kEndMarker);

+ Vector<char> sequence = buffer_.EndSequence();

+ return Vector<const char>(sequence.start(), sequence.length());

}

- char* data() const { return data_; }

+ // The end marker added after a parsed literal.

+ // Using zero allows the usage of strlen and similar functions on

+ // identifiers and numbers (but not strings, since they may contain zero

+ // bytes).

+ // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside

+ // a UTF-8 string. This requires changes in all places that use

+ // str-functions on the literals, but allows a single pointer to represent

+ // the literal, even if it contains embedded zeros.

+ static const char kEndMarker = '\x00';

private:

static const int kInitialCapacity = 256;

- char* data_;

- char* cursor_;

- char* limit_;

- int Capacity() const {

- ASSERT_NOT_NULL(data_);

- return static_cast<int>(limit_ - data_) + unibrow::Utf8::kMaxEncodedSize;

- }

- static char* ComputeLimit(char* data, int capacity) {

- return (data + capacity) - unibrow::Utf8::kMaxEncodedSize;

- }

+ SequenceCollector<char> buffer_;

void AddCharSlow(uc32 c);

};

@@ -314,27 +305,34 @@ class Scanner {

// These functions only give the correct result if the literal

// was scanned between calls to StartLiteral() and TerminateLiteral().

const char* literal_string() const {

- return current_.literal_buffer->data();

+ return current_.literal_chars.start();

}

int literal_length() const {

- // Excluding terminal '\0' added by TerminateLiteral().

- return current_.literal_buffer->pos() - 1;

+ // Excluding terminal '\x00' added by TerminateLiteral().

+ return current_.literal_chars.length() - 1;

+ }

+ Vector<const char> literal() const {

+ return Vector<const char>(literal_string(), literal_length());

}

// Returns the literal string for the next token (the token that

// would be returned if Next() were called).

const char* next_literal_string() const {

- return next_.literal_buffer->data();

+ return next_.literal_chars.start();

}

// Returns the length of the literal for the next token (the token that

// would be returned if Next() were called).

int next_literal_length() const {

- return next_.literal_buffer->pos() - 1;

+ // Excluding terminal '\x00' added by TerminateLiteral().

+ return next_.literal_chars.length() - 1;

}

Vector<const char> next_literal() const {

- return Vector<const char>(next_literal_string(),

- next_literal_length());

+ return Vector<const char>(next_literal_string(), next_literal_length());

}

// Scans the input as a regular expression pattern, previous

@@ -371,7 +369,7 @@ class Scanner {

struct TokenDesc {

Token::Value token;

Location location;

- UTF8Buffer* literal_buffer;

+ Vector<const char> literal_chars;

};

void Init(Handle<String> source,

@@ -380,10 +378,10 @@ class Scanner {

ParserLanguage language);

// Literal buffer support

- void StartLiteral();

- void AddChar(uc32 ch);

- void AddCharAdvance();

- void TerminateLiteral();

+ inline void StartLiteral();

+ inline void AddChar(uc32 ch);

+ inline void AddCharAdvance();

+ inline void TerminateLiteral();

// Low-level scanning support.

void Advance() { c0_ = source_->Advance(); }

@@ -487,9 +485,8 @@ class Scanner {

SafeStringInputBuffer safe_string_input_buffer_;

// Buffer to hold literal values (identifiers, strings, numbers)

- // using 0-terminated UTF-8 encoding.

- UTF8Buffer literal_buffer_1_;

- UTF8Buffer literal_buffer_2_;

+ // using '\x00'-terminated UTF-8 encoding. Handles allocation internally.

+ UTF8Buffer literal_buffer_;

bool stack_overflow_;

static StaticResource<Utf8Decoder> utf8_decoder_;

« no previous file with comments | « src/runtime.cc ('k') | src/scanner.cc » ('j') | src/utils.h » ('J')