| Index: src/lexer/lexer.cc
|
| diff --git a/src/lexer/lexer.cc b/src/lexer/lexer.cc
|
| index df1e6541858b9edeac70ad05cc842f1348a69224..fe6df5998c567f81ef77c4d1bb406cd977cc527d 100644
|
| --- a/src/lexer/lexer.cc
|
| +++ b/src/lexer/lexer.cc
|
| @@ -139,10 +139,10 @@ void LexerGCHandler::UpdateLexersAfterGC() {
|
|
|
| LexerBase::LexerBase(UnicodeCache* unicode_cache)
|
| : unicode_cache_(unicode_cache),
|
| - has_line_terminator_before_next_(true),
|
| - has_multiline_comment_before_next_(false),
|
| current_literal_(&literals_[0]),
|
| next_literal_(&literals_[1]),
|
| + has_line_terminator_before_next_(true),
|
| + has_multiline_comment_before_next_(false),
|
| harmony_numeric_literals_(false),
|
| harmony_modules_(false),
|
| harmony_scoping_(false) {
|
| @@ -170,14 +170,13 @@ Lexer<Char>::Lexer(UnicodeCache* unicode_cache,
|
| : LexerBase(unicode_cache),
|
| isolate_(NULL),
|
| source_ptr_(source_ptr),
|
| - start_position_(0),
|
| end_position_(length),
|
| - buffer_(NULL),
|
| - buffer_end_(NULL),
|
| - start_(NULL),
|
| - cursor_(NULL),
|
| + buffer_(source_ptr),
|
| + buffer_end_(source_ptr + length),
|
| + start_(source_ptr),
|
| + cursor_(source_ptr),
|
| last_octal_end_(NULL) {
|
| - CHECK(false); // not yet supported
|
| + current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0;
|
| }
|
|
|
|
|
| @@ -190,20 +189,16 @@ Lexer<Char>::Lexer(UnicodeCache* unicode_cache,
|
| isolate_(source->GetIsolate()),
|
| source_handle_(FlattenGetString(source)),
|
| source_ptr_(NULL),
|
| - start_position_(start_position),
|
| end_position_(end_position),
|
| buffer_(NULL),
|
| buffer_end_(NULL),
|
| start_(NULL),
|
| cursor_(NULL),
|
| last_octal_end_(NULL) {
|
| + cursor_ += start_position;
|
| UpdateBufferBasedOnHandle();
|
| - current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0;
|
| isolate_->lexer_gc_handler()->AddLexer(this);
|
| - // TODO(dcarney): move this to UpdateBufferBasedOnHandle
|
| - cursor_ = buffer_ + start_position;
|
| - buffer_end_ = buffer_ + end_position;
|
| - start_ = cursor_;
|
| + current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0;
|
| }
|
|
|
|
|
| @@ -215,16 +210,19 @@ Lexer<Char>::~Lexer() {
|
| }
|
|
|
|
|
| +// TODO(dcarney): utf8 handling
|
| template<typename Char>
|
| void Lexer<Char>::SeekForward(int pos) {
|
| + // TODO(dcarney): utf8 handling
|
| cursor_ = buffer_ + pos;
|
| start_ = cursor_;
|
| has_line_terminator_before_next_ = false;
|
| has_multiline_comment_before_next_ = false;
|
| - Scan(); // Fills in next_.
|
| + Scan();
|
| }
|
|
|
|
|
| +// TODO(dcarney): utf8 handling
|
| template<typename Char>
|
| bool Lexer<Char>::ScanRegExpPattern(bool seen_equal) {
|
| // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
|
| @@ -269,6 +267,7 @@ bool Lexer<Char>::ScanRegExpPattern(bool seen_equal) {
|
| }
|
|
|
|
|
| +// TODO(dcarney): utf8 handling
|
| template<typename Char>
|
| bool Lexer<Char>::ScanRegExpFlags() {
|
| next_.beg_pos = cursor_ - buffer_;
|
| @@ -302,7 +301,7 @@ uc32 Lexer<Char>::ScanHexNumber(int length) {
|
|
|
|
|
| template<typename Char>
|
| -const Char* Lexer<Char>::ScanHexNumber(
|
| +static const Char* ScanHexNumber(
|
| const Char* cursor, const Char* end, uc32* result) {
|
| uc32 x = 0;
|
| for ( ; cursor < end; ++cursor) {
|
| @@ -321,7 +320,7 @@ const Char* Lexer<Char>::ScanHexNumber(
|
| // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
|
| // ECMA-262. Other JS VMs support them.
|
| template<typename Char>
|
| -const Char* Lexer<Char>::ScanOctalEscape(
|
| +static const Char* ScanOctalEscape(
|
| const Char* start, const Char* end, uc32* result) {
|
| uc32 x = *result - '0';
|
| const Char* cursor;
|
| @@ -337,6 +336,7 @@ const Char* Lexer<Char>::ScanOctalEscape(
|
| }
|
|
|
|
|
| +// TODO(dcarney): utf8 handling
|
| template<typename Char>
|
| bool Lexer<Char>::ScanLiteralUnicodeEscape() {
|
| ASSERT(cursor_ < buffer_end_);
|
| @@ -359,7 +359,7 @@ bool Lexer<Char>::ScanLiteralUnicodeEscape() {
|
|
|
|
|
| template<typename Char>
|
| -const Char* Lexer<Char>::ScanIdentifierUnicodeEscape(
|
| +static const Char* ScanIdentifierUnicodeEscape(
|
| const Char* cursor, const Char* end, uc32* result) {
|
| ASSERT(*cursor == '\\');
|
| if (++cursor >= end) return NULL;
|
| @@ -372,14 +372,16 @@ const Char* Lexer<Char>::ScanIdentifierUnicodeEscape(
|
|
|
|
|
| template<typename Char>
|
| -const Char* Lexer<Char>::ScanEscape(
|
| - const Char* cursor, const Char* end, LiteralBuffer* literal) {
|
| +static const Char* ScanEscape(UnicodeCache* cache,
|
| + const Char* cursor,
|
| + const Char* end,
|
| + LiteralBuffer* literal) {
|
| ASSERT(*cursor == '\\');
|
| if (++cursor >= end) return NULL;
|
| uc32 c = *cursor;
|
| if (++cursor > end) return NULL;
|
| // Skip escaped newlines.
|
| - if (unicode_cache_->IsLineTerminator(c)) {
|
| + if (cache->IsLineTerminator(c)) {
|
| uc32 peek = *cursor;
|
| // Allow CR+LF newlines in multiline string literals.
|
| if (IsCarriageReturn(c) && IsLineFeed(peek)) cursor++;
|
| @@ -432,14 +434,14 @@ const Char* Lexer<Char>::ScanEscape(
|
|
|
| template<typename Char>
|
| LexerBase::Location Lexer<Char>::octal_position() const {
|
| - if (!last_octal_end_)
|
| - return Location::invalid();
|
| + if (!last_octal_end_) return Location::invalid();
|
| // The last octal might be an octal escape or an octal number. Whichever it
|
| // is, we'll find the start by just scanning back until we hit a non-octal
|
| // character.
|
| const Char* temp_cursor = last_octal_end_ - 1;
|
| - while (temp_cursor >= buffer_ && *temp_cursor >= '0' && *temp_cursor <= '7')
|
| + while (temp_cursor >= buffer_ && *temp_cursor >= '0' && *temp_cursor <= '7') {
|
| --temp_cursor;
|
| + }
|
| return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_);
|
| }
|
|
|
| @@ -477,102 +479,153 @@ void Lexer<Char>::UpdateBufferBasedOnHandle() {
|
| int cursor_offset = cursor_ - buffer_;
|
| int last_octal_end_offset = last_octal_end_ - buffer_;
|
| buffer_ = new_buffer;
|
| - buffer_end_ = buffer_ + source_handle_->length();
|
| + buffer_end_ = buffer_ + end_position_;
|
| start_ = buffer_ + start_offset;
|
| cursor_ = buffer_ + cursor_offset;
|
| if (last_octal_end_ != NULL) {
|
| last_octal_end_ = buffer_ + last_octal_end_offset;
|
| }
|
| - ResetLiterals();
|
| + current_literal_->Invalidate();
|
| + next_literal_->Invalidate();
|
| }
|
| }
|
|
|
|
|
| -template<>
|
| -bool Lexer<uint8_t>::IsSubstringOfSource(const TokenDesc& token) {
|
| - return !token.has_escapes;
|
| +void LexerBase::LiteralDesc::SetOneByteString(
|
| + Vector<const uint8_t> string, bool owned) {
|
| + is_in_buffer_ = false;
|
| + if (is_one_byte_string_owned_) {
|
| + one_byte_string_.Dispose();
|
| + }
|
| + is_one_byte_string_owned_ = owned;
|
| + is_one_byte_ = true;
|
| + one_byte_string_ = string;
|
| +}
|
| +
|
| +
|
| +void LexerBase::LiteralDesc::SetTwoByteString(Vector<const uint16_t> string) {
|
| + is_in_buffer_ = false;
|
| + is_one_byte_ = false;
|
| + two_byte_string_ = string;
|
| +}
|
| +
|
| +
|
| +void LexerBase::LiteralDesc::SetStringFromLiteralBuffer() {
|
| + is_one_byte_ = buffer.is_ascii();
|
| + is_in_buffer_ = true;
|
| + length = buffer.length();
|
| + if (is_one_byte_) {
|
| + if (is_one_byte_string_owned_) {
|
| + one_byte_string_.Dispose();
|
| + }
|
| + is_one_byte_string_owned_ = false;
|
| + one_byte_string_ = Vector<const uint8_t>::cast(buffer.ascii_literal());
|
| + } else {
|
| + two_byte_string_ = buffer.utf16_literal();
|
| + }
|
| +}
|
| +
|
| +
|
| +static inline bool IsOneByte(const uint8_t* cursor, const uint8_t* end) {
|
| + return true;
|
| +}
|
| +
|
| +
|
| +static inline bool IsOneByte(const uint16_t* cursor, const uint16_t* end) {
|
| + uint16_t acc = 0;
|
| + while (cursor != end) {
|
| + acc |= *cursor++ >> 8;
|
| + }
|
| + return acc == 0;
|
| +}
|
| +
|
| +
|
| +static inline bool IsOneByte(const int8_t* cursor, const int8_t* end) {
|
| + int8_t acc = 0;
|
| + while (cursor != end) {
|
| + acc |= *cursor++ >> 7;
|
| + }
|
| + return acc == 0;
|
| }
|
|
|
|
|
| template<>
|
| -bool Lexer<uint16_t>::IsSubstringOfSource(
|
| - const TokenDesc& token) {
|
| - if (token.has_escapes) return false;
|
| - const uint16_t* start = buffer_ + token.beg_pos;
|
| - const uint16_t* end = buffer_ + token.end_pos;
|
| - for (const uint16_t* cursor = start; cursor != end; ++cursor) {
|
| - if (*cursor >= unibrow::Latin1::kMaxChar) return true;
|
| +template<>
|
| +inline void Lexer<uint16_t>::SetLiteral<true>(const uint16_t* cursor,
|
| + const uint16_t* end,
|
| + LiteralDesc* literal) {
|
| + Vector<uint8_t> vector = Vector<uint8_t>::New(literal->length);
|
| + uint8_t* data = vector.start();
|
| + while (cursor < end) {
|
| + *data++ = *cursor++;
|
| }
|
| - return false;
|
| + literal->SetOneByteString(Vector<const uint8_t>::cast(vector), true);
|
| }
|
|
|
|
|
| template<>
|
| -bool Lexer<int8_t>::IsSubstringOfSource(const TokenDesc& token) {
|
| - // FIXME: implement.
|
| - UNREACHABLE();
|
| - return false;
|
| +template<>
|
| +inline void Lexer<uint16_t>::SetLiteral<false>(const uint16_t* start,
|
| + const uint16_t* end,
|
| + LiteralDesc* literal) {
|
| + literal->SetTwoByteString(Vector<const uint16_t>(start, literal->length));
|
| }
|
|
|
|
|
| template<>
|
| -bool Lexer<uint8_t>::FillLiteral(
|
| - const TokenDesc& token, LiteralDesc* literal) {
|
| - literal->beg_pos = token.beg_pos;
|
| - const uint8_t* start = buffer_ + token.beg_pos;
|
| - const uint8_t* end = buffer_ + token.end_pos;
|
| - if (token.token == Token::STRING) {
|
| - ++start;
|
| - --end;
|
| - }
|
| - if (IsSubstringOfSource(token)) {
|
| - literal->is_one_byte = true;
|
| - literal->is_in_buffer = false;
|
| - literal->offset = start - buffer_;
|
| - literal->length = end - start;
|
| - literal->one_byte_string = Vector<const uint8_t>(start, literal->length);
|
| - return true;
|
| - }
|
| - return CopyToLiteralBuffer(start, end, token, literal);
|
| +template<>
|
| +inline void Lexer<uint8_t>::SetLiteral<true>(const uint8_t* start,
|
| + const uint8_t* end,
|
| + LiteralDesc* literal) {
|
| + literal->SetOneByteString(
|
| + Vector<const uint8_t>(start, literal->length), false);
|
| }
|
|
|
|
|
| template<>
|
| -bool Lexer<uint16_t>::FillLiteral(
|
| - const TokenDesc& token, LiteralDesc* literal) {
|
| +template<>
|
| +inline void Lexer<int8_t>::SetLiteral<true>(const int8_t* start,
|
| + const int8_t* end,
|
| + LiteralDesc* literal) {
|
| + const uint8_t* cast = reinterpret_cast<const uint8_t*>(start);
|
| + literal->SetOneByteString(
|
| + Vector<const uint8_t>(cast, literal->length), false);
|
| +}
|
| +
|
| +
|
| +template<class Char>
|
| +bool Lexer<Char>::FillLiteral(const TokenDesc& token, LiteralDesc* literal) {
|
| literal->beg_pos = token.beg_pos;
|
| - const uint16_t* start = buffer_ + token.beg_pos;
|
| - const uint16_t* end = buffer_ + token.end_pos;
|
| + const Char* start = buffer_ + token.beg_pos;
|
| + const Char* end = buffer_ + token.end_pos;
|
| if (token.token == Token::STRING) {
|
| ++start;
|
| --end;
|
| }
|
| - if (IsSubstringOfSource(token)) {
|
| - literal->is_one_byte = false;
|
| - literal->is_in_buffer = false;
|
| - literal->offset = start - buffer_;
|
| - literal->length = end - start;
|
| - literal->two_byte_string = Vector<const uint16_t>(start, literal->length);
|
| - return true;
|
| + if (!token.has_escapes) {
|
| + bool is_one_byte = IsOneByte(start, end);
|
| + if (sizeof(Char) == 2 || is_one_byte) {
|
| + literal->offset = start - buffer_;
|
| + literal->length = end - start;
|
| + if (sizeof(Char) == 1) {
|
| + SetLiteral<true>(start, end, literal);
|
| + } else if (is_one_byte) {
|
| + SetLiteral<true>(start, end, literal);
|
| + } else {
|
| + SetLiteral<false>(start, end, literal);
|
| + }
|
| + return true;
|
| + }
|
| }
|
| return CopyToLiteralBuffer(start, end, token, literal);
|
| }
|
|
|
|
|
| -template<>
|
| -bool Lexer<int8_t>::FillLiteral(
|
| - const TokenDesc& token, LiteralDesc* literal) {
|
| - // FIXME: implement.
|
| - UNREACHABLE();
|
| - return false;
|
| -}
|
| -
|
| -
|
| template<class Char>
|
| bool Lexer<Char>::CopyToLiteralBuffer(const Char* start,
|
| - const Char* end,
|
| - const TokenDesc& token,
|
| - LiteralDesc* literal) {
|
| + const Char* end,
|
| + const TokenDesc& token,
|
| + LiteralDesc* literal) {
|
| literal->buffer.Reset();
|
| if (token.has_escapes) {
|
| for (const Char* cursor = start; cursor != end;) {
|
| @@ -585,25 +638,19 @@ bool Lexer<Char>::CopyToLiteralBuffer(const Char* start,
|
| if (cursor == NULL) return false;
|
| literal->buffer.AddChar(c);
|
| } else {
|
| - cursor = ScanEscape(cursor, end, &literal->buffer);
|
| + cursor = ScanEscape(unicode_cache_, cursor, end, &literal->buffer);
|
| ASSERT(cursor != NULL);
|
| if (cursor == NULL) return false;
|
| }
|
| }
|
| } else {
|
| + // TODO(dcarney): This branch is only reached for utf8 strings;
|
| + // use a helper function.
|
| for (const Char* cursor = start; cursor != end;) {
|
| literal->buffer.AddChar(*cursor++);
|
| }
|
| }
|
| - literal->is_one_byte = literal->buffer.is_ascii();
|
| - literal->is_in_buffer = true;
|
| - literal->length = literal->buffer.length();
|
| - if (literal->is_one_byte) {
|
| - literal->one_byte_string =
|
| - Vector<const uint8_t>::cast(literal->buffer.ascii_literal());
|
| - } else {
|
| - literal->two_byte_string = literal->buffer.utf16_literal();
|
| - }
|
| + literal->SetStringFromLiteralBuffer();
|
| return true;
|
| }
|
|
|
| @@ -611,73 +658,79 @@ bool Lexer<Char>::CopyToLiteralBuffer(const Char* start,
|
| template<class Char>
|
| Handle<String> Lexer<Char>::InternalizeLiteral(
|
| LiteralDesc* literal) {
|
| - Factory* factory = isolate_->factory();
|
| - if (literal->is_in_buffer) {
|
| - return literal->is_one_byte
|
| - ? factory->InternalizeOneByteString(
|
| - Vector<const uint8_t>::cast(literal->one_byte_string))
|
| - : factory->InternalizeTwoByteString(literal->two_byte_string);
|
| - }
|
| - if (sizeof(Char) == 1) {
|
| - SubStringKey<uint8_t> key(
|
| - source_handle_, literal->offset, literal->length);
|
| - return factory->InternalizeStringWithKey(&key);
|
| - } else {
|
| - SubStringKey<uint16_t> key(
|
| - source_handle_, literal->offset, literal->length);
|
| - return factory->InternalizeStringWithKey(&key);
|
| - }
|
| + // Factory* factory = isolate_->factory();
|
| + // if (literal->is_in_buffer) {
|
| + // return literal->is_one_byte
|
| + // ? factory->InternalizeOneByteString(
|
| + // Vector<const uint8_t>::cast(literal->one_byte_string))
|
| + // : factory->InternalizeTwoByteString(literal->two_byte_string);
|
| + // }
|
| + // if (sizeof(Char) == 1) {
|
| + // SubStringKey<uint8_t> key(
|
| + // source_handle_, literal->offset, literal->length);
|
| + // return factory->InternalizeStringWithKey(&key);
|
| + // } else {
|
| + // SubStringKey<uint16_t> key(
|
| + // source_handle_, literal->offset, literal->length);
|
| + // return factory->InternalizeStringWithKey(&key);
|
| + // }
|
| + CHECK(false);
|
| + return Handle<String>();
|
| }
|
|
|
|
|
| template<>
|
| Handle<String> Lexer<uint8_t>::AllocateLiteral(
|
| LiteralDesc* literal, PretenureFlag pretenured) {
|
| - Factory* factory = isolate_->factory();
|
| - if (literal->is_in_buffer) {
|
| - return literal->is_one_byte
|
| - ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)
|
| - : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured);
|
| - }
|
| - int from = literal->offset;
|
| - int length = literal->length;
|
| - // Save the offset and the length before allocating the string as it may
|
| - // cause a GC, invalidate the literal, and move the source.
|
| - Handle<String> result = factory->NewRawOneByteString(length, pretenured);
|
| - uint8_t* chars = SeqOneByteString::cast(*result)->GetChars();
|
| - String::WriteToFlat(*source_handle_, chars, from, from + length);
|
| - return result;
|
| + // Factory* factory = isolate_->factory();
|
| + // if (literal->is_in_buffer) {
|
| + // return literal->is_one_byte
|
| + // ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)
|
| + // : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured)
|
| + // }
|
| + // int from = literal->offset;
|
| + // int length = literal->length;
|
| + // // Save the offset and the length before allocating the string as it may
|
| + // // cause a GC, invalidate the literal, and move the source.
|
| + // Handle<String> result = factory->NewRawOneByteString(length, pretenured);
|
| + // uint8_t* chars = SeqOneByteString::cast(*result)->GetChars();
|
| + // String::WriteToFlat(*source_handle_, chars, from, from + length);
|
| + // return result;
|
| + CHECK(false);
|
| + return Handle<String>();
|
| }
|
|
|
|
|
| template<>
|
| Handle<String> Lexer<uint16_t>::AllocateLiteral(
|
| LiteralDesc* literal, PretenureFlag pretenured) {
|
| - Factory* factory = isolate_->factory();
|
| - if (literal->is_in_buffer) {
|
| - return literal->is_one_byte
|
| - ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)
|
| - : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured);
|
| - }
|
| - // Save the offset and the length before allocating the string as it may
|
| - // cause a GC, invalidate the literal, and move the source.
|
| - int from = literal->offset;
|
| - int length = literal->length;
|
| - Handle<String> result = factory->NewRawTwoByteString(length, pretenured);
|
| - uint16_t* chars = SeqTwoByteString::cast(*result)->GetChars();
|
| - String::WriteToFlat(*source_handle_, chars, from, from + length);
|
| - return result;
|
| + // Factory* factory = isolate_->factory();
|
| + // if (literal->is_in_buffer) {
|
| + // return literal->is_one_byte
|
| + // ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)
|
| + // : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured)
|
| + // }
|
| + // // Save the offset and the length before allocating the string as it may
|
| + // // cause a GC, invalidate the literal, and move the source.
|
| + // int from = literal->offset;
|
| + // int length = literal->length;
|
| + // Handle<String> result = factory->NewRawTwoByteString(length, pretenured);
|
| + // uint16_t* chars = SeqTwoByteString::cast(*result)->GetChars();
|
| + // String::WriteToFlat(*source_handle_, chars, from, from + length);
|
| + // return result;
|
| + CHECK(false);
|
| + return Handle<String>();
|
| }
|
|
|
|
|
| template<>
|
| Handle<String> Lexer<int8_t>::AllocateLiteral(
|
| LiteralDesc* literal, PretenureFlag pretenured) {
|
| - // FIXME: implement
|
| - UNREACHABLE();
|
| + CHECK(false);
|
| return Handle<String>();
|
| }
|
|
|
| +
|
| template class Lexer<uint8_t>;
|
| template class Lexer<uint16_t>;
|
| template class Lexer<int8_t>;
|
|
|