src/lexer/lexer.cc - Issue 201693003: Experimental parser: more correct utf8 handling

Side by Side Diff: src/lexer/lexer.cc

Issue 201693003: Experimental parser: more correct utf8 handling (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2013 the V8 project authors. All rights reserved.	1 // Copyright 2013 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
157 Token::Value LexerBase::Next() {	157 Token::Value LexerBase::Next() {

158 has_line_terminator_before_next_ = false;	158 has_line_terminator_before_next_ = false;

159 has_multiline_comment_before_next_ = false;	159 has_multiline_comment_before_next_ = false;

160 current_ = next_;	160 current_ = next_;

161 std::swap(current_literal_, next_literal_);	161 std::swap(current_literal_, next_literal_);

162 Scan();	162 Scan();

163 return current_.token;	163 return current_.token;

164 }	164 }

165	165

166	166

	167 static uint32_t Advance(const int8_t** buffer, const int8_t* end) {

	168 unsigned bytes_read = 0;

	169 uint32_t c = unibrow::Utf8::ValueOf(reinterpret_cast<const uint8_t*>(*buffer),

	170 end - *buffer,

	171 &bytes_read);

	172 *buffer += bytes_read;

	173 return c;

	174 }

	175

	176

	177 static inline uint32_t Advance(const uint8_t** buffer, const uint8_t* end) {

	178 uint32_t c = **buffer;

	179 (*buffer)++;

	180 return c;

	181 }

	182

	183

	184 static inline uint32_t Advance(const uint16_t** buffer, const uint16_t* end) {

	185 uint32_t c = **buffer;

	186 (*buffer)++;

	187 return c;

	188 }

	189

	190

167 template<typename Char>	191 template<typename Char>

168 Lexer<Char>::Lexer(UnicodeCache* unicode_cache,	192 Lexer<Char>::Lexer(UnicodeCache* unicode_cache,

169 const Char* source_ptr,	193 const Char* source_ptr,

170 int length)	194 int length)

171 : LexerBase(unicode_cache),	195 : LexerBase(unicode_cache),

172 isolate_(NULL),	196 isolate_(NULL),

173 source_ptr_(source_ptr),	197 source_ptr_(source_ptr),

174 end_position_(length),	198 end_position_(length),

175 buffer_(source_ptr),	199 buffer_(source_ptr),

176 buffer_end_(source_ptr + length),	200 buffer_end_(source_ptr + length),

(...skipping 470 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
647 template<class Char>	671 template<class Char>

648 bool Lexer<Char>::CopyToLiteralBuffer(const TokenDesc& token,	672 bool Lexer<Char>::CopyToLiteralBuffer(const TokenDesc& token,

649 LiteralDesc* literal) {	673 LiteralDesc* literal) {

650 literal->buffer.Reset();	674 literal->buffer.Reset();

651 const Char* start = NULL;	675 const Char* start = NULL;

652 const Char* end = NULL;	676 const Char* end = NULL;

653 GetStartAndEnd<Char>(buffer_, token, &start, &end);	677 GetStartAndEnd<Char>(buffer_, token, &start, &end);

654 if (token.has_escapes) {	678 if (token.has_escapes) {

655 for (const Char* cursor = start; cursor != end;) {	679 for (const Char* cursor = start; cursor != end;) {

656 if (*cursor != '\\') {	680 if (*cursor != '\\') {

657 literal->buffer.AddChar(*cursor++);	681 literal->buffer.AddChar(Advance(&cursor, end));

658 } else if (token.token == Token::IDENTIFIER) {	682 } else if (token.token == Token::IDENTIFIER) {

659 uc32 c;	683 uc32 c;

660 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c);	684 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c);

661 ASSERT(cursor != NULL);	685 ASSERT(cursor != NULL);

662 if (cursor == NULL) return false;	686 if (cursor == NULL) return false;

663 literal->buffer.AddChar(c);	687 literal->buffer.AddChar(c);

664 } else {	688 } else {

665 cursor = ScanEscape(unicode_cache_, cursor, end, &literal->buffer);	689 cursor = ScanEscape(unicode_cache_, cursor, end, &literal->buffer);

666 ASSERT(cursor != NULL);	690 ASSERT(cursor != NULL);

667 if (cursor == NULL) return false;	691 if (cursor == NULL) return false;

668 }	692 }

669 }	693 }

670 } else {	694 } else {

671 // TODO(dcarney): This can only happen for utf8 strings

672 // use a helper function.

673 for (const Char* cursor = start; cursor != end;) {	695 for (const Char* cursor = start; cursor != end;) {

674 literal->buffer.AddChar(*cursor++);	696 literal->buffer.AddChar(Advance(&cursor, end));

675 }	697 }

676 }	698 }

677 literal->SetStringFromLiteralBuffer();	699 literal->SetStringFromLiteralBuffer();

678 return true;	700 return true;

679 }	701 }

680	702

681	703

682 template<class Char>	704 template<class Char>

683 Handle<String> Lexer<Char>::AllocateInternalizedString(	705 Handle<String> Lexer<Char>::AllocateInternalizedString(

684 Isolate* isolate) {	706 Isolate* isolate) {

(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
721 LiteralOffsetAndLength<Char>(buffer_, token, &offset, &length);	743 LiteralOffsetAndLength<Char>(buffer_, token, &offset, &length);

722 return factory->NewSubString(source_handle_, offset, offset + length);	744 return factory->NewSubString(source_handle_, offset, offset + length);

723 }	745 }

724	746

725	747

726 template class Lexer<uint8_t>;	748 template class Lexer<uint8_t>;

727 template class Lexer<uint16_t>;	749 template class Lexer<uint16_t>;

728 template class Lexer<int8_t>;	750 template class Lexer<int8_t>;

729	751

730 } } // v8::internal	752 } } // v8::internal

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »