Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(265)

Side by Side Diff: src/lexer/lexer.cc

Issue 201693003: Experimental parser: more correct utf8 handling (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 the V8 project authors. All rights reserved. 1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after
157 Token::Value LexerBase::Next() { 157 Token::Value LexerBase::Next() {
158 has_line_terminator_before_next_ = false; 158 has_line_terminator_before_next_ = false;
159 has_multiline_comment_before_next_ = false; 159 has_multiline_comment_before_next_ = false;
160 current_ = next_; 160 current_ = next_;
161 std::swap(current_literal_, next_literal_); 161 std::swap(current_literal_, next_literal_);
162 Scan(); 162 Scan();
163 return current_.token; 163 return current_.token;
164 } 164 }
165 165
166 166
167 static uint32_t Advance(const int8_t** buffer, const int8_t* end) {
168 unsigned bytes_read = 0;
169 uint32_t c = unibrow::Utf8::ValueOf(reinterpret_cast<const uint8_t*>(*buffer),
170 end - *buffer,
171 &bytes_read);
172 *buffer += bytes_read;
173 return c;
174 }
175
176
177 static inline uint32_t Advance(const uint8_t** buffer, const uint8_t* end) {
178 uint32_t c = **buffer;
179 (*buffer)++;
180 return c;
181 }
182
183
184 static inline uint32_t Advance(const uint16_t** buffer, const uint16_t* end) {
185 uint32_t c = **buffer;
186 (*buffer)++;
187 return c;
188 }
189
190
167 template<typename Char> 191 template<typename Char>
168 Lexer<Char>::Lexer(UnicodeCache* unicode_cache, 192 Lexer<Char>::Lexer(UnicodeCache* unicode_cache,
169 const Char* source_ptr, 193 const Char* source_ptr,
170 int length) 194 int length)
171 : LexerBase(unicode_cache), 195 : LexerBase(unicode_cache),
172 isolate_(NULL), 196 isolate_(NULL),
173 source_ptr_(source_ptr), 197 source_ptr_(source_ptr),
174 end_position_(length), 198 end_position_(length),
175 buffer_(source_ptr), 199 buffer_(source_ptr),
176 buffer_end_(source_ptr + length), 200 buffer_end_(source_ptr + length),
(...skipping 470 matching lines...) Expand 10 before | Expand all | Expand 10 after
647 template<class Char> 671 template<class Char>
648 bool Lexer<Char>::CopyToLiteralBuffer(const TokenDesc& token, 672 bool Lexer<Char>::CopyToLiteralBuffer(const TokenDesc& token,
649 LiteralDesc* literal) { 673 LiteralDesc* literal) {
650 literal->buffer.Reset(); 674 literal->buffer.Reset();
651 const Char* start = NULL; 675 const Char* start = NULL;
652 const Char* end = NULL; 676 const Char* end = NULL;
653 GetStartAndEnd<Char>(buffer_, token, &start, &end); 677 GetStartAndEnd<Char>(buffer_, token, &start, &end);
654 if (token.has_escapes) { 678 if (token.has_escapes) {
655 for (const Char* cursor = start; cursor != end;) { 679 for (const Char* cursor = start; cursor != end;) {
656 if (*cursor != '\\') { 680 if (*cursor != '\\') {
657 literal->buffer.AddChar(*cursor++); 681 literal->buffer.AddChar(Advance(&cursor, end));
658 } else if (token.token == Token::IDENTIFIER) { 682 } else if (token.token == Token::IDENTIFIER) {
659 uc32 c; 683 uc32 c;
660 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c); 684 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c);
661 ASSERT(cursor != NULL); 685 ASSERT(cursor != NULL);
662 if (cursor == NULL) return false; 686 if (cursor == NULL) return false;
663 literal->buffer.AddChar(c); 687 literal->buffer.AddChar(c);
664 } else { 688 } else {
665 cursor = ScanEscape(unicode_cache_, cursor, end, &literal->buffer); 689 cursor = ScanEscape(unicode_cache_, cursor, end, &literal->buffer);
666 ASSERT(cursor != NULL); 690 ASSERT(cursor != NULL);
667 if (cursor == NULL) return false; 691 if (cursor == NULL) return false;
668 } 692 }
669 } 693 }
670 } else { 694 } else {
671 // TODO(dcarney): This can only happen for utf8 strings
672 // use a helper function.
673 for (const Char* cursor = start; cursor != end;) { 695 for (const Char* cursor = start; cursor != end;) {
674 literal->buffer.AddChar(*cursor++); 696 literal->buffer.AddChar(Advance(&cursor, end));
675 } 697 }
676 } 698 }
677 literal->SetStringFromLiteralBuffer(); 699 literal->SetStringFromLiteralBuffer();
678 return true; 700 return true;
679 } 701 }
680 702
681 703
682 template<class Char> 704 template<class Char>
683 Handle<String> Lexer<Char>::AllocateInternalizedString( 705 Handle<String> Lexer<Char>::AllocateInternalizedString(
684 Isolate* isolate) { 706 Isolate* isolate) {
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
721 LiteralOffsetAndLength<Char>(buffer_, token, &offset, &length); 743 LiteralOffsetAndLength<Char>(buffer_, token, &offset, &length);
722 return factory->NewSubString(source_handle_, offset, offset + length); 744 return factory->NewSubString(source_handle_, offset, offset + length);
723 } 745 }
724 746
725 747
726 template class Lexer<uint8_t>; 748 template class Lexer<uint8_t>;
727 template class Lexer<uint16_t>; 749 template class Lexer<uint16_t>;
728 template class Lexer<int8_t>; 750 template class Lexer<int8_t>;
729 751
730 } } // v8::internal 752 } } // v8::internal
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698