src/lexer/lexer.cc - Issue 201693003: Experimental parser: more correct utf8 handling

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: src/lexer/lexer.cc

Issue 201693003: Experimental parser: more correct utf8 handling (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/lexer/lexer.cc

diff --git a/src/lexer/lexer.cc b/src/lexer/lexer.cc

index 6e74bb8032b05821ac797caff727e933788e7c64..9140f99b5be1f9e801af2961e5f7b953cbff18f4 100644

--- a/src/lexer/lexer.cc

+++ b/src/lexer/lexer.cc

@@ -164,6 +164,30 @@ Token::Value LexerBase::Next() {

}

+static uint32_t Advance(const int8_t** buffer, const int8_t* end) {

+ unsigned bytes_read = 0;

+ uint32_t c = unibrow::Utf8::ValueOf(reinterpret_cast<const uint8_t*>(*buffer),

+ end - *buffer,

+ &bytes_read);

+ *buffer += bytes_read;

+ return c;

+}

+static inline uint32_t Advance(const uint8_t** buffer, const uint8_t* end) {

+ uint32_t c = **buffer;

+ (*buffer)++;

+ return c;

+}

+static inline uint32_t Advance(const uint16_t** buffer, const uint16_t* end) {

+ uint32_t c = **buffer;

+ (*buffer)++;

+ return c;

+}

template<typename Char>

Lexer<Char>::Lexer(UnicodeCache* unicode_cache,

const Char* source_ptr,

@@ -654,7 +678,7 @@ bool Lexer<Char>::CopyToLiteralBuffer(const TokenDesc& token,

if (token.has_escapes) {

for (const Char* cursor = start; cursor != end;) {

if (*cursor != '\\') {

- literal->buffer.AddChar(*cursor++);

+ literal->buffer.AddChar(Advance(&cursor, end));

} else if (token.token == Token::IDENTIFIER) {

uc32 c;

cursor = ScanIdentifierUnicodeEscape(cursor, end, &c);

@@ -668,10 +692,8 @@ bool Lexer<Char>::CopyToLiteralBuffer(const TokenDesc& token,

}

} else {

- // TODO(dcarney): This can only happen for utf8 strings

- // use a helper function.

for (const Char* cursor = start; cursor != end;) {

- literal->buffer.AddChar(*cursor++);

+ literal->buffer.AddChar(Advance(&cursor, end));

}

literal->SetStringFromLiteralBuffer();

« no previous file with comments | « no previous file | no next file » | no next file with comments »