Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(32)

Unified Diff: src/lexer/lexer.cc

Issue 201693003: Experimental parser: more correct utf8 handling (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: src/lexer/lexer.cc
diff --git a/src/lexer/lexer.cc b/src/lexer/lexer.cc
index 6e74bb8032b05821ac797caff727e933788e7c64..9140f99b5be1f9e801af2961e5f7b953cbff18f4 100644
--- a/src/lexer/lexer.cc
+++ b/src/lexer/lexer.cc
@@ -164,6 +164,30 @@ Token::Value LexerBase::Next() {
}
+static uint32_t Advance(const int8_t** buffer, const int8_t* end) {  // int8_t overload: obtains one value through unibrow::Utf8::ValueOf and moves *buffer past what was read.
+ unsigned bytes_read = 0;  // Passed to ValueOf by address; presumably receives the number of bytes consumed (confirm against unibrow).
+ uint32_t c = unibrow::Utf8::ValueOf(reinterpret_cast<const uint8_t*>(*buffer),  // The signed cursor is reinterpreted as unsigned bytes for ValueOf.
+ end - *buffer,  // Distance from the cursor to end, handed to ValueOf as the available length.
+ &bytes_read);
+ *buffer += bytes_read;  // Move the caller's cursor forward by bytes_read.
+ return c;  // The value ValueOf produced.
+}
+
+
+static inline uint32_t Advance(const uint8_t** buffer, const uint8_t* end) {  // uint8_t overload: returns the unit at the cursor and steps past it; end is not used in this body.
+ uint32_t c = **buffer;  // Read the unit under the cursor, widened to uint32_t.
+ (*buffer)++;  // Step the caller's cursor forward by exactly one unit.
+ return c;
+}
+
+
+static inline uint32_t Advance(const uint16_t** buffer, const uint16_t* end) {  // uint16_t overload: returns the 16-bit unit at the cursor and steps past it; end is not used in this body.
+ uint32_t c = **buffer;  // Read the unit under the cursor, widened to uint32_t.
+ (*buffer)++;  // Step the caller's cursor forward by exactly one unit.
+ return c;
+}
+
+
template<typename Char>
Lexer<Char>::Lexer(UnicodeCache* unicode_cache,
const Char* source_ptr,
@@ -654,7 +678,7 @@ bool Lexer<Char>::CopyToLiteralBuffer(const TokenDesc& token,
if (token.has_escapes) {
for (const Char* cursor = start; cursor != end;) {
if (*cursor != '\\') {
- literal->buffer.AddChar(*cursor++);
+ literal->buffer.AddChar(Advance(&cursor, end));
} else if (token.token == Token::IDENTIFIER) {
uc32 c;
cursor = ScanIdentifierUnicodeEscape(cursor, end, &c);
@@ -668,10 +692,8 @@ bool Lexer<Char>::CopyToLiteralBuffer(const TokenDesc& token,
}
}
} else {
- // TODO(dcarney): This can only happen for utf8 strings
- // use a helper function.
for (const Char* cursor = start; cursor != end;) {
- literal->buffer.AddChar(*cursor++);
+ literal->buffer.AddChar(Advance(&cursor, end));
}
}
literal->SetStringFromLiteralBuffer();
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698