src/api.cc - Issue 121173009: String::WriteUtf8: Add REPLACE_INVALID_UTF8 option

Unified Diff: src/api.cc

Issue 121173009: String::WriteUtf8: Add REPLACE_INVALID_UTF8 option (Closed) Base URL: git://github.com/v8/v8.git@master

Patch Set: Created 6 years, 12 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/api.cc

diff --git a/src/api.cc b/src/api.cc

index 9a68f639efec56559c0aa7ffe76a58ada5776fc4..d03601dcf475a0c750b8286f7387c9ce9c4ac6c4 100644

--- a/src/api.cc

+++ b/src/api.cc

@@ -4514,27 +4514,40 @@ class Utf8WriterVisitor {

utf16_chars_read_(0) {

}

- static int WriteEndCharacter(uint16_t character,

- int last_character,

+ // WritePair writes the current UTF-16 code unit to the given buffer. The

+ // function will go back inside the buffer to combine surrogate pairs.

+ // @TODO use uint16_t for previous?

dcarney 2014/01/04 15:56:45 previous is an int because of some special values

haimuiba 2014/01/06 05:40:18 Makes sense. Thx.

+ static int WritePair(uint16_t current, int previous, char* buffer) {

+ using namespace unibrow;

+ int code_point = current;

+ int written = 0;

+ if (Utf16::IsSurrogatePair(previous, current)) {

+ code_point = Utf16::CombineSurrogatePair(previous, current);

+ buffer -= Utf8::kSizeOfUnmatchedSurrogate;

+ written -= Utf8::kSizeOfUnmatchedSurrogate;

+ }

+ return written + Utf8::Encode(buffer, code_point, false);

dcarney 2014/01/04 15:56:45 having the length calculation here is too late. S…

haimuiba 2014/01/06 05:40:18 Ok, I'll take a closer look.

+ }

+ // @TODO use uint16_t for previous?

+ static int WriteEndCharacter(uint16_t current,

+ int previous,

int remaining,

char* const buffer) {

using namespace unibrow;

ASSERT(remaining > 0);

- // We can't use a local buffer here because Encode needs to modify

- // previous characters in the stream. We know, however, that

- // exactly one character will be advanced.

- if (Utf16::IsTrailSurrogate(character) &&

- Utf16::IsLeadSurrogate(last_character)) {

- int written = Utf8::Encode(buffer, character, last_character);

+ // We can't use a local buffer here because WritePair needs to modify

+ // previous characters in the stream. We know, however, that exactly one

+ // character will be advanced.

+ if (Utf16::IsSurrogatePair(previous, current)) {

+ int written = WritePair(current, previous, buffer);

ASSERT(written == 1);

return written;

}

// Use a scratch buffer to check the required characters.

char temp_buffer[Utf8::kMaxEncodedSize];

// Can't encode using last_character as gcc has array bounds issues.

- int written = Utf8::Encode(temp_buffer,

- character,

- Utf16::kNoPreviousCharacter);

+ int written = WritePair(current, Utf16::kNoPreviousCharacter, temp_buffer);

// Won't fit.

if (written > remaining) return 0;

// Copy over the character from temp_buffer.

@@ -4581,7 +4594,7 @@ class Utf8WriterVisitor {

} else {

for (; i < fast_length; i++) {

uint16_t character = *chars++;

- buffer += Utf8::Encode(buffer, character, last_character);

+ buffer += WritePair(character, last_character, buffer);

last_character = character;

ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_);

}

« no previous file with comments | « no previous file | src/unicode.h » ('j') | src/unicode.h » ('J')