Chromium Code Reviews | Index: src/api.cc |
| diff --git a/src/api.cc b/src/api.cc |
| index 9a68f639efec56559c0aa7ffe76a58ada5776fc4..d03601dcf475a0c750b8286f7387c9ce9c4ac6c4 100644 |
| --- a/src/api.cc |
| +++ b/src/api.cc |
| @@ -4514,27 +4514,40 @@ class Utf8WriterVisitor { |
| utf16_chars_read_(0) { |
| } |
| - static int WriteEndCharacter(uint16_t character, |
| - int last_character, |
| + // WritePair writes the current UTF-16 code unit to the given buffer. The |
| + // function will go back inside the buffer to combine surrogate pairs. |
| + // @TODO use uint16_t for previous? |
|
dcarney
2014/01/04 15:56:45
previous is an int because it also has to carry some special values (e.g. Utf16::kNoPreviousCharacter), not just UTF-16 code units.
haimuiba
2014/01/06 05:40:18
Makes sense. Thx.
|
| + static int WritePair(uint16_t current, int previous, char* buffer) { |
| + using namespace unibrow; |
| + int code_point = current; |
| + int written = 0; |
| + if (Utf16::IsSurrogatePair(previous, current)) { |
| + code_point = Utf16::CombineSurrogatePair(previous, current); |
| + buffer -= Utf8::kSizeOfUnmatchedSurrogate; |
| + written -= Utf8::kSizeOfUnmatchedSurrogate; |
| + } |
| + return written + Utf8::Encode(buffer, code_point, false); |
|
dcarney
2014/01/04 15:56:45
Having the length calculation here is too late. S… [remainder of this comment is truncated in the source]
haimuiba
2014/01/06 05:40:18
Ok, I'll take a closer look.
|
| + } |
| + |
| + // @TODO use uint16_t for previous? |
| + static int WriteEndCharacter(uint16_t current, |
| + int previous, |
| int remaining, |
| char* const buffer) { |
| using namespace unibrow; |
| ASSERT(remaining > 0); |
| - // We can't use a local buffer here because Encode needs to modify |
| - // previous characters in the stream. We know, however, that |
| - // exactly one character will be advanced. |
| - if (Utf16::IsTrailSurrogate(character) && |
| - Utf16::IsLeadSurrogate(last_character)) { |
| - int written = Utf8::Encode(buffer, character, last_character); |
| + // We can't use a local buffer here because WritePair needs to modify |
| + // previous characters in the stream. We know, however, that exactly one |
| + // character will be advanced. |
| + if (Utf16::IsSurrogatePair(previous, current)) { |
| + int written = WritePair(current, previous, buffer); |
| ASSERT(written == 1); |
| return written; |
| } |
| // Use a scratch buffer to check the required characters. |
| char temp_buffer[Utf8::kMaxEncodedSize]; |
| // Can't encode using last_character as gcc has array bounds issues. |
| - int written = Utf8::Encode(temp_buffer, |
| - character, |
| - Utf16::kNoPreviousCharacter); |
| + int written = WritePair(current, Utf16::kNoPreviousCharacter, temp_buffer); |
| // Won't fit. |
| if (written > remaining) return 0; |
| // Copy over the character from temp_buffer. |
| @@ -4581,7 +4594,7 @@ class Utf8WriterVisitor { |
| } else { |
| for (; i < fast_length; i++) { |
| uint16_t character = *chars++; |
| - buffer += Utf8::Encode(buffer, character, last_character); |
| + buffer += WritePair(character, last_character, buffer); |
| last_character = character; |
| ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); |
| } |