Index: src/api.cc |
diff --git a/src/api.cc b/src/api.cc |
index 9a68f639efec56559c0aa7ffe76a58ada5776fc4..d03601dcf475a0c750b8286f7387c9ce9c4ac6c4 100644 |
--- a/src/api.cc |
+++ b/src/api.cc |
@@ -4514,27 +4514,40 @@ class Utf8WriterVisitor { |
utf16_chars_read_(0) { |
} |
- static int WriteEndCharacter(uint16_t character, |
- int last_character, |
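+ // WritePair writes the UTF-8 encoding of the current UTF-16 code unit to the |
+ // given buffer. If previous and current form a surrogate pair, it steps back |
+ // Utf8::kSizeOfUnmatchedSurrogate bytes in the buffer and writes the combined |
+ // code point there instead. Returns the net number of bytes the buffer advanced. |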
+ // @TODO use uint16_t for previous? |
dcarney
2014/01/04 15:56:45
previous is an int because of some special values
haimuiba
2014/01/06 05:40:18
Makes sense. Thx.
|
+ static int WritePair(uint16_t current, int previous, char* buffer) { |
+ using namespace unibrow; |
+ int code_point = current; |
+ int written = 0; |
+ if (Utf16::IsSurrogatePair(previous, current)) { |
+ code_point = Utf16::CombineSurrogatePair(previous, current); |
+ buffer -= Utf8::kSizeOfUnmatchedSurrogate; |
+ written -= Utf8::kSizeOfUnmatchedSurrogate; |
+ } |
+ return written + Utf8::Encode(buffer, code_point, false); |
dcarney
2014/01/04 15:56:45
having the length calculation here is too late. S
haimuiba
2014/01/06 05:40:18
Ok, I'll take a closer look.
|
+ } |
+ |
+ // @TODO use uint16_t for previous? |
+ static int WriteEndCharacter(uint16_t current, |
+ int previous, |
int remaining, |
char* const buffer) { |
using namespace unibrow; |
ASSERT(remaining > 0); |
- // We can't use a local buffer here because Encode needs to modify |
- // previous characters in the stream. We know, however, that |
- // exactly one character will be advanced. |
- if (Utf16::IsTrailSurrogate(character) && |
- Utf16::IsLeadSurrogate(last_character)) { |
- int written = Utf8::Encode(buffer, character, last_character); |
+ // We can't use a local buffer here because WritePair needs to modify |
+ // previous characters in the stream. We know, however, that the output |
+ // advances by exactly one byte in this case. |
+ if (Utf16::IsSurrogatePair(previous, current)) { |
+ int written = WritePair(current, previous, buffer); |
ASSERT(written == 1); |
return written; |
} |
// Use a scratch buffer to check the required characters. |
char temp_buffer[Utf8::kMaxEncodedSize]; |
- // Can't encode using last_character as gcc has array bounds issues. |
+ // Can't encode using previous as gcc has array bounds issues. |
- int written = Utf8::Encode(temp_buffer, |
- character, |
- Utf16::kNoPreviousCharacter); |
+ int written = WritePair(current, Utf16::kNoPreviousCharacter, temp_buffer); |
// Won't fit. |
if (written > remaining) return 0; |
// Copy over the character from temp_buffer. |
@@ -4581,7 +4594,7 @@ class Utf8WriterVisitor { |
} else { |
for (; i < fast_length; i++) { |
uint16_t character = *chars++; |
- buffer += Utf8::Encode(buffer, character, last_character); |
+ buffer += WritePair(character, last_character, buffer); |
last_character = character; |
ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); |
} |