Index: src/scanner-character-streams.cc |
diff --git a/src/scanner-character-streams.cc b/src/scanner-character-streams.cc |
index d06f479f94bef5e4d6507d0018406955bdc80360..732b2b43f6469ee8ce9dcb9e0373678cbf62cdce 100644 |
--- a/src/scanner-character-streams.cc |
+++ b/src/scanner-character-streams.cc |
@@ -420,6 +420,12 @@ void ExternalStreamingStream::HandleUtf8SplitCharacters( |
utf8_split_char_buffer_length_ < 4) { |
--current_data_length_; |
++utf8_split_char_buffer_length_; |
+ if (c >= (3 << 6)) { |
+ // 3 << 6 = 0b11000000; this is the first byte of the multi-byte |
+ // character. No need to copy the previous characters into the conversion |
+ // buffer (even if they're multi-byte). |
+ break; |
+ } |
} |
CHECK(utf8_split_char_buffer_length_ <= 4); |
for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) { |