Index: src/parsing/scanner-character-streams.cc |
diff --git a/src/parsing/scanner-character-streams.cc b/src/parsing/scanner-character-streams.cc |
index 53db66293c74eefb5e272573217745d0d8fc3df5..3f10cfa4c16421f1481f838d28a71a38b2174a90 100644 |
--- a/src/parsing/scanner-character-streams.cc |
+++ b/src/parsing/scanner-character-streams.cc |
@@ -286,6 +286,20 @@ void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() { |
uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_); |
DCHECK_EQ(cursor, buffer_end_); |
+ // If the current chunk is the last (empty) chunk we'll have to process |
+ // any left-over, partial characters. |
+ if (chunk.length == 0) { |
+ unibrow::uchar t = |
+ unibrow::Utf8::ValueOfIncrementalFinish(&current_.pos.incomplete_char); |
+ if (t != unibrow::Utf8::kBufferEmpty) { |
+ DCHECK(t < unibrow::Utf16::kMaxNonSurrogateCharCode); |
jochen (gone - plz use gerrit)
2016/10/05 16:11:56
DCHECK_LT?
vogelheim
2016/10/05 16:22:23
I tried, but couldn't get that to work. :-(
I thi
|
+ *cursor = static_cast<uc16>(t); |
+ buffer_end_++; |
+ current_.pos.chars++; |
+ } |
+ return; |
+ } |
+ |
static const unibrow::uchar kUtf8Bom = 0xfeff; |
unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = |
@@ -421,7 +435,7 @@ size_t Utf8ExternalStreamingStream::FillBuffer(size_t position) { |
if (current_.chunk_no == chunks_.size()) { |
out_of_data = !FetchChunk(); |
} |
- if (!out_of_data) FillBufferFromCurrentChunk(); |
+ FillBufferFromCurrentChunk(); |
} |
DCHECK_EQ(current_.pos.chars - position, buffer_end_ - buffer_cursor_); |