Chromium Code Reviews| Index: src/parsing/scanner-character-streams.cc |
| diff --git a/src/parsing/scanner-character-streams.cc b/src/parsing/scanner-character-streams.cc |
| index 3d3e59060629ec948eab13e8b146e3e5d766f6fe..53db66293c74eefb5e272573217745d0d8fc3df5 100644 |
| --- a/src/parsing/scanner-character-streams.cc |
| +++ b/src/parsing/scanner-character-streams.cc |
| @@ -286,6 +286,8 @@ void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() { |
| uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_); |
| DCHECK_EQ(cursor, buffer_end_); |
| + static const unibrow::uchar kUtf8Bom = 0xfeff; |
| + |
| unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = |
| current_.pos.incomplete_char; |
| size_t it; |
| @@ -294,7 +296,11 @@ void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() { |
| unibrow::uchar t = |
| unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); |
| if (t == unibrow::Utf8::kIncomplete) continue; |
| - if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| + if (V8_LIKELY(t < kUtf8Bom)) { |
|
vogelheim
2016/09/20 16:17:58
1st + 3rd branch could be merged, but I wanted to
marja
2016/09/20 18:52:30
I think this version is pretty readable too.
|
| + *(cursor++) = static_cast<uc16>(t); // By far the most frequent case. |
| + } else if (t == kUtf8Bom && current_.pos.bytes + it == 2) { |
| + // BOM detected at beginning of the stream. Don't copy it. |
| + } else if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| *(cursor++) = static_cast<uc16>(t); |
| } else { |
| *(cursor++) = unibrow::Utf16::LeadSurrogate(t); |