Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Side by Side Diff: src/parsing/scanner-character-streams.cc

Issue 2354973002: Handle UTF-8 BOM at the beginning of a UTF-8 stream. (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/parsing/scanner-character-streams.h" 5 #include "src/parsing/scanner-character-streams.h"
6 6
7 #include "include/v8.h" 7 #include "include/v8.h"
8 #include "src/globals.h" 8 #include "src/globals.h"
9 #include "src/handles.h" 9 #include "src/handles.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 268 matching lines...) Expand 10 before | Expand all | Expand 10 after
279 DCHECK_EQ(buffer_start_, buffer_cursor_); 279 DCHECK_EQ(buffer_start_, buffer_cursor_);
280 DCHECK_LT(buffer_end_ + 1, buffer_start_ + kBufferSize); 280 DCHECK_LT(buffer_end_ + 1, buffer_start_ + kBufferSize);
281 281
282 const Chunk& chunk = chunks_[current_.chunk_no]; 282 const Chunk& chunk = chunks_[current_.chunk_no];
283 283
284 // The buffer_ is writable, but buffer_*_ members are const. So we get a 284 // The buffer_ is writable, but buffer_*_ members are const. So we get a
285 // non-const pointer into buffer that points to the same char as buffer_end_. 285 // non-const pointer into buffer that points to the same char as buffer_end_.
286 uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_); 286 uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_);
287 DCHECK_EQ(cursor, buffer_end_); 287 DCHECK_EQ(cursor, buffer_end_);
288 288
289 static const unibrow::uchar kUtf8Bom = 0xfeff;
290
289 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = 291 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char =
290 current_.pos.incomplete_char; 292 current_.pos.incomplete_char;
291 size_t it; 293 size_t it;
292 for (it = current_.pos.bytes - chunk.start.bytes; 294 for (it = current_.pos.bytes - chunk.start.bytes;
293 it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) { 295 it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) {
294 unibrow::uchar t = 296 unibrow::uchar t =
295 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); 297 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char);
296 if (t == unibrow::Utf8::kIncomplete) continue; 298 if (t == unibrow::Utf8::kIncomplete) continue;
297 if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) { 299 if (V8_LIKELY(t < kUtf8Bom)) {
vogelheim 2016/09/20 16:17:58 1st + 3rd branch could be merged, but I wanted to
marja 2016/09/20 18:52:30 I think this version is pretty readable too.
300 *(cursor++) = static_cast<uc16>(t); // By far the most frequent case.
301 } else if (t == kUtf8Bom && current_.pos.bytes + it == 2) {
302 // BOM detected at beginning of the stream. Don't copy it.
303 } else if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
298 *(cursor++) = static_cast<uc16>(t); 304 *(cursor++) = static_cast<uc16>(t);
299 } else { 305 } else {
300 *(cursor++) = unibrow::Utf16::LeadSurrogate(t); 306 *(cursor++) = unibrow::Utf16::LeadSurrogate(t);
301 *(cursor++) = unibrow::Utf16::TrailSurrogate(t); 307 *(cursor++) = unibrow::Utf16::TrailSurrogate(t);
302 } 308 }
303 } 309 }
304 310
305 current_.pos.bytes = chunk.start.bytes + it; 311 current_.pos.bytes = chunk.start.bytes + it;
306 current_.pos.chars += (cursor - buffer_end_); 312 current_.pos.chars += (cursor - buffer_end_);
307 current_.pos.incomplete_char = incomplete_char; 313 current_.pos.incomplete_char = incomplete_char;
(...skipping 346 matching lines...) Expand 10 before | Expand all | Expand 10 after
654 return new OneByteExternalStreamingStream(source_stream); 660 return new OneByteExternalStreamingStream(source_stream);
655 case v8::ScriptCompiler::StreamedSource::UTF8: 661 case v8::ScriptCompiler::StreamedSource::UTF8:
656 return new Utf8ExternalStreamingStream(source_stream); 662 return new Utf8ExternalStreamingStream(source_stream);
657 } 663 }
658 UNREACHABLE(); 664 UNREACHABLE();
659 return nullptr; 665 return nullptr;
660 } 666 }
661 667
662 } // namespace internal 668 } // namespace internal
663 } // namespace v8 669 } // namespace v8
OLDNEW
« no previous file with comments | « no previous file | test/cctest/parsing/test-scanner-streams.cc » ('j') | test/cctest/parsing/test-scanner-streams.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698