Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: src/scanner-character-streams.cc

Issue 662003003: Script streaming: more UTF-8 handling fixes (again). (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: rebased Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | test/cctest/test-api.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/v8.h" 5 #include "src/v8.h"
6 6
7 #include "src/scanner-character-streams.h" 7 #include "src/scanner-character-streams.h"
8 8
9 #include "include/v8.h" 9 #include "include/v8.h"
10 #include "src/handles.h" 10 #include "src/handles.h"
11 #include "src/unicode-inl.h" 11 #include "src/unicode-inl.h"
12 12
13 namespace v8 { 13 namespace v8 {
14 namespace internal { 14 namespace internal {
15 15
16 namespace { 16 namespace {
17 17
18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src, 18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src,
19 unsigned* src_pos, unsigned src_length, 19 unsigned* src_pos, unsigned src_length,
20 ScriptCompiler::StreamedSource::Encoding encoding) { 20 ScriptCompiler::StreamedSource::Encoding encoding) {
21 // It's possible that this will be called with length 0, but don't assume that
22 // the functions this calls handle it gracefully.
23 if (length == 0) return 0;
24
21 if (encoding == ScriptCompiler::StreamedSource::UTF8) { 25 if (encoding == ScriptCompiler::StreamedSource::UTF8) {
22 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars( 26 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars(
23 dest, length, src, src_pos, src_length); 27 dest, length, src, src_pos, src_length);
24 } 28 }
25 29
26 unsigned to_fill = length; 30 unsigned to_fill = length;
27 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos; 31 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;
28 32
29 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) { 33 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) {
30 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill); 34 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill);
(...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after
374 current_data_ = NULL; 378 current_data_ = NULL;
375 current_data_length_ = 0; 379 current_data_length_ = 0;
376 current_data_offset_ = 0; 380 current_data_offset_ = 0;
377 } 381 }
378 } 382 }
379 return data_in_buffer; 383 return data_in_buffer;
380 } 384 }
381 385
382 void ExternalStreamingStream::HandleUtf8SplitCharacters( 386 void ExternalStreamingStream::HandleUtf8SplitCharacters(
383 unsigned* data_in_buffer) { 387 unsigned* data_in_buffer) {
388 // Note the following property of UTF-8 which makes this function possible:
389 // Given any byte, we can always read its local environment (in both
390 // directions) to find out the (possibly multi-byte) character it belongs
391 // to. Single byte characters are of the form 0b0XXXXXXX. The first byte of a
392 // multi-byte character is of the form 0b110XXXXX, 0b1110XXXX or
393 // 0b11110XXX. The continuation bytes are of the form 0b10XXXXXX.
394
384 // First check if we have leftover data from the last chunk. 395 // First check if we have leftover data from the last chunk.
385 unibrow::uchar c; 396 unibrow::uchar c;
386 if (utf8_split_char_buffer_length_ > 0) { 397 if (utf8_split_char_buffer_length_ > 0) {
387 // Move the bytes which are part of the split character (which started in 398 // Move the bytes which are part of the split character (which started in
388 // the previous chunk) into utf8_split_char_buffer_. 399 // the previous chunk) into utf8_split_char_buffer_. Note that the
400 // continuation bytes are of the form 0b10XXXXXX, thus c >> 6 == 2.
389 while (current_data_offset_ < current_data_length_ && 401 while (current_data_offset_ < current_data_length_ &&
390 utf8_split_char_buffer_length_ < 4 && 402 utf8_split_char_buffer_length_ < 4 &&
391 (c = current_data_[current_data_offset_]) > 403 (c = current_data_[current_data_offset_]) >> 6 == 2) {
392 unibrow::Utf8::kMaxOneByteChar) {
393 utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c; 404 utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c;
394 ++utf8_split_char_buffer_length_; 405 ++utf8_split_char_buffer_length_;
395 ++current_data_offset_; 406 ++current_data_offset_;
396 } 407 }
397 408
398 // Convert the data in utf8_split_char_buffer_. 409 // Convert the data in utf8_split_char_buffer_.
399 unsigned new_offset = 0; 410 unsigned new_offset = 0;
400 unsigned new_chars_in_buffer = 411 unsigned new_chars_in_buffer =
401 CopyCharsHelper(buffer_ + *data_in_buffer, 412 CopyCharsHelper(buffer_ + *data_in_buffer,
402 kBufferSize - *data_in_buffer, utf8_split_char_buffer_, 413 kBufferSize - *data_in_buffer, utf8_split_char_buffer_,
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
448 int end_position) 459 int end_position)
449 : Utf16CharacterStream(), 460 : Utf16CharacterStream(),
450 source_(data), 461 source_(data),
451 raw_data_(data->GetTwoByteData(start_position)) { 462 raw_data_(data->GetTwoByteData(start_position)) {
452 buffer_cursor_ = raw_data_, 463 buffer_cursor_ = raw_data_,
453 buffer_end_ = raw_data_ + (end_position - start_position); 464 buffer_end_ = raw_data_ + (end_position - start_position);
454 pos_ = start_position; 465 pos_ = start_position;
455 } 466 }
456 467
457 } } // namespace v8::internal 468 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | test/cctest/test-api.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698