| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/v8.h" | 5 #include "src/v8.h" |
| 6 | 6 |
| 7 #include "src/scanner-character-streams.h" | 7 #include "src/scanner-character-streams.h" |
| 8 | 8 |
| 9 #include "include/v8.h" | 9 #include "include/v8.h" |
| 10 #include "src/handles.h" | 10 #include "src/handles.h" |
| 11 #include "src/unicode-inl.h" | 11 #include "src/unicode-inl.h" |
| 12 | 12 |
| 13 namespace v8 { | 13 namespace v8 { |
| 14 namespace internal { | 14 namespace internal { |
| 15 | 15 |
| 16 namespace { | 16 namespace { |
| 17 | 17 |
| 18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src, | 18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src, |
| 19 unsigned* src_pos, unsigned src_length, | 19 unsigned* src_pos, unsigned src_length, |
| 20 ScriptCompiler::StreamedSource::Encoding encoding) { | 20 ScriptCompiler::StreamedSource::Encoding encoding) { |
| 21 // It's possible that this will be called with length 0, but don't assume that |
| 22 // the functions this calls handle it gracefully. |
| 23 if (length == 0) return 0; |
| 24 |
| 21 if (encoding == ScriptCompiler::StreamedSource::UTF8) { | 25 if (encoding == ScriptCompiler::StreamedSource::UTF8) { |
| 22 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars( | 26 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars( |
| 23 dest, length, src, src_pos, src_length); | 27 dest, length, src, src_pos, src_length); |
| 24 } | 28 } |
| 25 | 29 |
| 26 unsigned to_fill = length; | 30 unsigned to_fill = length; |
| 27 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos; | 31 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos; |
| 28 | 32 |
| 29 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) { | 33 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) { |
| 30 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill); | 34 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill); |
| (...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 374 current_data_ = NULL; | 378 current_data_ = NULL; |
| 375 current_data_length_ = 0; | 379 current_data_length_ = 0; |
| 376 current_data_offset_ = 0; | 380 current_data_offset_ = 0; |
| 377 } | 381 } |
| 378 } | 382 } |
| 379 return data_in_buffer; | 383 return data_in_buffer; |
| 380 } | 384 } |
| 381 | 385 |
| 382 void ExternalStreamingStream::HandleUtf8SplitCharacters( | 386 void ExternalStreamingStream::HandleUtf8SplitCharacters( |
| 383 unsigned* data_in_buffer) { | 387 unsigned* data_in_buffer) { |
| 388 // Note the following property of UTF-8 which makes this function possible: |
| 389 // Given any byte, we can always read its local environment (in both |
| 390 // directions) to find out the (possibly multi-byte) character it belongs |
| 391 // to. Single byte characters are of the form 0b0XXXXXXX. The first byte of a |
| 392 // multi-byte character is of the form 0b110XXXXX, 0b1110XXXX or |
| 393 // 0b11110XXX. The continuation bytes are of the form 0b10XXXXXX. |
| 394 |
| 384 // First check if we have leftover data from the last chunk. | 395 // First check if we have leftover data from the last chunk. |
| 385 unibrow::uchar c; | 396 unibrow::uchar c; |
| 386 if (utf8_split_char_buffer_length_ > 0) { | 397 if (utf8_split_char_buffer_length_ > 0) { |
| 387 // Move the bytes which are part of the split character (which started in | 398 // Move the bytes which are part of the split character (which started in |
| 388 // the previous chunk) into utf8_split_char_buffer_. | 399 // the previous chunk) into utf8_split_char_buffer_. Note that the |
| 400 // continuation bytes are of the form 0b10XXXXXX, thus c >> 6 == 2. |
| 389 while (current_data_offset_ < current_data_length_ && | 401 while (current_data_offset_ < current_data_length_ && |
| 390 utf8_split_char_buffer_length_ < 4 && | 402 utf8_split_char_buffer_length_ < 4 && |
| 391 (c = current_data_[current_data_offset_]) > | 403 (c = current_data_[current_data_offset_]) >> 6 == 2) { |
| 392 unibrow::Utf8::kMaxOneByteChar) { | |
| 393 utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c; | 404 utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c; |
| 394 ++utf8_split_char_buffer_length_; | 405 ++utf8_split_char_buffer_length_; |
| 395 ++current_data_offset_; | 406 ++current_data_offset_; |
| 396 } | 407 } |
| 397 | 408 |
| 398 // Convert the data in utf8_split_char_buffer_. | 409 // Convert the data in utf8_split_char_buffer_. |
| 399 unsigned new_offset = 0; | 410 unsigned new_offset = 0; |
| 400 unsigned new_chars_in_buffer = | 411 unsigned new_chars_in_buffer = |
| 401 CopyCharsHelper(buffer_ + *data_in_buffer, | 412 CopyCharsHelper(buffer_ + *data_in_buffer, |
| 402 kBufferSize - *data_in_buffer, utf8_split_char_buffer_, | 413 kBufferSize - *data_in_buffer, utf8_split_char_buffer_, |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 448 int end_position) | 459 int end_position) |
| 449 : Utf16CharacterStream(), | 460 : Utf16CharacterStream(), |
| 450 source_(data), | 461 source_(data), |
| 451 raw_data_(data->GetTwoByteData(start_position)) { | 462 raw_data_(data->GetTwoByteData(start_position)) { |
| 452 buffer_cursor_ = raw_data_, | 463 buffer_cursor_ = raw_data_, |
| 453 buffer_end_ = raw_data_ + (end_position - start_position); | 464 buffer_end_ = raw_data_ + (end_position - start_position); |
| 454 pos_ = start_position; | 465 pos_ = start_position; |
| 455 } | 466 } |
| 456 | 467 |
| 457 } } // namespace v8::internal | 468 } } // namespace v8::internal |
| OLD | NEW |