OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/v8.h" | 5 #include "src/v8.h" |
6 | 6 |
7 #include "src/scanner-character-streams.h" | 7 #include "src/scanner-character-streams.h" |
8 | 8 |
9 #include "include/v8.h" | 9 #include "include/v8.h" |
10 #include "src/handles.h" | 10 #include "src/handles.h" |
11 #include "src/unicode-inl.h" | 11 #include "src/unicode-inl.h" |
12 | 12 |
13 namespace v8 { | 13 namespace v8 { |
14 namespace internal { | 14 namespace internal { |
15 | 15 |
16 namespace { | 16 namespace { |
17 | 17 |
18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src, | 18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src, |
19 unsigned* src_pos, unsigned src_length, | 19 unsigned* src_pos, unsigned src_length, |
20 ScriptCompiler::StreamedSource::Encoding encoding) { | 20 ScriptCompiler::StreamedSource::Encoding encoding) { |
| 21 // It's possible that this will be called with length 0, but don't assume that |
| 22 // the functions this calls handle it gracefully. |
| 23 if (length == 0) return 0; |
| 24 |
21 if (encoding == ScriptCompiler::StreamedSource::UTF8) { | 25 if (encoding == ScriptCompiler::StreamedSource::UTF8) { |
22 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars( | 26 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars( |
23 dest, length, src, src_pos, src_length); | 27 dest, length, src, src_pos, src_length); |
24 } | 28 } |
25 | 29 |
26 unsigned to_fill = length; | 30 unsigned to_fill = length; |
27 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos; | 31 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos; |
28 | 32 |
29 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) { | 33 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) { |
30 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill); | 34 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill); |
(...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
374 current_data_ = NULL; | 378 current_data_ = NULL; |
375 current_data_length_ = 0; | 379 current_data_length_ = 0; |
376 current_data_offset_ = 0; | 380 current_data_offset_ = 0; |
377 } | 381 } |
378 } | 382 } |
379 return data_in_buffer; | 383 return data_in_buffer; |
380 } | 384 } |
381 | 385 |
382 void ExternalStreamingStream::HandleUtf8SplitCharacters( | 386 void ExternalStreamingStream::HandleUtf8SplitCharacters( |
383 unsigned* data_in_buffer) { | 387 unsigned* data_in_buffer) { |
| 388 // Note the following property of UTF-8 which makes this function possible: |
| 389 // Given any byte, we can always read its local environment (in both |
| 390 // directions) to find out the (possibly multi-byte) character it belongs |
| 391 // to. Single byte characters are of the form 0b0XXXXXXX. The first byte of a |
| 392 // multi-byte character is of the form 0b110XXXXX, 0b1110XXXX or |
| 393 // 0b11110XXX. The continuation bytes are of the form 0b10XXXXXX. |
| 394 |
384 // First check if we have leftover data from the last chunk. | 395 // First check if we have leftover data from the last chunk. |
385 unibrow::uchar c; | 396 unibrow::uchar c; |
386 if (utf8_split_char_buffer_length_ > 0) { | 397 if (utf8_split_char_buffer_length_ > 0) { |
387 // Move the bytes which are part of the split character (which started in | 398 // Move the bytes which are part of the split character (which started in |
388 // the previous chunk) into utf8_split_char_buffer_. | 399 // the previous chunk) into utf8_split_char_buffer_. Note that the |
| 400 // continuation bytes are of the form 0b10XXXXXX, thus c >> 6 == 2. |
389 while (current_data_offset_ < current_data_length_ && | 401 while (current_data_offset_ < current_data_length_ && |
390 utf8_split_char_buffer_length_ < 4 && | 402 utf8_split_char_buffer_length_ < 4 && |
391 (c = current_data_[current_data_offset_]) > | 403 (c = current_data_[current_data_offset_]) >> 6 == 2) { |
392 unibrow::Utf8::kMaxOneByteChar) { | |
393 utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c; | 404 utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c; |
394 ++utf8_split_char_buffer_length_; | 405 ++utf8_split_char_buffer_length_; |
395 ++current_data_offset_; | 406 ++current_data_offset_; |
396 } | 407 } |
397 | 408 |
398 // Convert the data in utf8_split_char_buffer_. | 409 // Convert the data in utf8_split_char_buffer_. |
399 unsigned new_offset = 0; | 410 unsigned new_offset = 0; |
400 unsigned new_chars_in_buffer = | 411 unsigned new_chars_in_buffer = |
401 CopyCharsHelper(buffer_ + *data_in_buffer, | 412 CopyCharsHelper(buffer_ + *data_in_buffer, |
402 kBufferSize - *data_in_buffer, utf8_split_char_buffer_, | 413 kBufferSize - *data_in_buffer, utf8_split_char_buffer_, |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
448 int end_position) | 459 int end_position) |
449 : Utf16CharacterStream(), | 460 : Utf16CharacterStream(), |
450 source_(data), | 461 source_(data), |
451 raw_data_(data->GetTwoByteData(start_position)) { | 462 raw_data_(data->GetTwoByteData(start_position)) { |
452 buffer_cursor_ = raw_data_, | 463 buffer_cursor_ = raw_data_, |
453 buffer_end_ = raw_data_ + (end_position - start_position); | 464 buffer_end_ = raw_data_ + (end_position - start_position); |
454 pos_ = start_position; | 465 pos_ = start_position; |
455 } | 466 } |
456 | 467 |
457 } } // namespace v8::internal | 468 } } // namespace v8::internal |
OLD | NEW |