| OLD | NEW | 
|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 #include "src/v8.h" | 5 #include "src/v8.h" | 
| 6 | 6 | 
| 7 #include "src/scanner-character-streams.h" | 7 #include "src/scanner-character-streams.h" | 
| 8 | 8 | 
| 9 #include "include/v8.h" | 9 #include "include/v8.h" | 
| 10 #include "src/handles.h" | 10 #include "src/handles.h" | 
| 11 #include "src/unicode-inl.h" | 11 #include "src/unicode-inl.h" | 
| 12 | 12 | 
| 13 namespace v8 { | 13 namespace v8 { | 
| 14 namespace internal { | 14 namespace internal { | 
| 15 | 15 | 
| 16 namespace { | 16 namespace { | 
| 17 | 17 | 
| 18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src, | 18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src, | 
| 19                          unsigned* src_pos, unsigned src_length, | 19                          unsigned* src_pos, unsigned src_length, | 
| 20                          ScriptCompiler::StreamedSource::Encoding encoding) { | 20                          ScriptCompiler::StreamedSource::Encoding encoding) { | 
|  | 21   // It's possible that this will be called with length 0, but don't assume that | 
|  | 22   // the functions this calls handle it gracefully. | 
|  | 23   if (length == 0) return 0; | 
|  | 24 | 
| 21   if (encoding == ScriptCompiler::StreamedSource::UTF8) { | 25   if (encoding == ScriptCompiler::StreamedSource::UTF8) { | 
| 22     return v8::internal::Utf8ToUtf16CharacterStream::CopyChars( | 26     return v8::internal::Utf8ToUtf16CharacterStream::CopyChars( | 
| 23         dest, length, src, src_pos, src_length); | 27         dest, length, src, src_pos, src_length); | 
| 24   } | 28   } | 
| 25 | 29 | 
| 26   unsigned to_fill = length; | 30   unsigned to_fill = length; | 
| 27   if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos; | 31   if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos; | 
| 28 | 32 | 
| 29   if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) { | 33   if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) { | 
| 30     v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill); | 34     v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill); | 
| (...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 374       current_data_ = NULL; | 378       current_data_ = NULL; | 
| 375       current_data_length_ = 0; | 379       current_data_length_ = 0; | 
| 376       current_data_offset_ = 0; | 380       current_data_offset_ = 0; | 
| 377     } | 381     } | 
| 378   } | 382   } | 
| 379   return data_in_buffer; | 383   return data_in_buffer; | 
| 380 } | 384 } | 
| 381 | 385 | 
| 382 void ExternalStreamingStream::HandleUtf8SplitCharacters( | 386 void ExternalStreamingStream::HandleUtf8SplitCharacters( | 
| 383     unsigned* data_in_buffer) { | 387     unsigned* data_in_buffer) { | 
|  | 388   // Note the following property of UTF-8 which makes this function possible: | 
|  | 389   // Given any byte, we can always read its local environment (in both | 
|  | 390   // directions) to find out the (possibly multi-byte) character it belongs | 
|  | 391   // to. Single byte characters are of the form 0b0XXXXXXX. The first byte of a | 
|  | 392   // multi-byte character is of the form 0b110XXXXX, 0b1110XXXX or | 
|  | 393   // 0b11110XXX. The continuation bytes are of the form 0b10XXXXXX. | 
|  | 394 | 
| 384   // First check if we have leftover data from the last chunk. | 395   // First check if we have leftover data from the last chunk. | 
| 385   unibrow::uchar c; | 396   unibrow::uchar c; | 
| 386   if (utf8_split_char_buffer_length_ > 0) { | 397   if (utf8_split_char_buffer_length_ > 0) { | 
| 387     // Move the bytes which are part of the split character (which started in | 398     // Move the bytes which are part of the split character (which started in | 
| 388     // the previous chunk) into utf8_split_char_buffer_. | 399     // the previous chunk) into utf8_split_char_buffer_. Note that the | 
|  | 400     // continuation bytes are of the form 0b10XXXXXX, thus c >> 6 == 2. | 
| 389     while (current_data_offset_ < current_data_length_ && | 401     while (current_data_offset_ < current_data_length_ && | 
| 390            utf8_split_char_buffer_length_ < 4 && | 402            utf8_split_char_buffer_length_ < 4 && | 
| 391            (c = current_data_[current_data_offset_]) > | 403            (c = current_data_[current_data_offset_]) >> 6 == 2) { | 
| 392                unibrow::Utf8::kMaxOneByteChar) { |  | 
| 393       utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c; | 404       utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c; | 
| 394       ++utf8_split_char_buffer_length_; | 405       ++utf8_split_char_buffer_length_; | 
| 395       ++current_data_offset_; | 406       ++current_data_offset_; | 
| 396     } | 407     } | 
| 397 | 408 | 
| 398     // Convert the data in utf8_split_char_buffer_. | 409     // Convert the data in utf8_split_char_buffer_. | 
| 399     unsigned new_offset = 0; | 410     unsigned new_offset = 0; | 
| 400     unsigned new_chars_in_buffer = | 411     unsigned new_chars_in_buffer = | 
| 401         CopyCharsHelper(buffer_ + *data_in_buffer, | 412         CopyCharsHelper(buffer_ + *data_in_buffer, | 
| 402                         kBufferSize - *data_in_buffer, utf8_split_char_buffer_, | 413                         kBufferSize - *data_in_buffer, utf8_split_char_buffer_, | 
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 448         int end_position) | 459         int end_position) | 
| 449     : Utf16CharacterStream(), | 460     : Utf16CharacterStream(), | 
| 450       source_(data), | 461       source_(data), | 
| 451       raw_data_(data->GetTwoByteData(start_position)) { | 462       raw_data_(data->GetTwoByteData(start_position)) { | 
| 452   buffer_cursor_ = raw_data_, | 463   buffer_cursor_ = raw_data_, | 
| 453   buffer_end_ = raw_data_ + (end_position - start_position); | 464   buffer_end_ = raw_data_ + (end_position - start_position); | 
| 454   pos_ = start_position; | 465   pos_ = start_position; | 
| 455 } | 466 } | 
| 456 | 467 | 
| 457 } }  // namespace v8::internal | 468 } }  // namespace v8::internal | 
| OLD | NEW | 
|---|