OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/v8.h" | 5 #include "src/v8.h" |
6 | 6 |
7 #include "src/scanner-character-streams.h" | 7 #include "src/scanner-character-streams.h" |
8 | 8 |
9 #include "include/v8.h" | 9 #include "include/v8.h" |
10 #include "src/handles.h" | 10 #include "src/handles.h" |
(...skipping 402 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
413 // current chunk to utf8_split_char_buffer_. They will be converted when the | 413 // current chunk to utf8_split_char_buffer_. They will be converted when the |
414 // next data chunk arrives. Note that all valid UTF-8 characters are at most 4 | 414 // next data chunk arrives. Note that all valid UTF-8 characters are at most 4 |
415 // bytes long, but if the data is invalid, we can have character values bigger | 415 // bytes long, but if the data is invalid, we can have character values bigger |
416 // than unibrow::Utf8::kMaxOneByteChar for more than 4 consecutive bytes. | 416 // than unibrow::Utf8::kMaxOneByteChar for more than 4 consecutive bytes. |
417 while (current_data_length_ > current_data_offset_ && | 417 while (current_data_length_ > current_data_offset_ && |
418 (c = current_data_[current_data_length_ - 1]) > | 418 (c = current_data_[current_data_length_ - 1]) > |
419 unibrow::Utf8::kMaxOneByteChar && | 419 unibrow::Utf8::kMaxOneByteChar && |
420 utf8_split_char_buffer_length_ < 4) { | 420 utf8_split_char_buffer_length_ < 4) { |
421 --current_data_length_; | 421 --current_data_length_; |
422 ++utf8_split_char_buffer_length_; | 422 ++utf8_split_char_buffer_length_; |
| 423 if (c >= (3 << 6)) { |
| 424 // 3 << 6 = 0b11000000; this is the first byte of the multi-byte |
| 425 // character. No need to copy the previous characters into the conversion |
| 426 // buffer (even if they're multi-byte). |
| 427 break; |
| 428 } |
423 } | 429 } |
424 CHECK(utf8_split_char_buffer_length_ <= 4); | 430 CHECK(utf8_split_char_buffer_length_ <= 4); |
425 for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) { | 431 for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) { |
426 utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i]; | 432 utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i]; |
427 } | 433 } |
428 } | 434 } |
429 | 435 |
430 | 436 |
431 // ---------------------------------------------------------------------------- | 437 // ---------------------------------------------------------------------------- |
432 // ExternalTwoByteStringUtf16CharacterStream | 438 // ExternalTwoByteStringUtf16CharacterStream |
433 | 439 |
434 ExternalTwoByteStringUtf16CharacterStream:: | 440 ExternalTwoByteStringUtf16CharacterStream:: |
435 ~ExternalTwoByteStringUtf16CharacterStream() { } | 441 ~ExternalTwoByteStringUtf16CharacterStream() { } |
436 | 442 |
437 | 443 |
438 ExternalTwoByteStringUtf16CharacterStream | 444 ExternalTwoByteStringUtf16CharacterStream |
439 ::ExternalTwoByteStringUtf16CharacterStream( | 445 ::ExternalTwoByteStringUtf16CharacterStream( |
440 Handle<ExternalTwoByteString> data, | 446 Handle<ExternalTwoByteString> data, |
441 int start_position, | 447 int start_position, |
442 int end_position) | 448 int end_position) |
443 : Utf16CharacterStream(), | 449 : Utf16CharacterStream(), |
444 source_(data), | 450 source_(data), |
445 raw_data_(data->GetTwoByteData(start_position)) { | 451 raw_data_(data->GetTwoByteData(start_position)) { |
446 buffer_cursor_ = raw_data_, | 452 buffer_cursor_ = raw_data_, |
447 buffer_end_ = raw_data_ + (end_position - start_position); | 453 buffer_end_ = raw_data_ + (end_position - start_position); |
448 pos_ = start_position; | 454 pos_ = start_position; |
449 } | 455 } |
450 | 456 |
451 } } // namespace v8::internal | 457 } } // namespace v8::internal |
OLD | NEW |