src/scanner-character-streams.cc - Issue 662003003: Script streaming: more UTF-8 handling fixes (again).

Side by Side Diff: src/scanner-character-streams.cc

Issue 662003003: Script streaming: more UTF-8 handling fixes (again). (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: rebased Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/v8.h"	5 #include "src/v8.h"

6	6

7 #include "src/scanner-character-streams.h"	7 #include "src/scanner-character-streams.h"

8	8

9 #include "include/v8.h"	9 #include "include/v8.h"

10 #include "src/handles.h"	10 #include "src/handles.h"

11 #include "src/unicode-inl.h"	11 #include "src/unicode-inl.h"

12	12

13 namespace v8 {	13 namespace v8 {

14 namespace internal {	14 namespace internal {

15	15

16 namespace {	16 namespace {

17	17

18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src,	18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src,

19 unsigned* src_pos, unsigned src_length,	19 unsigned* src_pos, unsigned src_length,

20 ScriptCompiler::StreamedSource::Encoding encoding) {	20 ScriptCompiler::StreamedSource::Encoding encoding) {

	21 // It's possible that this will be called with length 0, but don't assume that

	22 // the functions this calls handle it gracefully.

	23 if (length == 0) return 0;

	24

21 if (encoding == ScriptCompiler::StreamedSource::UTF8) {	25 if (encoding == ScriptCompiler::StreamedSource::UTF8) {

22 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars(	26 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars(

23 dest, length, src, src_pos, src_length);	27 dest, length, src, src_pos, src_length);

24 }	28 }

25	29

26 unsigned to_fill = length;	30 unsigned to_fill = length;

27 if (to_fill > src_length - src_pos) to_fill = src_length - src_pos;	31 if (to_fill > src_length - src_pos) to_fill = src_length - src_pos;

28	32

29 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) {	33 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) {

30 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill);	34 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill);

(...skipping 343 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
374 current_data_ = NULL;	378 current_data_ = NULL;

375 current_data_length_ = 0;	379 current_data_length_ = 0;

376 current_data_offset_ = 0;	380 current_data_offset_ = 0;

377 }	381 }

378 }	382 }

379 return data_in_buffer;	383 return data_in_buffer;

380 }	384 }

381	385

382 void ExternalStreamingStream::HandleUtf8SplitCharacters(	386 void ExternalStreamingStream::HandleUtf8SplitCharacters(

383 unsigned* data_in_buffer) {	387 unsigned* data_in_buffer) {

	388 // Note the following property of UTF-8 which makes this function possible:

	389 // Given any byte, we can always read its local environment (in both

	390 // directions) to find out the (possibly multi-byte) character it belongs

	391 // to. Single byte characters are of the form 0b0XXXXXXX. The first byte of a

	392 // multi-byte character is of the form 0b110XXXXX, 0b1110XXXX or

	393 // 0b11110XXX. The continuation bytes are of the form 0b10XXXXXX.

	394

384 // First check if we have leftover data from the last chunk.	395 // First check if we have leftover data from the last chunk.

385 unibrow::uchar c;	396 unibrow::uchar c;

386 if (utf8_split_char_buffer_length_ > 0) {	397 if (utf8_split_char_buffer_length_ > 0) {

387 // Move the bytes which are part of the split character (which started in	398 // Move the bytes which are part of the split character (which started in

388 // the previous chunk) into utf8_split_char_buffer_.	399 // the previous chunk) into utf8_split_char_buffer_. Note that the

	400 // continuation bytes are of the form 0b10XXXXXX, thus c >> 6 == 2.

389 while (current_data_offset_ < current_data_length_ &&	401 while (current_data_offset_ < current_data_length_ &&

390 utf8_split_char_buffer_length_ < 4 &&	402 utf8_split_char_buffer_length_ < 4 &&

391 (c = current_data_[current_data_offset_]) >	403 (c = current_data_[current_data_offset_]) >> 6 == 2) {

392 unibrow::Utf8::kMaxOneByteChar) {

393 utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c;	404 utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c;

394 ++utf8_split_char_buffer_length_;	405 ++utf8_split_char_buffer_length_;

395 ++current_data_offset_;	406 ++current_data_offset_;

396 }	407 }

397	408

398 // Convert the data in utf8_split_char_buffer_.	409 // Convert the data in utf8_split_char_buffer_.

399 unsigned new_offset = 0;	410 unsigned new_offset = 0;

400 unsigned new_chars_in_buffer =	411 unsigned new_chars_in_buffer =

401 CopyCharsHelper(buffer_ + *data_in_buffer,	412 CopyCharsHelper(buffer_ + *data_in_buffer,

402 kBufferSize - *data_in_buffer, utf8_split_char_buffer_,	413 kBufferSize - *data_in_buffer, utf8_split_char_buffer_,

(...skipping 45 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
448 int end_position)	459 int end_position)

449 : Utf16CharacterStream(),	460 : Utf16CharacterStream(),

450 source_(data),	461 source_(data),

451 raw_data_(data->GetTwoByteData(start_position)) {	462 raw_data_(data->GetTwoByteData(start_position)) {

452 buffer_cursor_ = raw_data_,	463 buffer_cursor_ = raw_data_,

453 buffer_end_ = raw_data_ + (end_position - start_position);	464 buffer_end_ = raw_data_ + (end_position - start_position);

454 pos_ = start_position;	465 pos_ = start_position;

455 }	466 }

456	467

457 } } // namespace v8::internal	468 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « no previous file | test/cctest/test-api.cc » ('j') | no next file with comments »