src/scanner-character-streams.cc - Issue 654503002: Script streaming: UTF-8 handling fix.

Side by Side Diff: src/scanner-character-streams.cc

Issue 654503002: Script streaming: UTF-8 handling fix. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/v8.h"	5 #include "src/v8.h"

6	6

7 #include "src/scanner-character-streams.h"	7 #include "src/scanner-character-streams.h"

8	8

9 #include "include/v8.h"	9 #include "include/v8.h"

10 #include "src/handles.h"	10 #include "src/handles.h"

(...skipping 402 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
413 // current chunk to utf8_split_char_buffer_. They will be converted when the	413 // current chunk to utf8_split_char_buffer_. They will be converted when the

414 // next data chunk arrives. Note that all valid UTF-8 characters are at most 4	414 // next data chunk arrives. Note that all valid UTF-8 characters are at most 4

415 // bytes long, but if the data is invalid, we can have character values bigger	415 // bytes long, but if the data is invalid, we can have character values bigger

416 // than unibrow::Utf8::kMaxOneByteChar for more than 4 consecutive bytes.	416 // than unibrow::Utf8::kMaxOneByteChar for more than 4 consecutive bytes.

417 while (current_data_length_ > current_data_offset_ &&	417 while (current_data_length_ > current_data_offset_ &&

418 (c = current_data_[current_data_length_ - 1]) >	418 (c = current_data_[current_data_length_ - 1]) >

419 unibrow::Utf8::kMaxOneByteChar &&	419 unibrow::Utf8::kMaxOneByteChar &&

420 utf8_split_char_buffer_length_ < 4) {	420 utf8_split_char_buffer_length_ < 4) {

421 --current_data_length_;	421 --current_data_length_;

422 ++utf8_split_char_buffer_length_;	422 ++utf8_split_char_buffer_length_;

	423 if (c >= (3 << 6)) {

	424 // 3 << 6 = 0b11000000; this is the first byte of the multi-byte

	425 // character. No need to copy the previous characters into the conversion

	426 // buffer (even if they're multi-byte).

	427 break;

	428 }

423 }	429 }

424 CHECK(utf8_split_char_buffer_length_ <= 4);	430 CHECK(utf8_split_char_buffer_length_ <= 4);

425 for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {	431 for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {

426 utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i];	432 utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i];

427 }	433 }

428 }	434 }

429	435

430	436

431 // ----------------------------------------------------------------------------	437 // ----------------------------------------------------------------------------

432 // ExternalTwoByteStringUtf16CharacterStream	438 // ExternalTwoByteStringUtf16CharacterStream

433	439

434 ExternalTwoByteStringUtf16CharacterStream::	440 ExternalTwoByteStringUtf16CharacterStream::

435 ~ExternalTwoByteStringUtf16CharacterStream() { }	441 ~ExternalTwoByteStringUtf16CharacterStream() { }

436	442

437	443

438 ExternalTwoByteStringUtf16CharacterStream	444 ExternalTwoByteStringUtf16CharacterStream

439 ::ExternalTwoByteStringUtf16CharacterStream(	445 ::ExternalTwoByteStringUtf16CharacterStream(

440 Handle<ExternalTwoByteString> data,	446 Handle<ExternalTwoByteString> data,

441 int start_position,	447 int start_position,

442 int end_position)	448 int end_position)

443 : Utf16CharacterStream(),	449 : Utf16CharacterStream(),

444 source_(data),	450 source_(data),

445 raw_data_(data->GetTwoByteData(start_position)) {	451 raw_data_(data->GetTwoByteData(start_position)) {

446 buffer_cursor_ = raw_data_,	452 buffer_cursor_ = raw_data_,

447 buffer_end_ = raw_data_ + (end_position - start_position);	453 buffer_end_ = raw_data_ + (end_position - start_position);

448 pos_ = start_position;	454 pos_ = start_position;

449 }	455 }

450	456

451 } } // namespace v8::internal	457 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « no previous file | test/cctest/test-api.cc » ('j') | no next file with comments »