Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: src/scanner-character-streams.cc

Issue 607043002: Script streaming: fix split UTF-8 character handling. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: rebased Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | test/cctest/test-api.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/v8.h" 5 #include "src/v8.h"
6 6
7 #include "src/scanner-character-streams.h" 7 #include "src/scanner-character-streams.h"
8 8
9 #include "include/v8.h" 9 #include "include/v8.h"
10 #include "src/handles.h" 10 #include "src/handles.h"
(...skipping 393 matching lines...) Expand 10 before | Expand all | Expand 10 after
404 *data_in_buffer += new_chars_in_buffer; 404 *data_in_buffer += new_chars_in_buffer;
405 // Make sure we used all the data. 405 // Make sure we used all the data.
406 DCHECK(new_offset == utf8_split_char_buffer_length_); 406 DCHECK(new_offset == utf8_split_char_buffer_length_);
407 DCHECK(*data_in_buffer <= kBufferSize); 407 DCHECK(*data_in_buffer <= kBufferSize);
408 408
409 utf8_split_char_buffer_length_ = 0; 409 utf8_split_char_buffer_length_ = 0;
410 } 410 }
411 411
412 // Move bytes which are part of an incomplete character from the end of the 412 // Move bytes which are part of an incomplete character from the end of the
413 // current chunk to utf8_split_char_buffer_. They will be converted when the 413 // current chunk to utf8_split_char_buffer_. They will be converted when the
414 // next data chunk arrives. 414 // next data chunk arrives. Note that all valid UTF-8 characters are at most 4
415 // bytes long, but if the data is invalid, we can have character values bigger
416 // than unibrow::Utf8::kMaxOneByteChar for more than 4 consecutive bytes.
415 while (current_data_length_ > current_data_offset_ && 417 while (current_data_length_ > current_data_offset_ &&
416 (c = current_data_[current_data_length_ - 1]) > 418 (c = current_data_[current_data_length_ - 1]) >
417 unibrow::Utf8::kMaxOneByteChar) { 419 unibrow::Utf8::kMaxOneByteChar &&
420 utf8_split_char_buffer_length_ < 4) {
418 --current_data_length_; 421 --current_data_length_;
419 ++utf8_split_char_buffer_length_; 422 ++utf8_split_char_buffer_length_;
420 } 423 }
424 CHECK(utf8_split_char_buffer_length_ <= 4);
421 for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) { 425 for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {
422 utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i]; 426 utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i];
423 } 427 }
424 } 428 }
425 429
426 430
427 // ---------------------------------------------------------------------------- 431 // ----------------------------------------------------------------------------
428 // ExternalTwoByteStringUtf16CharacterStream 432 // ExternalTwoByteStringUtf16CharacterStream
429 433
430 ExternalTwoByteStringUtf16CharacterStream:: 434 ExternalTwoByteStringUtf16CharacterStream::
431 ~ExternalTwoByteStringUtf16CharacterStream() { } 435 ~ExternalTwoByteStringUtf16CharacterStream() { }
432 436
433 437
434 ExternalTwoByteStringUtf16CharacterStream 438 ExternalTwoByteStringUtf16CharacterStream
435 ::ExternalTwoByteStringUtf16CharacterStream( 439 ::ExternalTwoByteStringUtf16CharacterStream(
436 Handle<ExternalTwoByteString> data, 440 Handle<ExternalTwoByteString> data,
437 int start_position, 441 int start_position,
438 int end_position) 442 int end_position)
439 : Utf16CharacterStream(), 443 : Utf16CharacterStream(),
440 source_(data), 444 source_(data),
441 raw_data_(data->GetTwoByteData(start_position)) { 445 raw_data_(data->GetTwoByteData(start_position)) {
442 buffer_cursor_ = raw_data_, 446 buffer_cursor_ = raw_data_,
443 buffer_end_ = raw_data_ + (end_position - start_position); 447 buffer_end_ = raw_data_ + (end_position - start_position);
444 pos_ = start_position; 448 pos_ = start_position;
445 } 449 }
446 450
447 } } // namespace v8::internal 451 } } // namespace v8::internal
OLD | NEW
« no previous file with comments | « no previous file | test/cctest/test-api.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698