| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/scanner-character-streams.h" | 5 #include "src/scanner-character-streams.h" |
| 6 | 6 |
| 7 #include "include/v8.h" | 7 #include "include/v8.h" |
| 8 #include "src/globals.h" | 8 #include "src/globals.h" |
| 9 #include "src/handles.h" | 9 #include "src/handles.h" |
| 10 #include "src/list-inl.h" // TODO(mstarzinger): Temporary cycle breaker! | 10 #include "src/list-inl.h" // TODO(mstarzinger): Temporary cycle breaker! |
| (...skipping 329 matching lines...) | (...skipping 329 matching lines...) |
| 340 // completely; it will typically leave the last character empty (see | 340 // completely; it will typically leave the last character empty (see |
| 341 // Utf8ToUtf16CharacterStream::CopyChars). | 341 // Utf8ToUtf16CharacterStream::CopyChars). |
| 342 while (data_in_buffer < kBufferSize - 1) { | 342 while (data_in_buffer < kBufferSize - 1) { |
| 343 if (current_data_ == NULL) { | 343 if (current_data_ == NULL) { |
| 344 // GetSomeData will wait until the embedder has enough data. Here's an | 344 // GetSomeData will wait until the embedder has enough data. Here's an |
| 345 // interface between the API which uses size_t (which is the correct type | 345 // interface between the API which uses size_t (which is the correct type |
| 346 // here) and the internal parts which use size_t. | 346 // here) and the internal parts which use size_t. |
| 347 current_data_length_ = source_stream_->GetMoreData(&current_data_); | 347 current_data_length_ = source_stream_->GetMoreData(&current_data_); |
| 348 current_data_offset_ = 0; | 348 current_data_offset_ = 0; |
| 349 bool data_ends = current_data_length_ == 0; | 349 bool data_ends = current_data_length_ == 0; |
| | 350 bookmark_data_is_from_current_data_ = false; |
| 350 | 351 |
| 351 // A caveat: a data chunk might end with bytes from an incomplete UTF-8 | 352 // A caveat: a data chunk might end with bytes from an incomplete UTF-8 |
| 352 // character (the rest of the bytes will be in the next chunk). | 353 // character (the rest of the bytes will be in the next chunk). |
| 353 if (encoding_ == ScriptCompiler::StreamedSource::UTF8) { | 354 if (encoding_ == ScriptCompiler::StreamedSource::UTF8) { |
| 354 HandleUtf8SplitCharacters(&data_in_buffer); | 355 HandleUtf8SplitCharacters(&data_in_buffer); |
| 355 if (!data_ends && current_data_offset_ == current_data_length_) { | 356 if (!data_ends && current_data_offset_ == current_data_length_) { |
| 356 // The data stream didn't end, but we used all the data in the | 357 // The data stream didn't end, but we used all the data in the |
| 357 // chunk. This will only happen when the chunk was really small. We | 358 // chunk. This will only happen when the chunk was really small. We |
| 358 // don't handle the case where a UTF-8 character is split over several | 359 // don't handle the case where a UTF-8 character is split over several |
| 359 // chunks; in that case V8 won't crash, but it will be a parse error. | 360 // chunks; in that case V8 won't crash, but it will be a parse error. |
| (...skipping 39 matching lines...) | (...skipping 39 matching lines...) |
| 399 // | 400 // |
| 400 // The underlying source_stream_ instance likely could re-construct this | 401 // The underlying source_stream_ instance likely could re-construct this |
| 401 // local data for us, but with the given interfaces we have no way of | 402 // local data for us, but with the given interfaces we have no way of |
| 402 // accomplishing this. Thus, we'll have to save all data locally. | 403 // accomplishing this. Thus, we'll have to save all data locally. |
| 403 // | 404 // |
| 404 // What gets saved where: | 405 // What gets saved where: |
| 405 // - pos_ => bookmark_ | 406 // - pos_ => bookmark_ |
| 406 // - buffer_[buffer_cursor_ .. buffer_end_] => bookmark_buffer_ | 407 // - buffer_[buffer_cursor_ .. buffer_end_] => bookmark_buffer_ |
| 407 // - current_data_[.._offset_ .. .._length_] => bookmark_data_ | 408 // - current_data_[.._offset_ .. .._length_] => bookmark_data_ |
| 408 // - utf8_split_char_buffer_* => bookmark_utf8_split... | 409 // - utf8_split_char_buffer_* => bookmark_utf8_split... |
| | 410 // |
| | 411 // To make sure we don't unnecessarily copy data, we also maintain |
| | 412 // whether bookmark_data_ contains a copy of the current current_data_ |
| | 413 // block. This is done with: |
| | 414 // - bookmark_data_is_from_current_data_ |
| | 415 // - bookmark_data_offset_: offset into bookmark_data_ |
| | 416 // |
| | 417 // Note that bookmark_data_is_from_current_data_ must be maintained |
| | 418 // whenever current_data_ is updated. |
| 409 | 419 |
| 410 bookmark_ = pos_; | 420 bookmark_ = pos_; |
| 411 | 421 |
| 412 size_t buffer_length = buffer_end_ - buffer_cursor_; | 422 size_t buffer_length = buffer_end_ - buffer_cursor_; |
| 413 bookmark_buffer_.Dispose(); | 423 bookmark_buffer_.Dispose(); |
| 414 bookmark_buffer_ = Vector<uint16_t>::New(static_cast<int>(buffer_length)); | 424 bookmark_buffer_ = Vector<uint16_t>::New(static_cast<int>(buffer_length)); |
| 415 CopyCharsUnsigned(bookmark_buffer_.start(), buffer_cursor_, buffer_length); | 425 CopyCharsUnsigned(bookmark_buffer_.start(), buffer_cursor_, buffer_length); |
| 416 | 426 |
| 417 size_t data_length = current_data_length_ - current_data_offset_; | 427 size_t data_length = current_data_length_ - current_data_offset_; |
| 418 bookmark_data_.Dispose(); | 428 size_t bookmark_data_length = static_cast<size_t>(bookmark_data_.length()); |
| 419 bookmark_data_ = Vector<uint8_t>::New(static_cast<int>(data_length)); | 429 if (bookmark_data_is_from_current_data_ && |
| 420 CopyBytes(bookmark_data_.start(), current_data_ + current_data_offset_, | 430 data_length < bookmark_data_length) { |
| 421 data_length); | 431 // Fast case: bookmark_data_ was previously copied from the current |
| | 432 // data block, and we have enough data for this bookmark. |
| | 433 bookmark_data_offset_ = bookmark_data_length - data_length; |
| | 434 } else { |
| | 435 // Slow case: We need to copy current_data_. |
| | 436 bookmark_data_.Dispose(); |
| | 437 bookmark_data_ = Vector<uint8_t>::New(static_cast<int>(data_length)); |
| | 438 CopyBytes(bookmark_data_.start(), current_data_ + current_data_offset_, |
| | 439 data_length); |
| | 440 bookmark_data_is_from_current_data_ = true; |
| | 441 bookmark_data_offset_ = 0; |
| | 442 } |
| 422 | 443 |
| 423 bookmark_utf8_split_char_buffer_length_ = utf8_split_char_buffer_length_; | 444 bookmark_utf8_split_char_buffer_length_ = utf8_split_char_buffer_length_; |
| 424 for (size_t i = 0; i < utf8_split_char_buffer_length_; i++) { | 445 for (size_t i = 0; i < utf8_split_char_buffer_length_; i++) { |
| 425 bookmark_utf8_split_char_buffer_[i] = utf8_split_char_buffer_[i]; | 446 bookmark_utf8_split_char_buffer_[i] = utf8_split_char_buffer_[i]; |
| 426 } | 447 } |
| 427 | 448 |
| 428 return source_stream_->SetBookmark(); | 449 return source_stream_->SetBookmark(); |
| 429 } | 450 } |
| 430 | 451 |
| 431 | 452 |
| 432 void ExternalStreamingStream::ResetToBookmark() { | 453 void ExternalStreamingStream::ResetToBookmark() { |
| 433 source_stream_->ResetToBookmark(); | 454 source_stream_->ResetToBookmark(); |
| 434 FlushCurrent(); | 455 FlushCurrent(); |
| 435 | 456 |
| 436 pos_ = bookmark_; | 457 pos_ = bookmark_; |
| 437 | 458 |
| 438 // bookmark_data_* => current_data_* | 459 // bookmark_data_* => current_data_* |
| 439 // (current_data_ assumes ownership of its memory.) | 460 // (current_data_ assumes ownership of its memory.) |
| 440 uint8_t* data = new uint8_t[bookmark_data_.length()]; | 461 uint8_t* data = new uint8_t[bookmark_data_.length() - bookmark_data_offset_]; |
| 441 current_data_offset_ = 0; | 462 current_data_offset_ = 0; |
| 442 current_data_length_ = bookmark_data_.length(); | 463 current_data_length_ = bookmark_data_.length() - bookmark_data_offset_; |
| 443 CopyCharsUnsigned(data, bookmark_data_.begin(), bookmark_data_.length()); | 464 CopyCharsUnsigned(data, bookmark_data_.begin() + bookmark_data_offset_, |
| | 465 bookmark_data_.length()); |
| 444 delete[] current_data_; | 466 delete[] current_data_; |
| 445 current_data_ = data; | 467 current_data_ = data; |
| | 468 bookmark_data_is_from_current_data_ = true; |
| 446 | 469 |
| 447 // bookmark_buffer_ needs to be copied to buffer_. | 470 // bookmark_buffer_ needs to be copied to buffer_. |
| 448 CopyCharsUnsigned(buffer_, bookmark_buffer_.begin(), | 471 CopyCharsUnsigned(buffer_, bookmark_buffer_.begin(), |
| 449 bookmark_buffer_.length()); | 472 bookmark_buffer_.length()); |
| 450 buffer_cursor_ = buffer_; | 473 buffer_cursor_ = buffer_; |
| 451 buffer_end_ = buffer_ + bookmark_buffer_.length(); | 474 buffer_end_ = buffer_ + bookmark_buffer_.length(); |
| 452 | 475 |
| 453 // utf8 split char buffer | 476 // utf8 split char buffer |
| 454 utf8_split_char_buffer_length_ = bookmark_utf8_split_char_buffer_length_; | 477 utf8_split_char_buffer_length_ = bookmark_utf8_split_char_buffer_length_; |
| 455 for (size_t i = 0; i < bookmark_utf8_split_char_buffer_length_; i++) { | 478 for (size_t i = 0; i < bookmark_utf8_split_char_buffer_length_; i++) { |
| 456 utf8_split_char_buffer_[i] = bookmark_utf8_split_char_buffer_[i]; | 479 utf8_split_char_buffer_[i] = bookmark_utf8_split_char_buffer_[i]; |
| 457 } | 480 } |
| 458 } | 481 } |
| 459 | 482 |
| 460 | 483 |
| 461 void ExternalStreamingStream::FlushCurrent() { | 484 void ExternalStreamingStream::FlushCurrent() { |
| 462 delete[] current_data_; | 485 delete[] current_data_; |
| 463 current_data_ = NULL; | 486 current_data_ = NULL; |
| 464 current_data_length_ = 0; | 487 current_data_length_ = 0; |
| 465 current_data_offset_ = 0; | 488 current_data_offset_ = 0; |
| | 489 bookmark_data_is_from_current_data_ = false; |
| 466 } | 490 } |
| 467 | 491 |
| 468 | 492 |
| 469 void ExternalStreamingStream::HandleUtf8SplitCharacters( | 493 void ExternalStreamingStream::HandleUtf8SplitCharacters( |
| 470 size_t* data_in_buffer) { | 494 size_t* data_in_buffer) { |
| 471 // Note the following property of UTF-8 which makes this function possible: | 495 // Note the following property of UTF-8 which makes this function possible: |
| 472 // Given any byte, we can always read its local environment (in both | 496 // Given any byte, we can always read its local environment (in both |
| 473 // directions) to find out the (possibly multi-byte) character it belongs | 497 // directions) to find out the (possibly multi-byte) character it belongs |
| 474 // to. Single byte characters are of the form 0b0XXXXXXX. The first byte of a | 498 // to. Single byte characters are of the form 0b0XXXXXXX. The first byte of a |
| 475 // multi-byte character is of the form 0b110XXXXX, 0b1110XXXX or | 499 // multi-byte character is of the form 0b110XXXXX, 0b1110XXXX or |
| (...skipping 79 matching lines...) | (...skipping 79 matching lines...) |
| 555 } | 579 } |
| 556 | 580 |
| 557 | 581 |
| 558 void ExternalTwoByteStringUtf16CharacterStream::ResetToBookmark() { | 582 void ExternalTwoByteStringUtf16CharacterStream::ResetToBookmark() { |
| 559 DCHECK(bookmark_ != kNoBookmark); | 583 DCHECK(bookmark_ != kNoBookmark); |
| 560 pos_ = bookmark_; | 584 pos_ = bookmark_; |
| 561 buffer_cursor_ = raw_data_ + bookmark_; | 585 buffer_cursor_ = raw_data_ + bookmark_; |
| 562 } | 586 } |
| 563 } // namespace internal | 587 } // namespace internal |
| 564 } // namespace v8 | 588 } // namespace v8 |
| OLD | NEW |