OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/scanner-character-streams.h" | 5 #include "src/scanner-character-streams.h" |
6 | 6 |
7 #include "include/v8.h" | 7 #include "include/v8.h" |
8 #include "src/globals.h" | 8 #include "src/globals.h" |
9 #include "src/handles.h" | 9 #include "src/handles.h" |
10 #include "src/list-inl.h" // TODO(mstarzinger): Temporary cycle breaker! | 10 #include "src/list-inl.h" // TODO(mstarzinger): Temporary cycle breaker! |
(...skipping 329 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
340 // completely; it will typically leave the last character empty (see | 340 // completely; it will typically leave the last character empty (see |
341 // Utf8ToUtf16CharacterStream::CopyChars). | 341 // Utf8ToUtf16CharacterStream::CopyChars). |
342 while (data_in_buffer < kBufferSize - 1) { | 342 while (data_in_buffer < kBufferSize - 1) { |
343 if (current_data_ == NULL) { | 343 if (current_data_ == NULL) { |
344 // GetMoreData will wait until the embedder has enough data. Here's an | 344 // GetMoreData will wait until the embedder has enough data. Here's an |
345 // interface between the API which uses size_t (which is the correct type | 345 // interface between the API which uses size_t (which is the correct type |
346 // here) and the internal parts which also use size_t. | 346 // here) and the internal parts which also use size_t. |
347 current_data_length_ = source_stream_->GetMoreData(&current_data_); | 347 current_data_length_ = source_stream_->GetMoreData(&current_data_); |
348 current_data_offset_ = 0; | 348 current_data_offset_ = 0; |
349 bool data_ends = current_data_length_ == 0; | 349 bool data_ends = current_data_length_ == 0; |
| 350 bookmark_data_is_from_current_data_ = false; |
350 | 351 |
351 // A caveat: a data chunk might end with bytes from an incomplete UTF-8 | 352 // A caveat: a data chunk might end with bytes from an incomplete UTF-8 |
352 // character (the rest of the bytes will be in the next chunk). | 353 // character (the rest of the bytes will be in the next chunk). |
353 if (encoding_ == ScriptCompiler::StreamedSource::UTF8) { | 354 if (encoding_ == ScriptCompiler::StreamedSource::UTF8) { |
354 HandleUtf8SplitCharacters(&data_in_buffer); | 355 HandleUtf8SplitCharacters(&data_in_buffer); |
355 if (!data_ends && current_data_offset_ == current_data_length_) { | 356 if (!data_ends && current_data_offset_ == current_data_length_) { |
356 // The data stream didn't end, but we used all the data in the | 357 // The data stream didn't end, but we used all the data in the |
357 // chunk. This will only happen when the chunk was really small. We | 358 // chunk. This will only happen when the chunk was really small. We |
358 // don't handle the case where a UTF-8 character is split over several | 359 // don't handle the case where a UTF-8 character is split over several |
359 // chunks; in that case V8 won't crash, but it will be a parse error. | 360 // chunks; in that case V8 won't crash, but it will be a parse error. |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
399 // | 400 // |
400 // The underlying source_stream_ instance likely could re-construct this | 401 // The underlying source_stream_ instance likely could re-construct this |
401 // local data for us, but with the given interfaces we have no way of | 402 // local data for us, but with the given interfaces we have no way of |
402 // accomplishing this. Thus, we'll have to save all data locally. | 403 // accomplishing this. Thus, we'll have to save all data locally. |
403 // | 404 // |
404 // What gets saved where: | 405 // What gets saved where: |
405 // - pos_ => bookmark_ | 406 // - pos_ => bookmark_ |
406 // - buffer_[buffer_cursor_ .. buffer_end_] => bookmark_buffer_ | 407 // - buffer_[buffer_cursor_ .. buffer_end_] => bookmark_buffer_ |
407 // - current_data_[.._offset_ .. .._length_] => bookmark_data_ | 408 // - current_data_[.._offset_ .. .._length_] => bookmark_data_ |
408 // - utf8_split_char_buffer_* => bookmark_utf8_split... | 409 // - utf8_split_char_buffer_* => bookmark_utf8_split... |
| 410 // |
| 411 // To make sure we don't unnecessarily copy data, we also maintain |
| 412 // whether bookmark_data_ contains a copy of the current current_data_ |
| 413 // block. This is done with: |
| 414 // - bookmark_data_is_from_current_data_ |
| 415 // - bookmark_data_offset_: offset into bookmark_data_ |
| 416 // |
| 417 // Note that bookmark_data_is_from_current_data_ must be maintained |
| 418 // whenever current_data_ is updated. |
409 | 419 |
410 bookmark_ = pos_; | 420 bookmark_ = pos_; |
411 | 421 |
412 size_t buffer_length = buffer_end_ - buffer_cursor_; | 422 size_t buffer_length = buffer_end_ - buffer_cursor_; |
413 bookmark_buffer_.Dispose(); | 423 bookmark_buffer_.Dispose(); |
414 bookmark_buffer_ = Vector<uint16_t>::New(static_cast<int>(buffer_length)); | 424 bookmark_buffer_ = Vector<uint16_t>::New(static_cast<int>(buffer_length)); |
415 CopyCharsUnsigned(bookmark_buffer_.start(), buffer_cursor_, buffer_length); | 425 CopyCharsUnsigned(bookmark_buffer_.start(), buffer_cursor_, buffer_length); |
416 | 426 |
417 size_t data_length = current_data_length_ - current_data_offset_; | 427 size_t data_length = current_data_length_ - current_data_offset_; |
418 bookmark_data_.Dispose(); | 428 size_t bookmark_data_length = static_cast<size_t>(bookmark_data_.length()); |
419 bookmark_data_ = Vector<uint8_t>::New(static_cast<int>(data_length)); | 429 if (bookmark_data_is_from_current_data_ && |
420 CopyBytes(bookmark_data_.start(), current_data_ + current_data_offset_, | 430 data_length < bookmark_data_length) { |
421 data_length); | 431 // Fast case: bookmark_data_ was previously copied from the current |
| 432 // data block, and we have enough data for this bookmark. |
| 433 bookmark_data_offset_ = bookmark_data_length - data_length; |
| 434 } else { |
| 435 // Slow case: We need to copy current_data_. |
| 436 bookmark_data_.Dispose(); |
| 437 bookmark_data_ = Vector<uint8_t>::New(static_cast<int>(data_length)); |
| 438 CopyBytes(bookmark_data_.start(), current_data_ + current_data_offset_, |
| 439 data_length); |
| 440 bookmark_data_is_from_current_data_ = true; |
| 441 bookmark_data_offset_ = 0; |
| 442 } |
422 | 443 |
423 bookmark_utf8_split_char_buffer_length_ = utf8_split_char_buffer_length_; | 444 bookmark_utf8_split_char_buffer_length_ = utf8_split_char_buffer_length_; |
424 for (size_t i = 0; i < utf8_split_char_buffer_length_; i++) { | 445 for (size_t i = 0; i < utf8_split_char_buffer_length_; i++) { |
425 bookmark_utf8_split_char_buffer_[i] = utf8_split_char_buffer_[i]; | 446 bookmark_utf8_split_char_buffer_[i] = utf8_split_char_buffer_[i]; |
426 } | 447 } |
427 | 448 |
428 return source_stream_->SetBookmark(); | 449 return source_stream_->SetBookmark(); |
429 } | 450 } |
430 | 451 |
431 | 452 |
432 void ExternalStreamingStream::ResetToBookmark() { | 453 void ExternalStreamingStream::ResetToBookmark() { |
433 source_stream_->ResetToBookmark(); | 454 source_stream_->ResetToBookmark(); |
434 FlushCurrent(); | 455 FlushCurrent(); |
435 | 456 |
436 pos_ = bookmark_; | 457 pos_ = bookmark_; |
437 | 458 |
438 // bookmark_data_* => current_data_* | 459 // bookmark_data_[bookmark_data_offset_ ..] => current_data_* |
439 // (current_data_ assumes ownership of its memory.) | 460 // (current_data_ assumes ownership of its memory.) |
440 uint8_t* data = new uint8_t[bookmark_data_.length()]; | 461 uint8_t* data = new uint8_t[bookmark_data_.length() - bookmark_data_offset_]; |
441 current_data_offset_ = 0; | 462 current_data_offset_ = 0; |
442 current_data_length_ = bookmark_data_.length(); | 463 current_data_length_ = bookmark_data_.length() - bookmark_data_offset_; |
443 CopyCharsUnsigned(data, bookmark_data_.begin(), bookmark_data_.length()); | 464 CopyCharsUnsigned(data, bookmark_data_.begin() + bookmark_data_offset_, |
| 465 current_data_length_); |
444 delete[] current_data_; | 466 delete[] current_data_; |
445 current_data_ = data; | 467 current_data_ = data; |
| 468 bookmark_data_is_from_current_data_ = true; |
446 | 469 |
447 // bookmark_buffer_ needs to be copied to buffer_. | 470 // bookmark_buffer_ needs to be copied to buffer_. |
448 CopyCharsUnsigned(buffer_, bookmark_buffer_.begin(), | 471 CopyCharsUnsigned(buffer_, bookmark_buffer_.begin(), |
449 bookmark_buffer_.length()); | 472 bookmark_buffer_.length()); |
450 buffer_cursor_ = buffer_; | 473 buffer_cursor_ = buffer_; |
451 buffer_end_ = buffer_ + bookmark_buffer_.length(); | 474 buffer_end_ = buffer_ + bookmark_buffer_.length(); |
452 | 475 |
453 // utf8 split char buffer | 476 // utf8 split char buffer |
454 utf8_split_char_buffer_length_ = bookmark_utf8_split_char_buffer_length_; | 477 utf8_split_char_buffer_length_ = bookmark_utf8_split_char_buffer_length_; |
455 for (size_t i = 0; i < bookmark_utf8_split_char_buffer_length_; i++) { | 478 for (size_t i = 0; i < bookmark_utf8_split_char_buffer_length_; i++) { |
456 utf8_split_char_buffer_[i] = bookmark_utf8_split_char_buffer_[i]; | 479 utf8_split_char_buffer_[i] = bookmark_utf8_split_char_buffer_[i]; |
457 } | 480 } |
458 } | 481 } |
459 | 482 |
460 | 483 |
461 void ExternalStreamingStream::FlushCurrent() { | 484 void ExternalStreamingStream::FlushCurrent() { |
462 delete[] current_data_; | 485 delete[] current_data_; |
463 current_data_ = NULL; | 486 current_data_ = NULL; |
464 current_data_length_ = 0; | 487 current_data_length_ = 0; |
465 current_data_offset_ = 0; | 488 current_data_offset_ = 0; |
| 489 bookmark_data_is_from_current_data_ = false; |
466 } | 490 } |
467 | 491 |
468 | 492 |
469 void ExternalStreamingStream::HandleUtf8SplitCharacters( | 493 void ExternalStreamingStream::HandleUtf8SplitCharacters( |
470 size_t* data_in_buffer) { | 494 size_t* data_in_buffer) { |
471 // Note the following property of UTF-8 which makes this function possible: | 495 // Note the following property of UTF-8 which makes this function possible: |
472 // Given any byte, we can always read its local environment (in both | 496 // Given any byte, we can always read its local environment (in both |
473 // directions) to find out the (possibly multi-byte) character it belongs | 497 // directions) to find out the (possibly multi-byte) character it belongs |
474 // to. Single byte characters are of the form 0b0XXXXXXX. The first byte of a | 498 // to. Single byte characters are of the form 0b0XXXXXXX. The first byte of a |
475 // multi-byte character is of the form 0b110XXXXX, 0b1110XXXX or | 499 // multi-byte character is of the form 0b110XXXXX, 0b1110XXXX or |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
555 } | 579 } |
556 | 580 |
557 | 581 |
558 void ExternalTwoByteStringUtf16CharacterStream::ResetToBookmark() { | 582 void ExternalTwoByteStringUtf16CharacterStream::ResetToBookmark() { |
559 DCHECK(bookmark_ != kNoBookmark); | 583 DCHECK(bookmark_ != kNoBookmark); |
560 pos_ = bookmark_; | 584 pos_ = bookmark_; |
561 buffer_cursor_ = raw_data_ + bookmark_; | 585 buffer_cursor_ = raw_data_ + bookmark_; |
562 } | 586 } |
563 } // namespace internal | 587 } // namespace internal |
564 } // namespace v8 | 588 } // namespace v8 |
OLD | NEW |