Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(661)

Side by Side Diff: src/scanner-character-streams.cc

Issue 1346613002: Avoid excessive data copying for ExternalStreamingStream::SetBookmark. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Make compilers happy (signed/unsigned comparison) Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/scanner-character-streams.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/scanner-character-streams.h" 5 #include "src/scanner-character-streams.h"
6 6
7 #include "include/v8.h" 7 #include "include/v8.h"
8 #include "src/globals.h" 8 #include "src/globals.h"
9 #include "src/handles.h" 9 #include "src/handles.h"
10 #include "src/list-inl.h" // TODO(mstarzinger): Temporary cycle breaker! 10 #include "src/list-inl.h" // TODO(mstarzinger): Temporary cycle breaker!
(...skipping 329 matching lines...) Expand 10 before | Expand all | Expand 10 after
340 // completely; it will typically leave the last character empty (see 340 // completely; it will typically leave the last character empty (see
341 // Utf8ToUtf16CharacterStream::CopyChars). 341 // Utf8ToUtf16CharacterStream::CopyChars).
342 while (data_in_buffer < kBufferSize - 1) { 342 while (data_in_buffer < kBufferSize - 1) {
343 if (current_data_ == NULL) { 343 if (current_data_ == NULL) {
344 // GetSomeData will wait until the embedder has enough data. Here's an 344 // GetSomeData will wait until the embedder has enough data. Here's an
345 // interface between the API which uses size_t (which is the correct type 345 // interface between the API which uses size_t (which is the correct type
346 // here) and the internal parts which use size_t. 346 // here) and the internal parts which use size_t.
347 current_data_length_ = source_stream_->GetMoreData(&current_data_); 347 current_data_length_ = source_stream_->GetMoreData(&current_data_);
348 current_data_offset_ = 0; 348 current_data_offset_ = 0;
349 bool data_ends = current_data_length_ == 0; 349 bool data_ends = current_data_length_ == 0;
350 bookmark_data_is_from_current_data_ = false;
350 351
351 // A caveat: a data chunk might end with bytes from an incomplete UTF-8 352 // A caveat: a data chunk might end with bytes from an incomplete UTF-8
352 // character (the rest of the bytes will be in the next chunk). 353 // character (the rest of the bytes will be in the next chunk).
353 if (encoding_ == ScriptCompiler::StreamedSource::UTF8) { 354 if (encoding_ == ScriptCompiler::StreamedSource::UTF8) {
354 HandleUtf8SplitCharacters(&data_in_buffer); 355 HandleUtf8SplitCharacters(&data_in_buffer);
355 if (!data_ends && current_data_offset_ == current_data_length_) { 356 if (!data_ends && current_data_offset_ == current_data_length_) {
356 // The data stream didn't end, but we used all the data in the 357 // The data stream didn't end, but we used all the data in the
357 // chunk. This will only happen when the chunk was really small. We 358 // chunk. This will only happen when the chunk was really small. We
358 // don't handle the case where a UTF-8 character is split over several 359 // don't handle the case where a UTF-8 character is split over several
359 // chunks; in that case V8 won't crash, but it will be a parse error. 360 // chunks; in that case V8 won't crash, but it will be a parse error.
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
399 // 400 //
400 // The underlying source_stream_ instance likely could re-construct this 401 // The underlying source_stream_ instance likely could re-construct this
401 // local data for us, but with the given interfaces we have no way of 402 // local data for us, but with the given interfaces we have no way of
402 // accomplishing this. Thus, we'll have to save all data locally. 403 // accomplishing this. Thus, we'll have to save all data locally.
403 // 404 //
404 // What gets saved where: 405 // What gets saved where:
405 // - pos_ => bookmark_ 406 // - pos_ => bookmark_
406 // - buffer_[buffer_cursor_ .. buffer_end_] => bookmark_buffer_ 407 // - buffer_[buffer_cursor_ .. buffer_end_] => bookmark_buffer_
407 // - current_data_[.._offset_ .. .._length_] => bookmark_data_ 408 // - current_data_[.._offset_ .. .._length_] => bookmark_data_
408 // - utf8_split_char_buffer_* => bookmark_utf8_split... 409 // - utf8_split_char_buffer_* => bookmark_utf8_split...
410 //
411 // To make sure we don't unnecessarily copy data, we also maintain
412 // whether bookmark_data_ contains a copy of the current current_data_
413 // block. This is done with:
414 // - bookmark_data_is_from_current_data_
415 // - bookmark_data_offset_: offset into bookmark_data_
416 //
417 // Note that bookmark_data_is_from_current_data_ must be maintained
418 // whenever current_data_ is updated.
409 419
410 bookmark_ = pos_; 420 bookmark_ = pos_;
411 421
412 size_t buffer_length = buffer_end_ - buffer_cursor_; 422 size_t buffer_length = buffer_end_ - buffer_cursor_;
413 bookmark_buffer_.Dispose(); 423 bookmark_buffer_.Dispose();
414 bookmark_buffer_ = Vector<uint16_t>::New(static_cast<int>(buffer_length)); 424 bookmark_buffer_ = Vector<uint16_t>::New(static_cast<int>(buffer_length));
415 CopyCharsUnsigned(bookmark_buffer_.start(), buffer_cursor_, buffer_length); 425 CopyCharsUnsigned(bookmark_buffer_.start(), buffer_cursor_, buffer_length);
416 426
417 size_t data_length = current_data_length_ - current_data_offset_; 427 size_t data_length = current_data_length_ - current_data_offset_;
418 bookmark_data_.Dispose(); 428 size_t bookmark_data_length = static_cast<size_t>(bookmark_data_.length());
419 bookmark_data_ = Vector<uint8_t>::New(static_cast<int>(data_length)); 429 if (bookmark_data_is_from_current_data_ &&
420 CopyBytes(bookmark_data_.start(), current_data_ + current_data_offset_, 430 data_length < bookmark_data_length) {
421 data_length); 431 // Fast case: bookmark_data_ was previously copied from the current
432 // data block, and we have enough data for this bookmark.
433 bookmark_data_offset_ = bookmark_data_length - data_length;
434 } else {
435 // Slow case: We need to copy current_data_.
436 bookmark_data_.Dispose();
437 bookmark_data_ = Vector<uint8_t>::New(static_cast<int>(data_length));
438 CopyBytes(bookmark_data_.start(), current_data_ + current_data_offset_,
439 data_length);
440 bookmark_data_is_from_current_data_ = true;
441 bookmark_data_offset_ = 0;
442 }
422 443
423 bookmark_utf8_split_char_buffer_length_ = utf8_split_char_buffer_length_; 444 bookmark_utf8_split_char_buffer_length_ = utf8_split_char_buffer_length_;
424 for (size_t i = 0; i < utf8_split_char_buffer_length_; i++) { 445 for (size_t i = 0; i < utf8_split_char_buffer_length_; i++) {
425 bookmark_utf8_split_char_buffer_[i] = utf8_split_char_buffer_[i]; 446 bookmark_utf8_split_char_buffer_[i] = utf8_split_char_buffer_[i];
426 } 447 }
427 448
428 return source_stream_->SetBookmark(); 449 return source_stream_->SetBookmark();
429 } 450 }
430 451
431 452
432 void ExternalStreamingStream::ResetToBookmark() { 453 void ExternalStreamingStream::ResetToBookmark() {
433 source_stream_->ResetToBookmark(); 454 source_stream_->ResetToBookmark();
434 FlushCurrent(); 455 FlushCurrent();
435 456
436 pos_ = bookmark_; 457 pos_ = bookmark_;
437 458
438 // bookmark_data_* => current_data_* 459 // bookmark_data_* => current_data_*
439 // (current_data_ assumes ownership of its memory.) 460 // (current_data_ assumes ownership of its memory.)
440 uint8_t* data = new uint8_t[bookmark_data_.length()]; 461 uint8_t* data = new uint8_t[bookmark_data_.length() - bookmark_data_offset_];
441 current_data_offset_ = 0; 462 current_data_offset_ = 0;
442 current_data_length_ = bookmark_data_.length(); 463 current_data_length_ = bookmark_data_.length() - bookmark_data_offset_;
443 CopyCharsUnsigned(data, bookmark_data_.begin(), bookmark_data_.length()); 464 CopyCharsUnsigned(data, bookmark_data_.begin() + bookmark_data_offset_,
465 bookmark_data_.length());
444 delete[] current_data_; 466 delete[] current_data_;
445 current_data_ = data; 467 current_data_ = data;
468 bookmark_data_is_from_current_data_ = true;
446 469
447 // bookmark_buffer_ needs to be copied to buffer_. 470 // bookmark_buffer_ needs to be copied to buffer_.
448 CopyCharsUnsigned(buffer_, bookmark_buffer_.begin(), 471 CopyCharsUnsigned(buffer_, bookmark_buffer_.begin(),
449 bookmark_buffer_.length()); 472 bookmark_buffer_.length());
450 buffer_cursor_ = buffer_; 473 buffer_cursor_ = buffer_;
451 buffer_end_ = buffer_ + bookmark_buffer_.length(); 474 buffer_end_ = buffer_ + bookmark_buffer_.length();
452 475
453 // utf8 split char buffer 476 // utf8 split char buffer
454 utf8_split_char_buffer_length_ = bookmark_utf8_split_char_buffer_length_; 477 utf8_split_char_buffer_length_ = bookmark_utf8_split_char_buffer_length_;
455 for (size_t i = 0; i < bookmark_utf8_split_char_buffer_length_; i++) { 478 for (size_t i = 0; i < bookmark_utf8_split_char_buffer_length_; i++) {
456 utf8_split_char_buffer_[i] = bookmark_utf8_split_char_buffer_[i]; 479 utf8_split_char_buffer_[i] = bookmark_utf8_split_char_buffer_[i];
457 } 480 }
458 } 481 }
459 482
460 483
461 void ExternalStreamingStream::FlushCurrent() { 484 void ExternalStreamingStream::FlushCurrent() {
462 delete[] current_data_; 485 delete[] current_data_;
463 current_data_ = NULL; 486 current_data_ = NULL;
464 current_data_length_ = 0; 487 current_data_length_ = 0;
465 current_data_offset_ = 0; 488 current_data_offset_ = 0;
489 bookmark_data_is_from_current_data_ = false;
466 } 490 }
467 491
468 492
469 void ExternalStreamingStream::HandleUtf8SplitCharacters( 493 void ExternalStreamingStream::HandleUtf8SplitCharacters(
470 size_t* data_in_buffer) { 494 size_t* data_in_buffer) {
471 // Note the following property of UTF-8 which makes this function possible: 495 // Note the following property of UTF-8 which makes this function possible:
472 // Given any byte, we can always read its local environment (in both 496 // Given any byte, we can always read its local environment (in both
473 // directions) to find out the (possibly multi-byte) character it belongs 497 // directions) to find out the (possibly multi-byte) character it belongs
474 // to. Single byte characters are of the form 0b0XXXXXXX. The first byte of a 498 // to. Single byte characters are of the form 0b0XXXXXXX. The first byte of a
475 // multi-byte character is of the form 0b110XXXXX, 0b1110XXXX or 499 // multi-byte character is of the form 0b110XXXXX, 0b1110XXXX or
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
555 } 579 }
556 580
557 581
558 void ExternalTwoByteStringUtf16CharacterStream::ResetToBookmark() { 582 void ExternalTwoByteStringUtf16CharacterStream::ResetToBookmark() {
559 DCHECK(bookmark_ != kNoBookmark); 583 DCHECK(bookmark_ != kNoBookmark);
560 pos_ = bookmark_; 584 pos_ = bookmark_;
561 buffer_cursor_ = raw_data_ + bookmark_; 585 buffer_cursor_ = raw_data_ + bookmark_;
562 } 586 }
563 } // namespace internal 587 } // namespace internal
564 } // namespace v8 588 } // namespace v8
OLDNEW
« no previous file with comments | « src/scanner-character-streams.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698