Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(234)

Side by Side Diff: src/parsing/scanner-character-streams.cc

Issue 2391273002: Fix bad-char handling in utf-8 streaming streams. Also add test. (Closed)
Patch Set: Improve comments. Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/unicode.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/parsing/scanner-character-streams.h" 5 #include "src/parsing/scanner-character-streams.h"
6 6
7 #include "include/v8.h" 7 #include "include/v8.h"
8 #include "src/globals.h" 8 #include "src/globals.h"
9 #include "src/handles.h" 9 #include "src/handles.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 268 matching lines...) Expand 10 before | Expand all | Expand 10 after
279 DCHECK_EQ(buffer_start_, buffer_cursor_); 279 DCHECK_EQ(buffer_start_, buffer_cursor_);
280 DCHECK_LT(buffer_end_ + 1, buffer_start_ + kBufferSize); 280 DCHECK_LT(buffer_end_ + 1, buffer_start_ + kBufferSize);
281 281
282 const Chunk& chunk = chunks_[current_.chunk_no]; 282 const Chunk& chunk = chunks_[current_.chunk_no];
283 283
284 // The buffer_ is writable, but buffer_*_ members are const. So we get a 284 // The buffer_ is writable, but buffer_*_ members are const. So we get a
285 // non-const pointer into buffer that points to the same char as buffer_end_. 285 // non-const pointer into buffer that points to the same char as buffer_end_.
286 uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_); 286 uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_);
287 DCHECK_EQ(cursor, buffer_end_); 287 DCHECK_EQ(cursor, buffer_end_);
288 288
289 // If the current chunk is the last (empty) chunk we'll have to process
290 // any left-over, partial characters.
291 if (chunk.length == 0) {
292 unibrow::uchar t =
293 unibrow::Utf8::ValueOfIncrementalFinish(&current_.pos.incomplete_char);
294 if (t != unibrow::Utf8::kBufferEmpty) {
295 DCHECK(t < unibrow::Utf16::kMaxNonSurrogateCharCode);
jochen (gone - plz use gerrit) 2016/10/05 16:11:56 DCHECK_LT?
vogelheim 2016/10/05 16:22:23 I tried, but couldn't get that to work. :-( I thi
296 *cursor = static_cast<uc16>(t);
297 buffer_end_++;
298 current_.pos.chars++;
299 }
300 return;
301 }
302
289 static const unibrow::uchar kUtf8Bom = 0xfeff; 303 static const unibrow::uchar kUtf8Bom = 0xfeff;
290 304
291 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = 305 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char =
292 current_.pos.incomplete_char; 306 current_.pos.incomplete_char;
293 size_t it; 307 size_t it;
294 for (it = current_.pos.bytes - chunk.start.bytes; 308 for (it = current_.pos.bytes - chunk.start.bytes;
295 it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) { 309 it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) {
296 unibrow::uchar t = 310 unibrow::uchar t =
297 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); 311 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char);
298 if (t == unibrow::Utf8::kIncomplete) continue; 312 if (t == unibrow::Utf8::kIncomplete) continue;
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after
414 if (out_of_data) return 0; 428 if (out_of_data) return 0;
415 429
416 // Fill the buffer, until we have at least one char (or are out of data). 430 // Fill the buffer, until we have at least one char (or are out of data).
417 // (The embedder might give us 1-byte blocks within a utf-8 char, so we 431 // (The embedder might give us 1-byte blocks within a utf-8 char, so we
418 // can't guarantee progress with one chunk. Thus we iterate.) 432 // can't guarantee progress with one chunk. Thus we iterate.)
419 while (!out_of_data && buffer_cursor_ == buffer_end_) { 433 while (!out_of_data && buffer_cursor_ == buffer_end_) {
420 // At end of current data, but there might be more? Then fetch it. 434 // At end of current data, but there might be more? Then fetch it.
421 if (current_.chunk_no == chunks_.size()) { 435 if (current_.chunk_no == chunks_.size()) {
422 out_of_data = !FetchChunk(); 436 out_of_data = !FetchChunk();
423 } 437 }
424 if (!out_of_data) FillBufferFromCurrentChunk(); 438 FillBufferFromCurrentChunk();
425 } 439 }
426 440
427 DCHECK_EQ(current_.pos.chars - position, buffer_end_ - buffer_cursor_); 441 DCHECK_EQ(current_.pos.chars - position, buffer_end_ - buffer_cursor_);
428 return buffer_end_ - buffer_cursor_; 442 return buffer_end_ - buffer_cursor_;
429 } 443 }
430 444
431 // ---------------------------------------------------------------------------- 445 // ----------------------------------------------------------------------------
432 // Chunks - helper for One- + TwoByteExternalStreamingStream 446 // Chunks - helper for One- + TwoByteExternalStreamingStream
433 namespace { 447 namespace {
434 448
(...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after
660 return new OneByteExternalStreamingStream(source_stream); 674 return new OneByteExternalStreamingStream(source_stream);
661 case v8::ScriptCompiler::StreamedSource::UTF8: 675 case v8::ScriptCompiler::StreamedSource::UTF8:
662 return new Utf8ExternalStreamingStream(source_stream); 676 return new Utf8ExternalStreamingStream(source_stream);
663 } 677 }
664 UNREACHABLE(); 678 UNREACHABLE();
665 return nullptr; 679 return nullptr;
666 } 680 }
667 681
668 } // namespace internal 682 } // namespace internal
669 } // namespace v8 683 } // namespace v8
OLDNEW
« no previous file with comments | « no previous file | src/unicode.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698