src/parsing/scanner-character-streams.cc - Issue 2391273002: Fix bad-char handling in utf-8 streaming streams. Also add test.

Side by Side Diff: src/parsing/scanner-character-streams.cc

Issue 2391273002: Fix bad-char handling in utf-8 streaming streams. Also add test. (Closed)

Patch Set: Improve comments. Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/parsing/scanner-character-streams.h"	5 #include "src/parsing/scanner-character-streams.h"

6	6

7 #include "include/v8.h"	7 #include "include/v8.h"

8 #include "src/globals.h"	8 #include "src/globals.h"

9 #include "src/handles.h"	9 #include "src/handles.h"

10 #include "src/objects-inl.h"	10 #include "src/objects-inl.h"

(...skipping 268 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
279 DCHECK_EQ(buffer_start_, buffer_cursor_);	279 DCHECK_EQ(buffer_start_, buffer_cursor_);

280 DCHECK_LT(buffer_end_ + 1, buffer_start_ + kBufferSize);	280 DCHECK_LT(buffer_end_ + 1, buffer_start_ + kBufferSize);

281	281

282 const Chunk& chunk = chunks_[current_.chunk_no];	282 const Chunk& chunk = chunks_[current_.chunk_no];

283	283

284 // The buffer_ is writable, but buffer_*_ members are const. So we get a	284 // The buffer_ is writable, but buffer_*_ members are const. So we get a

285 // non-const pointer into buffer that points to the same char as buffer_end_.	285 // non-const pointer into buffer that points to the same char as buffer_end_.

286 uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_);	286 uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_);

287 DCHECK_EQ(cursor, buffer_end_);	287 DCHECK_EQ(cursor, buffer_end_);

288	288

	289 // If the current chunk is the last (empty) chunk we'll have to process

	290 // any left-over, partial characters.

	291 if (chunk.length == 0) {

	292 unibrow::uchar t =

	293 unibrow::Utf8::ValueOfIncrementalFinish(&current_.pos.incomplete_char);

	294 if (t != unibrow::Utf8::kBufferEmpty) {

	295 DCHECK(t < unibrow::Utf16::kMaxNonSurrogateCharCode);
	jochen (gone - plz use gerrit) 2016/10/05 16:11:56 DCHECK_LT? vogelheim 2016/10/05 16:22:23 On 2016/10/05 16:11:56, jochen (slow) wrote: > DCHECK_LT? I tried, but couldn't get that to work. :-( I think the issue is that kMaxNonSurrogateCharCode was declared static const so that the compiler can directly use the value, and it is not materialized in memory. But the macro goo that implements DCHECK_LT eventually wants it as a const ..& parameter. Note the reference. And taking a reference requires that the parameter occupies memory of some sort. The error message is: ../../src/parsing/scanner-character-streams.cc:295: error: undefined reference to 'unibrow::Utf16::kMaxNonSurrogateCharCode' clang: error: linker command failed with exit code 1 (use -v to see invocation) Any ideas?
	296 *cursor = static_cast<uc16>(t);

	297 buffer_end_++;

	298 current_.pos.chars++;

	299 }

	300 return;

	301 }

	302

289 static const unibrow::uchar kUtf8Bom = 0xfeff;	303 static const unibrow::uchar kUtf8Bom = 0xfeff;

290	304

291 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char =	305 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char =

292 current_.pos.incomplete_char;	306 current_.pos.incomplete_char;

293 size_t it;	307 size_t it;

294 for (it = current_.pos.bytes - chunk.start.bytes;	308 for (it = current_.pos.bytes - chunk.start.bytes;

295 it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) {	309 it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) {

296 unibrow::uchar t =	310 unibrow::uchar t =

297 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char);	311 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char);

298 if (t == unibrow::Utf8::kIncomplete) continue;	312 if (t == unibrow::Utf8::kIncomplete) continue;

(...skipping 115 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
414 if (out_of_data) return 0;	428 if (out_of_data) return 0;

415	429

416 // Fill the buffer, until we have at least one char (or are out of data).	430 // Fill the buffer, until we have at least one char (or are out of data).

417 // (The embedder might give us 1-byte blocks within a utf-8 char, so we	431 // (The embedder might give us 1-byte blocks within a utf-8 char, so we

418 // can't guarantee progress with one chunk. Thus we iterate.)	432 // can't guarantee progress with one chunk. Thus we iterate.)

419 while (!out_of_data && buffer_cursor_ == buffer_end_) {	433 while (!out_of_data && buffer_cursor_ == buffer_end_) {

420 // At end of current data, but there might be more? Then fetch it.	434 // At end of current data, but there might be more? Then fetch it.

421 if (current_.chunk_no == chunks_.size()) {	435 if (current_.chunk_no == chunks_.size()) {

422 out_of_data = !FetchChunk();	436 out_of_data = !FetchChunk();

423 }	437 }

424 if (!out_of_data) FillBufferFromCurrentChunk();	438 FillBufferFromCurrentChunk();

425 }	439 }

426	440

427 DCHECK_EQ(current_.pos.chars - position, buffer_end_ - buffer_cursor_);	441 DCHECK_EQ(current_.pos.chars - position, buffer_end_ - buffer_cursor_);

428 return buffer_end_ - buffer_cursor_;	442 return buffer_end_ - buffer_cursor_;

429 }	443 }

430	444

431 // ----------------------------------------------------------------------------	445 // ----------------------------------------------------------------------------

432 // Chunks - helper for One- + TwoByteExternalStreamingStream	446 // Chunks - helper for One- + TwoByteExternalStreamingStream

433 namespace {	447 namespace {

434	448

(...skipping 225 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
660 return new OneByteExternalStreamingStream(source_stream);	674 return new OneByteExternalStreamingStream(source_stream);

661 case v8::ScriptCompiler::StreamedSource::UTF8:	675 case v8::ScriptCompiler::StreamedSource::UTF8:

662 return new Utf8ExternalStreamingStream(source_stream);	676 return new Utf8ExternalStreamingStream(source_stream);

663 }	677 }

664 UNREACHABLE();	678 UNREACHABLE();

665 return nullptr;	679 return nullptr;

666 }	680 }

667	681

668 } // namespace internal	682 } // namespace internal

669 } // namespace v8	683 } // namespace v8

OLD	NEW

« no previous file with comments | « no previous file | src/unicode.h » ('j') | no next file with comments »