test/cctest/parsing/test-scanner-streams.cc - Issue 2354973002: Handle UTF-8 BOM at the beginning of a UTF-8 stream.

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: test/cctest/parsing/test-scanner-streams.cc

Issue 2354973002: Handle UTF-8 BOM at the beginning of a UTF-8 stream. (Closed)

Patch Set: Test w/ single-byte BOM chunks. Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: test/cctest/parsing/test-scanner-streams.cc

diff --git a/test/cctest/parsing/test-scanner-streams.cc b/test/cctest/parsing/test-scanner-streams.cc

index 1bd9dbc7cded7f16fb94ab92650df8f038446ce2..bf2762485708d5dacb5a7952df7824f9e94c901a 100644

--- a/test/cctest/parsing/test-scanner-streams.cc

+++ b/test/cctest/parsing/test-scanner-streams.cc

@@ -109,6 +109,67 @@ TEST(Utf8StreamAsciiOnly) {

} while (c != v8::internal::Utf16CharacterStream::kEndOfInput);

}

+TEST(Utf8StreamBOM) {

+ // Construct test string w/ UTF-8 BOM (byte order mark)

+ char data[3 + arraysize(unicode_utf8)] = {"\xef\xbb\xbf"};

+ strncpy(data + 3, unicode_utf8, arraysize(unicode_utf8));

+ const char* chunks[] = {data, "\0"};

+ ChunkSource chunk_source(chunks);

+ std::unique_ptr<v8::internal::Utf16CharacterStream> stream(

+ v8::internal::ScannerStream::For(

+ &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));

+ // Read the data without tripping over the BOM.

+ for (size_t i = 0; unicode_ucs2[i]; i++) {

+ CHECK_EQ(unicode_ucs2[i], stream->Advance());

+ }

+ CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, stream->Advance());

+ // Make sure seek works.

+ stream->Seek(0);

+ CHECK_EQ(unicode_ucs2[0], stream->Advance());

+ stream->Seek(5);

+ CHECK_EQ(unicode_ucs2[5], stream->Advance());

+TEST(Utf8SplitBOM) {

+ // Construct chunks with a BOM split into two chunks.

+ char partial_bom[] = "\xef\xbb";

+ char data[1 + arraysize(unicode_utf8)] = {"\xbf"};

+ strncpy(data + 1, unicode_utf8, arraysize(unicode_utf8));

+ {

+ const char* chunks[] = {partial_bom, data, "\0"};

+ ChunkSource chunk_source(chunks);

+ std::unique_ptr<v8::internal::Utf16CharacterStream> stream(

+ v8::internal::ScannerStream::For(

+ &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));

+ // Read the data without tripping over the BOM.

+ for (size_t i = 0; unicode_ucs2[i]; i++) {

+ CHECK_EQ(unicode_ucs2[i], stream->Advance());

+ }

+ // And now with single-byte BOM chunks.

+ char bom_byte_1[] = "\xef";

+ char bom_byte_2[] = "\xbb";

+ {

+ const char* chunks[] = {bom_byte_1, bom_byte_2, data, "\0"};

+ ChunkSource chunk_source(chunks);

+ std::unique_ptr<v8::internal::Utf16CharacterStream> stream(

+ v8::internal::ScannerStream::For(

+ &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));

+ // Read the data without tripping over the BOM.

+ for (size_t i = 0; unicode_ucs2[i]; i++) {

+ CHECK_EQ(unicode_ucs2[i], stream->Advance());

+ }

TEST(Utf8ChunkBoundaries) {

// Test utf-8 parsing at chunk boundaries.

« no previous file with comments | « src/parsing/scanner-character-streams.cc ('k') | no next file » | no next file with comments »