Index: test/cctest/parsing/test-scanner-streams.cc |
diff --git a/test/cctest/parsing/test-scanner-streams.cc b/test/cctest/parsing/test-scanner-streams.cc |
index 1bd9dbc7cded7f16fb94ab92650df8f038446ce2..bf2762485708d5dacb5a7952df7824f9e94c901a 100644 |
--- a/test/cctest/parsing/test-scanner-streams.cc |
+++ b/test/cctest/parsing/test-scanner-streams.cc |
@@ -109,6 +109,67 @@ TEST(Utf8StreamAsciiOnly) { |
} while (c != v8::internal::Utf16CharacterStream::kEndOfInput); |
} |
+TEST(Utf8StreamBOM) { |
+ // Construct test string w/ UTF-8 BOM (byte order mark) |
+ char data[3 + arraysize(unicode_utf8)] = {"\xef\xbb\xbf"}; |
+ strncpy(data + 3, unicode_utf8, arraysize(unicode_utf8)); |
+ |
+ const char* chunks[] = {data, "\0"}; |
+ ChunkSource chunk_source(chunks); |
+ std::unique_ptr<v8::internal::Utf16CharacterStream> stream( |
+ v8::internal::ScannerStream::For( |
+ &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); |
+ |
+ // Read the data without tripping over the BOM. |
+ for (size_t i = 0; unicode_ucs2[i]; i++) { |
+ CHECK_EQ(unicode_ucs2[i], stream->Advance()); |
+ } |
+ CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, stream->Advance()); |
+ |
+ // Make sure seek works. |
+ stream->Seek(0); |
+ CHECK_EQ(unicode_ucs2[0], stream->Advance()); |
+ |
+ stream->Seek(5); |
+ CHECK_EQ(unicode_ucs2[5], stream->Advance()); |
+} |
+ |
+TEST(Utf8SplitBOM) { |
+ // Construct chunks with a BOM split into two chunks. |
+ char partial_bom[] = "\xef\xbb"; |
+ char data[1 + arraysize(unicode_utf8)] = {"\xbf"}; |
+ strncpy(data + 1, unicode_utf8, arraysize(unicode_utf8)); |
+ |
+ { |
+ const char* chunks[] = {partial_bom, data, "\0"}; |
+ ChunkSource chunk_source(chunks); |
+ std::unique_ptr<v8::internal::Utf16CharacterStream> stream( |
+ v8::internal::ScannerStream::For( |
+ &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); |
+ |
+ // Read the data without tripping over the BOM. |
+ for (size_t i = 0; unicode_ucs2[i]; i++) { |
+ CHECK_EQ(unicode_ucs2[i], stream->Advance()); |
+ } |
+ } |
+ |
+ // And now with single-byte BOM chunks. |
+ char bom_byte_1[] = "\xef"; |
+ char bom_byte_2[] = "\xbb"; |
+ { |
+ const char* chunks[] = {bom_byte_1, bom_byte_2, data, "\0"}; |
+ ChunkSource chunk_source(chunks); |
+ std::unique_ptr<v8::internal::Utf16CharacterStream> stream( |
+ v8::internal::ScannerStream::For( |
+ &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); |
+ |
+ // Read the data without tripping over the BOM. |
+ for (size_t i = 0; unicode_ucs2[i]; i++) { |
+ CHECK_EQ(unicode_ucs2[i], stream->Advance()); |
+ } |
+ } |
+} |
+ |
TEST(Utf8ChunkBoundaries) { |
// Test utf-8 parsing at chunk boundaries. |