Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(464)

Unified Diff: test/cctest/parsing/test-scanner-streams.cc

Issue 2354973002: Handle Utf-8 BOM at beginning of an Utf-8 stream. (Closed)
Patch Set: Test w/ single-byte BOM chunks. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/parsing/scanner-character-streams.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: test/cctest/parsing/test-scanner-streams.cc
diff --git a/test/cctest/parsing/test-scanner-streams.cc b/test/cctest/parsing/test-scanner-streams.cc
index 1bd9dbc7cded7f16fb94ab92650df8f038446ce2..bf2762485708d5dacb5a7952df7824f9e94c901a 100644
--- a/test/cctest/parsing/test-scanner-streams.cc
+++ b/test/cctest/parsing/test-scanner-streams.cc
@@ -109,6 +109,67 @@ TEST(Utf8StreamAsciiOnly) {
} while (c != v8::internal::Utf16CharacterStream::kEndOfInput);
}
+TEST(Utf8StreamBOM) {
+ // Construct test string w/ UTF-8 BOM (byte order mark)
+ char data[3 + arraysize(unicode_utf8)] = {"\xef\xbb\xbf"};
+ strncpy(data + 3, unicode_utf8, arraysize(unicode_utf8));
+
+ const char* chunks[] = {data, "\0"};
+ ChunkSource chunk_source(chunks);
+ std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
+ v8::internal::ScannerStream::For(
+ &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
+
+ // Read the data without tripping over the BOM.
+ for (size_t i = 0; unicode_ucs2[i]; i++) {
+ CHECK_EQ(unicode_ucs2[i], stream->Advance());
+ }
+ CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, stream->Advance());
+
+ // Make sure seek works.
+ stream->Seek(0);
+ CHECK_EQ(unicode_ucs2[0], stream->Advance());
+
+ stream->Seek(5);
+ CHECK_EQ(unicode_ucs2[5], stream->Advance());
+}
+
+TEST(Utf8SplitBOM) {
+ // Construct chunks with a BOM split into two chunks.
+ char partial_bom[] = "\xef\xbb";
+ char data[1 + arraysize(unicode_utf8)] = {"\xbf"};
+ strncpy(data + 1, unicode_utf8, arraysize(unicode_utf8));
+
+ {
+ const char* chunks[] = {partial_bom, data, "\0"};
+ ChunkSource chunk_source(chunks);
+ std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
+ v8::internal::ScannerStream::For(
+ &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
+
+ // Read the data without tripping over the BOM.
+ for (size_t i = 0; unicode_ucs2[i]; i++) {
+ CHECK_EQ(unicode_ucs2[i], stream->Advance());
+ }
+ }
+
+ // And now with single-byte BOM chunks.
+ char bom_byte_1[] = "\xef";
+ char bom_byte_2[] = "\xbb";
+ {
+ const char* chunks[] = {bom_byte_1, bom_byte_2, data, "\0"};
+ ChunkSource chunk_source(chunks);
+ std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
+ v8::internal::ScannerStream::For(
+ &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
+
+ // Read the data without tripping over the BOM.
+ for (size_t i = 0; unicode_ucs2[i]; i++) {
+ CHECK_EQ(unicode_ucs2[i], stream->Advance());
+ }
+ }
+}
+
TEST(Utf8ChunkBoundaries) {
// Test utf-8 parsing at chunk boundaries.
« no previous file with comments | « src/parsing/scanner-character-streams.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698