Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(119)

Side by Side Diff: test/cctest/parsing/test-scanner-streams.cc

Issue 2354973002: Handle Utf-8 BOM at beginning of an Utf-8 stream. (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/factory.h" // for i::Factory::NewExternalStringFrom*Byte 5 #include "src/factory.h" // for i::Factory::NewExternalStringFrom*Byte
6 #include "src/objects-inl.h" 6 #include "src/objects-inl.h"
7 #include "src/parsing/scanner-character-streams.h" 7 #include "src/parsing/scanner-character-streams.h"
8 #include "src/parsing/scanner.h" 8 #include "src/parsing/scanner.h"
9 #include "src/type-feedback-vector-inl.h" // for include "src/factory.h" 9 #include "src/type-feedback-vector-inl.h" // for include "src/factory.h"
10 #include "test/cctest/cctest.h" 10 #include "test/cctest/cctest.h"
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
102 v8::internal::ScannerStream::For( 102 v8::internal::ScannerStream::For(
103 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); 103 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
104 104
105 // Read the data without dying. 105 // Read the data without dying.
106 v8::internal::uc32 c; 106 v8::internal::uc32 c;
107 do { 107 do {
108 c = stream->Advance(); 108 c = stream->Advance();
109 } while (c != v8::internal::Utf16CharacterStream::kEndOfInput); 109 } while (c != v8::internal::Utf16CharacterStream::kEndOfInput);
110 } 110 }
111 111
// Feeds a UTF-8 stream whose only data chunk starts with the bytes EF BB BF
// and checks that Advance() returns the elements of unicode_ucs2 from the
// very first call, then kEndOfInput, and that Seek() offsets index into
// unicode_ucs2 as well (i.e. the three leading bytes are not counted).
// NOTE(review): unicode_utf8 and unicode_ucs2 are defined outside this
// excerpt; presumably they hold the same text in two encodings -- confirm.
112 TEST(Utf8StreamBOM) {
113 // Construct test string w/ UTF-8 BOM (byte order mark)
// The brace initializer sets the first three bytes; the remaining elements
// of the array are zero-initialized before strncpy fills in the payload.
114 char data[3 + arraysize(unicode_utf8)] = {"\xef\xbb\xbf"};
// Copies arraysize(unicode_utf8) bytes right behind the three BOM bytes; the
// array was sized as 3 + arraysize(unicode_utf8), so this fits exactly.
115 strncpy(data + 3, unicode_utf8, arraysize(unicode_utf8));
116
// Two chunks: the BOM-prefixed data, followed by a final one-byte chunk.
// NOTE(review): the trailing chunk presumably marks the end of input for
// ChunkSource (defined outside this excerpt) -- confirm.
117 const char* chunks[] = {data, "\0"};
118 ChunkSource chunk_source(chunks);
// Wrap the chunk source in a character stream created for the UTF8 encoding.
119 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
120 v8::internal::ScannerStream::For(
121 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
122
123 // Read the data without tripping over the BOM.
// The loop stops at the first zero element of unicode_ucs2.
124 for (size_t i = 0; unicode_ucs2[i]; i++) {
125 CHECK_EQ(unicode_ucs2[i], stream->Advance());
126 }
// Once every expected element was returned, the next Advance() must report
// end of input.
127 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, stream->Advance());
128
129 // Make sure seek works.
// After Seek(0) the next Advance() must return the first expected element
// again.
130 stream->Seek(0);
131 CHECK_EQ(unicode_ucs2[0], stream->Advance());
132
// Same check at offset 5.
// NOTE(review): assumes unicode_ucs2 has more than five elements -- confirm.
133 stream->Seek(5);
134 CHECK_EQ(unicode_ucs2[5], stream->Advance());
135 }
136
// Feeds a UTF-8 stream in which the bytes EF BB BF straddle a chunk boundary
// (EF BB in the first chunk, BF at the start of the second) and checks that
// Advance() still returns the elements of unicode_ucs2 from the first call.
// Only sequential reading is checked here.
// NOTE(review): unicode_utf8 and unicode_ucs2 are defined outside this
// excerpt; presumably they hold the same text in two encodings -- confirm.
137 TEST(Utf8SplitBOM) {
138 // Construct chunks with a BOM split into two chunks.
// First chunk: only the first two BOM bytes.
139 char partial_bom[] = "\xef\xbb";
// Second chunk: the third BOM byte, followed by the payload copied in below.
// The remaining array elements are zero-initialized by the brace initializer.
140 char data[1 + arraysize(unicode_utf8)] = {"\xbf"};
marja 2016/09/20 18:52:30 ... how paranoid do we want to be, should there al
vogelheim 2016/09/21 07:50:54 Done.
// Copies arraysize(unicode_utf8) bytes right behind the single BOM byte; the
// array was sized as 1 + arraysize(unicode_utf8), so this fits exactly.
141 strncpy(data + 1, unicode_utf8, arraysize(unicode_utf8));
142
// Three chunks: partial BOM, rest of BOM plus payload, and a final one-byte
// chunk.
// NOTE(review): the trailing chunk presumably marks the end of input for
// ChunkSource (defined outside this excerpt) -- confirm.
143 const char* chunks[] = {partial_bom, data, "\0"};
144 ChunkSource chunk_source(chunks);
// Wrap the chunk source in a character stream created for the UTF8 encoding.
145 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
146 v8::internal::ScannerStream::For(
147 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
148
149 // Read the data without tripping over the BOM.
// The loop stops at the first zero element of unicode_ucs2.
150 for (size_t i = 0; unicode_ucs2[i]; i++) {
151 CHECK_EQ(unicode_ucs2[i], stream->Advance());
152 }
153 }
154
112 TEST(Utf8ChunkBoundaries) { 155 TEST(Utf8ChunkBoundaries) {
113 // Test utf-8 parsing at chunk boundaries. 156 // Test utf-8 parsing at chunk boundaries.
114 157
115 // Split the test string at each byte and pass it to the stream. This way, 158 // Split the test string at each byte and pass it to the stream. This way,
116 // we'll have a split at each possible boundary. 159 // we'll have a split at each possible boundary.
117 size_t len = strlen(unicode_utf8); 160 size_t len = strlen(unicode_utf8);
118 char buffer[arraysize(unicode_utf8) + 3]; 161 char buffer[arraysize(unicode_utf8) + 3];
119 for (size_t i = 1; i < len; i++) { 162 for (size_t i = 1; i < len; i++) {
120 // Copy source string into buffer, splitting it at i. 163 // Copy source string into buffer, splitting it at i.
121 // Then add three chunks, 0..i-1, i..strlen-1, empty. 164 // Then add three chunks, 0..i-1, i..strlen-1, empty.
(...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after
355 398
356 // 4k large buffer. 399 // 4k large buffer.
357 char buffer[4096 + 1]; 400 char buffer[4096 + 1];
358 for (unsigned i = 0; i < arraysize(buffer); i++) { 401 for (unsigned i = 0; i < arraysize(buffer); i++) {
359 buffer[i] = static_cast<char>(i & 0x7F); 402 buffer[i] = static_cast<char>(i & 0x7F);
360 } 403 }
361 buffer[arraysize(buffer) - 1] = '\0'; 404 buffer[arraysize(buffer) - 1] = '\0';
362 TestCharacterStreams(buffer, arraysize(buffer) - 1); 405 TestCharacterStreams(buffer, arraysize(buffer) - 1);
363 TestCharacterStreams(buffer, arraysize(buffer) - 1, 576, 3298); 406 TestCharacterStreams(buffer, arraysize(buffer) - 1, 576, 3298);
364 } 407 }
OLD | NEW
« src/parsing/scanner-character-streams.cc ('K') | « src/parsing/scanner-character-streams.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698